From 232af392fdb52aa2739dad4e03fed273b3c3f24a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Jul 2016 10:12:05 +0100 Subject: drm/i915/breadcrumbs: Queue hangcheck before sleeping Never go to sleep waiting on the GPU without first ensuring that we will get woken up. We have a choice of queuing the hangcheck before every schedule() or the first time we wake up. In order to simply accommodate both the signaler and the ordinary waiter, move the queuing to the common point of enabling the irq. We lose the paranoid safety of ensuring that the hangcheck is active before the sleep, but avoid code duplication (and redundant hangcheck queuing). Testcase: igt/prime_busy Fixes: c81d46138da6 ("drm/i915: Convert trace-irq to the breadcrumb waiter") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1468055535-19740-2-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8f50919..7fd4498 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1501,15 +1501,6 @@ int __i915_wait_request(struct drm_i915_gem_request *req, break; } - /* Ensure that even if the GPU hangs, we get woken up. * * However, note that if no one is waiting, we never notice * a gpu hang. Eventually, we will have to wait for a resource * held by the GPU and so trigger a hangcheck. In the most * pathological case, this will be upon memory starvation! */ - i915_queue_hangcheck(req->i915); - timeout_remain = io_schedule_timeout(timeout_remain); if (timeout_remain == 0) { ret = -ETIME; diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index d89b2c9..b074f3d 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -93,6 +93,15 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) if (!b->irq_enabled || test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) mod_timer(&b->fake_irq, jiffies + 1); + + /* Ensure that even if the GPU hangs, we get woken up. + * + * However, note that if no one is waiting, we never notice + * a gpu hang. Eventually, we will have to wait for a resource + * held by the GPU and so trigger a hangcheck. In the most + * pathological case, this will be upon memory starvation! + */ + i915_queue_hangcheck(i915); } static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) -- cgit v0.10.2 From c96156138284a44e0a96b4e163a68bdeefc05735 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Jul 2016 10:12:06 +0100 Subject: drm/i915: Kick hangcheck from retire worker Let's ensure that we cannot run indefinitely without the hangcheck worker being queued. We removed it from being kicked on every request because we were kicking it a few million times in every hangcheck interval and only once is necessary! However, that leaves us with the issue of what happens if userspace never waits for a request or runs out of resources; what if userspace just issues a request and then spins on BUSY_IOCTL? 
Testcase: igt/gem_busy Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1468055535-19740-3-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7fd4498..adeca0e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3281,10 +3281,12 @@ i915_gem_retire_work_handler(struct work_struct *work) * We do not need to do this test under locking as in the worst-case * we queue the retire worker once too often. */ - if (READ_ONCE(dev_priv->gt.awake)) + if (READ_ONCE(dev_priv->gt.awake)) { + i915_queue_hangcheck(dev_priv); queue_delayed_work(dev_priv->wq, &dev_priv->gt.retire_work, round_jiffies_up_relative(HZ)); + } } static void -- cgit v0.10.2 From 1b290f97c8d9a8fb31aa07208c37e4d964b835b4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Jul 2016 10:12:14 +0100 Subject: drm/i915: Remove temporary RPM wakeref assert disables Now that the last couple of hacks have been removed from the runtime power management users, we can fully enable the asserts by preventing the temptation to disable them when our code is buggy. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1468055535-19740-11-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 55aeaf0..6c8485c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1665,13 +1665,6 @@ enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) atomic_dec(&dev_priv->pm.wakeref_count); } -/* TODO: convert users of these to rely instead on proper RPM refcounting */ -#define DISABLE_RPM_WAKEREF_ASSERTS(dev_priv) \ - disable_rpm_wakeref_asserts(dev_priv) - -#define ENABLE_RPM_WAKEREF_ASSERTS(dev_priv) \ - enable_rpm_wakeref_asserts(dev_priv) - void intel_runtime_pm_get(struct drm_i915_private *dev_priv); bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv); void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv); -- cgit v0.10.2 From 4f074a5393431a7d2cc0de7fcfe2f61d24854628 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jul 2016 14:46:17 +0100 Subject: drm/i915: Update ifdeffery for mutex->owner In commit 7608a43d8f2e ("locking/mutexes: Use MUTEX_SPIN_ON_OWNER when appropriate") the owner field in the mutex was updated from being dependent upon CONFIG_SMP to using optimistic spin. Update our peek function to suit. 
Fixes: 7608a43d8f2e ("locking/mutexes: Use MUTEX_SPIN_ON_OWNER...") Reported-by: Hong Liu Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1468244777-4888-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 067632a..6f10b42 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -40,7 +40,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) if (!mutex_is_locked(mutex)) return false; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) +#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) return mutex->owner == task; #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ -- cgit v0.10.2 From 8d35acba25daa4aaccd1d2f430f82192a8d444a9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Jul 2016 12:55:29 +0100 Subject: drm/i915: Provide argument names for static stubs Make sure we keep kbuilder happy in all of its random configs by providing argument names for compile-time stubs. In file included from drivers/gpu/drm/i915/intel_dp_mst.c:27:0: drivers/gpu/drm/i915/i915_drv.h: In function 'i915_debugfs_register': >> drivers/gpu/drm/i915/i915_drv.h:3612:48: error: parameter name omitted static inline int i915_debugfs_register(struct drm_i915_private *) {return 0;} ^~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/i915_drv.h: In function 'i915_debugfs_unregister': drivers/gpu/drm/i915/i915_drv.h:3613:51: error: parameter name omitted static inline void i915_debugfs_unregister(struct drm_i915_private *) {} Reported-by: 0day Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468324529-20461-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 03e1bfa..e76cfe2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3644,8 +3644,8 @@ void i915_debugfs_unregister(struct drm_i915_private *dev_priv); int i915_debugfs_connector_add(struct drm_connector *connector); void intel_display_crc_init(struct drm_device *dev); #else -static inline int i915_debugfs_register(struct drm_i915_private *) {return 0;} -static inline void i915_debugfs_unregister(struct drm_i915_private *) {} +static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) {return 0;} +static inline void i915_debugfs_unregister(struct drm_i915_private *dev_priv) {} static inline int i915_debugfs_connector_add(struct drm_connector *connector) { return 0; } static inline void intel_display_crc_init(struct drm_device *dev) {} -- cgit v0.10.2 From 035ea405c91e2dc89325a79129cf9af2b9c2ae8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 19:24:47 +0300 Subject: drm/i915: Unbreak interrupts on pre-gen6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to gen6 we didn't have per-ring IMR registers, which means that since commit 61ff75ac20ff ("drm/i915: Simplify enabling user-interrupts with L3-remapping") we're now masking off all interrupts when init_render_ring() gets called. That's rather rude. Let's limit the ring IMR frobbing to machines that actually have the per-ring IMR registers. 
Cc: Chris Wilson Cc: Tvrtko Ursulin Fixes: 61ff75ac20ff ("drm/i915: Simplify enabling user-interrupts with L3-remapping") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468340687-3596-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 61e00bf..c8e77c0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1305,7 +1305,8 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - I915_WRITE_IMR(engine, ~engine->irq_keep_mask); + if (INTEL_INFO(dev_priv)->gen >= 6) + I915_WRITE_IMR(engine, ~engine->irq_keep_mask); return init_workarounds_ring(engine); } -- cgit v0.10.2 From c2c7f240081da626ed9da6dc536306d2ad393c9a Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Wed, 13 Jul 2016 16:03:35 +0100 Subject: drm/i915: unify first-stage engine struct setup intel_lrc.c has a table of "logical rings" (meaning engines), while intel_ringbuffer.c has separately open-coded initialisation for each engine. We can deduplicate this somewhat by using the same first-stage engine-setup function for both modes. So here we expose the function that transfers information from the static table of (all) known engines to the dev_priv->engine array of engines available on this device (adjusting the names along the way) and then embed calls to it in both the LRC and the legacy-mode setup. Signed-off-by: Dave Gordon Reviewed-by: Chris Wilson Signed-off-by: Tvrtko Ursulin diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 70c6990..1eb6d46 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1978,8 +1978,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) } static inline void -logical_ring_default_irqs(struct intel_engine_cs *engine, unsigned shift) +logical_ring_default_irqs(struct intel_engine_cs *engine) { + unsigned shift = engine->irq_shift; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } @@ -2083,14 +2084,14 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) return ret; } -static const struct logical_ring_info { +static const struct engine_info { const char *name; unsigned exec_id; unsigned guc_id; u32 mmio_base; unsigned irq_shift; int (*init)(struct intel_engine_cs *engine); -} logical_rings[] = { +} intel_engines[] = { [RCS] = { .name = "render ring", .exec_id = I915_EXEC_RENDER, @@ -2133,20 +2134,31 @@ static const struct logical_ring_info { }, }; -static struct intel_engine_cs * -logical_ring_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) +struct intel_engine_cs * +intel_engine_setup(struct drm_i915_private *dev_priv, + enum intel_engine_id id) { - const struct logical_ring_info *info = &logical_rings[id]; + const struct engine_info *info = &intel_engines[id]; struct intel_engine_cs *engine = &dev_priv->engine[id]; - enum forcewake_domains fw_domains; engine->id = id; + engine->i915 = dev_priv; engine->name = info->name; engine->exec_id = info->exec_id; - engine->guc_id = info->guc_id; + engine->hw_id = engine->guc_id = info->guc_id; engine->mmio_base = info->mmio_base; + engine->irq_shift = info->irq_shift; - engine->i915 = dev_priv; + return engine; +} + +static struct intel_engine_cs * +logical_ring_setup(struct 
drm_i915_private *dev_priv, enum intel_engine_id id) +{ + struct intel_engine_cs *engine; + enum forcewake_domains fw_domains; + + engine = intel_engine_setup(dev_priv, id); /* Intentionally left blank. */ engine->buffer = NULL; @@ -2176,7 +2188,7 @@ logical_ring_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) logical_ring_init_platform_invariants(engine); logical_ring_default_vfuncs(engine); - logical_ring_default_irqs(engine, info->irq_shift); + logical_ring_default_irqs(engine); intel_engine_init_hangcheck(engine); i915_gem_batch_pool_init(&dev_priv->drm, &engine->batch_pool); @@ -2205,14 +2217,14 @@ int intel_logical_rings_init(struct drm_device *dev) WARN_ON(INTEL_INFO(dev_priv)->ring_mask & GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES)); - for (i = 0; i < ARRAY_SIZE(logical_rings); i++) { + for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { if (!HAS_ENGINE(dev_priv, i)) continue; - if (!logical_rings[i].init) + if (!intel_engines[i].init) continue; - ret = logical_rings[i].init(logical_ring_setup(dev_priv, i)); + ret = intel_engines[i].init(logical_ring_setup(dev_priv, i)); if (ret) goto cleanup; @@ -2220,7 +2232,7 @@ int intel_logical_rings_init(struct drm_device *dev) } /* - * Catch failures to update logical_rings table when the new engines + * Catch failures to update intel_engines table when the new engines * are added to the driver by a warning and disabling the forgotten * engines. */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c8e77c0..6db1947 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2839,14 +2839,10 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, int intel_init_render_ring_buffer(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; + struct intel_engine_cs *engine; int ret; - engine->name = "render ring"; - engine->id = RCS; - engine->exec_id = I915_EXEC_RENDER; - engine->hw_id = 0; - engine->mmio_base = RENDER_RING_BASE; + engine = intel_engine_setup(dev_priv, RCS); intel_ring_default_vfuncs(dev_priv, engine); @@ -2901,17 +2897,13 @@ int intel_init_render_ring_buffer(struct drm_device *dev) int intel_init_bsd_ring_buffer(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[VCS]; + struct intel_engine_cs *engine; - engine->name = "bsd ring"; - engine->id = VCS; - engine->exec_id = I915_EXEC_BSD; - engine->hw_id = 1; + engine = intel_engine_setup(dev_priv, VCS); intel_ring_default_vfuncs(dev_priv, engine); if (INTEL_GEN(dev_priv) >= 6) { - engine->mmio_base = GEN6_BSD_RING_BASE; /* gen6 bsd needs a special wa for tail updates */ if (IS_GEN6(dev_priv)) engine->write_tail = gen6_bsd_ring_write_tail; @@ -2939,13 +2931,9 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) int intel_init_bsd2_ring_buffer(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[VCS2]; + struct intel_engine_cs *engine; - engine->name = "bsd2 ring"; - engine->id = VCS2; - engine->exec_id = I915_EXEC_BSD; - engine->hw_id = 4; - engine->mmio_base = GEN8_BSD2_RING_BASE; + engine = intel_engine_setup(dev_priv, VCS2); intel_ring_default_vfuncs(dev_priv, engine); @@ -2959,13 +2947,9 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) int intel_init_blt_ring_buffer(struct drm_device *dev) { 
struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[BCS]; + struct intel_engine_cs *engine; - engine->name = "blitter ring"; - engine->id = BCS; - engine->exec_id = I915_EXEC_BLT; - engine->hw_id = 2; - engine->mmio_base = BLT_RING_BASE; + engine = intel_engine_setup(dev_priv, BCS); intel_ring_default_vfuncs(dev_priv, engine); @@ -2982,13 +2966,9 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) int intel_init_vebox_ring_buffer(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine = &dev_priv->engine[VECS]; + struct intel_engine_cs *engine; - engine->name = "video enhancement ring"; - engine->id = VECS; - engine->exec_id = I915_EXEC_VEBOX; - engine->hw_id = 3; - engine->mmio_base = VEBOX_RING_BASE; + engine = intel_engine_setup(dev_priv, VECS); intel_ring_default_vfuncs(dev_priv, engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 12cb7ed..f8eeb50 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -147,6 +147,7 @@ struct intel_engine_cs { unsigned int hw_id; unsigned int guc_id; /* XXX same as hw_id? */ u32 mmio_base; + unsigned int irq_shift; struct intel_ringbuffer *buffer; struct list_head buffers; @@ -361,6 +362,10 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; +struct intel_engine_cs * +intel_engine_setup(struct drm_i915_private *dev_priv, + enum intel_engine_id id); + static inline bool intel_engine_initialized(const struct intel_engine_cs *engine) { -- cgit v0.10.2 From bb45438f5e2eb339c16e1c3c0ebb8fe36dd55acf Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 13 Jul 2016 16:03:36 +0100 Subject: drm/i915: Prepare for engine init unification Move the execlist engine setup to vfuncs so that the engine init loop is clearly split into the mode agnostic and specific steps. Signed-off-by: Tvrtko Ursulin Reviewed-by: Daniel Vetter Reviewed-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1eb6d46..604cfbb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2003,6 +2003,46 @@ lrc_setup_hws(struct intel_engine_cs *engine, return 0; } +static void +logical_ring_setup(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + enum forcewake_domains fw_domains; + + /* Intentionally left blank. 
*/ + engine->buffer = NULL; + + fw_domains = intel_uncore_forcewake_for_reg(dev_priv, + RING_ELSP(engine), + FW_REG_WRITE); + + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + RING_CONTEXT_STATUS_PTR(engine), + FW_REG_READ | FW_REG_WRITE); + + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + RING_CONTEXT_STATUS_BUF_BASE(engine), + FW_REG_READ); + + engine->fw_domains = fw_domains; + + INIT_LIST_HEAD(&engine->active_list); + INIT_LIST_HEAD(&engine->request_list); + INIT_LIST_HEAD(&engine->buffers); + INIT_LIST_HEAD(&engine->execlist_queue); + spin_lock_init(&engine->execlist_lock); + + tasklet_init(&engine->irq_tasklet, + intel_lrc_irq_handler, (unsigned long)engine); + + logical_ring_init_platform_invariants(engine); + logical_ring_default_vfuncs(engine); + logical_ring_default_irqs(engine); + + intel_engine_init_hangcheck(engine); + i915_gem_batch_pool_init(&dev_priv->drm, &engine->batch_pool); +} + static int logical_ring_init(struct intel_engine_cs *engine) { @@ -2048,6 +2088,8 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; int ret; + logical_ring_setup(engine); + if (HAS_L3_DPF(dev_priv)) engine->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; @@ -2084,6 +2126,13 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) return ret; } +static int logical_xcs_ring_init(struct intel_engine_cs *engine) +{ + logical_ring_setup(engine); + + return logical_ring_init(engine); +} + static const struct engine_info { const char *name; unsigned exec_id; @@ -2106,7 +2155,7 @@ static const struct engine_info { .guc_id = GUC_BLITTER_ENGINE, .mmio_base = BLT_RING_BASE, .irq_shift = GEN8_BCS_IRQ_SHIFT, - .init = logical_ring_init, + .init = logical_xcs_ring_init, }, [VCS] = { .name = "bsd ring", @@ -2114,7 +2163,7 @@ static const struct engine_info { .guc_id = GUC_VIDEO_ENGINE, .mmio_base = GEN6_BSD_RING_BASE, .irq_shift = GEN8_VCS1_IRQ_SHIFT, - .init = logical_ring_init, + .init = logical_xcs_ring_init, }, [VCS2] = { .name = "bsd2 ring", @@ -2122,7 +2171,7 @@ static const struct engine_info { .guc_id = GUC_VIDEO_ENGINE2, .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, - .init = logical_ring_init, + .init = logical_xcs_ring_init, }, [VECS] = { .name = "video enhancement ring", @@ -2130,7 +2179,7 @@ static const struct engine_info { .guc_id = GUC_VIDEOENHANCE_ENGINE, .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, - .init = logical_ring_init, + .init = logical_xcs_ring_init, }, }; @@ -2152,50 +2201,6 @@ intel_engine_setup(struct drm_i915_private *dev_priv, return engine; } -static struct intel_engine_cs * -logical_ring_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) -{ - struct intel_engine_cs *engine; - enum forcewake_domains fw_domains; - - engine = intel_engine_setup(dev_priv, id); - - /* Intentionally left blank. 
*/ - engine->buffer = NULL; - - fw_domains = intel_uncore_forcewake_for_reg(dev_priv, - RING_ELSP(engine), - FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_PTR(engine), - FW_REG_READ | FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_BUF_BASE(engine), - FW_REG_READ); - - engine->fw_domains = fw_domains; - - INIT_LIST_HEAD(&engine->active_list); - INIT_LIST_HEAD(&engine->request_list); - INIT_LIST_HEAD(&engine->buffers); - INIT_LIST_HEAD(&engine->execlist_queue); - spin_lock_init(&engine->execlist_lock); - - tasklet_init(&engine->irq_tasklet, - intel_lrc_irq_handler, (unsigned long)engine); - - logical_ring_init_platform_invariants(engine); - logical_ring_default_vfuncs(engine); - logical_ring_default_irqs(engine); - - intel_engine_init_hangcheck(engine); - i915_gem_batch_pool_init(&dev_priv->drm, &engine->batch_pool); - - return engine; -} - /** * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers * @dev: DRM device. @@ -2224,7 +2229,7 @@ int intel_logical_rings_init(struct drm_device *dev) if (!intel_engines[i].init) continue; - ret = intel_engines[i].init(logical_ring_setup(dev_priv, i)); + ret = intel_engines[i].init(intel_engine_setup(dev_priv, i)); if (ret) goto cleanup; -- cgit v0.10.2 From 8b3e2d36391716a6e9e707bcf0c0cf908ad85990 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 13 Jul 2016 16:03:37 +0100 Subject: drm/i915: Unify engine init loop With the unified common engine setup done, and the execlist engine initialization loop clearly split into two phases, we can eliminate the separate legacy engine initialization code. v2: Fix cleanup path for legacy. v3: Rename constructors. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Daniel Vetter Reviewed-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e76cfe2..65d69e5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2016,7 +2016,6 @@ struct drm_i915_private { int (*execbuf_submit)(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas); - int (*init_engines)(struct drm_device *dev); void (*cleanup_engine)(struct intel_engine_cs *engine); void (*stop_engine)(struct intel_engine_cs *engine); @@ -3374,7 +3373,6 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) void i915_gem_reset(struct drm_device *dev); bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_init(struct drm_device *dev); -int i915_gem_init_engines(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_engines(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index adeca0e..b788f97 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5056,53 +5056,6 @@ static void init_unused_rings(struct drm_device *dev) } } -int i915_gem_init_engines(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - ret = intel_init_render_ring_buffer(dev); - if (ret) - return ret; - - if (HAS_BSD(dev)) { - ret = intel_init_bsd_ring_buffer(dev); - if (ret) - goto cleanup_render_ring; - } - - if (HAS_BLT(dev)) { - ret = intel_init_blt_ring_buffer(dev); - if (ret) - goto cleanup_bsd_ring; - } - - if (HAS_VEBOX(dev)) { - ret = 
intel_init_vebox_ring_buffer(dev); - if (ret) - goto cleanup_blt_ring; - } - - if (HAS_BSD2(dev)) { - ret = intel_init_bsd2_ring_buffer(dev); - if (ret) - goto cleanup_vebox_ring; - } - - return 0; - -cleanup_vebox_ring: - intel_cleanup_engine(&dev_priv->engine[VECS]); -cleanup_blt_ring: - intel_cleanup_engine(&dev_priv->engine[BCS]); -cleanup_bsd_ring: - intel_cleanup_engine(&dev_priv->engine[VCS]); -cleanup_render_ring: - intel_cleanup_engine(&dev_priv->engine[RCS]); - - return ret; -} - int i915_gem_init_hw(struct drm_device *dev) { @@ -5178,12 +5131,10 @@ int i915_gem_init(struct drm_device *dev) if (!i915.enable_execlists) { dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; - dev_priv->gt.init_engines = i915_gem_init_engines; dev_priv->gt.cleanup_engine = intel_cleanup_engine; dev_priv->gt.stop_engine = intel_stop_engine; } else { dev_priv->gt.execbuf_submit = intel_execlists_submission; - dev_priv->gt.init_engines = intel_logical_rings_init; dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; dev_priv->gt.stop_engine = intel_logical_ring_stop; } @@ -5203,7 +5154,7 @@ int i915_gem_init(struct drm_device *dev) if (ret) goto out_unlock; - ret = dev_priv->gt.init_engines(dev); + ret = intel_engines_init(dev); if (ret) goto out_unlock; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 604cfbb..2e13a3a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2139,7 +2139,8 @@ static const struct engine_info { unsigned guc_id; u32 mmio_base; unsigned irq_shift; - int (*init)(struct intel_engine_cs *engine); + int (*init_legacy)(struct intel_engine_cs *engine); + int (*init_execlists)(struct intel_engine_cs *engine); } intel_engines[] = { [RCS] = { .name = "render ring", @@ -2147,7 +2148,8 @@ static const struct engine_info { .guc_id = GUC_RENDER_ENGINE, .mmio_base = RENDER_RING_BASE, .irq_shift = GEN8_RCS_IRQ_SHIFT, - .init = logical_render_ring_init, + .init_execlists = logical_render_ring_init, + .init_legacy = intel_init_render_ring_buffer, }, [BCS] = { .name = "blitter ring", @@ -2155,7 +2157,8 @@ static const struct engine_info { .guc_id = GUC_BLITTER_ENGINE, .mmio_base = BLT_RING_BASE, .irq_shift = GEN8_BCS_IRQ_SHIFT, - .init = logical_xcs_ring_init, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_blt_ring_buffer, }, [VCS] = { .name = "bsd ring", @@ -2163,7 +2166,8 @@ static const struct engine_info { .guc_id = GUC_VIDEO_ENGINE, .mmio_base = GEN6_BSD_RING_BASE, .irq_shift = GEN8_VCS1_IRQ_SHIFT, - .init = logical_xcs_ring_init, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd_ring_buffer, }, [VCS2] = { .name = "bsd2 ring", @@ -2171,7 +2175,8 @@ static const struct engine_info { .guc_id = GUC_VIDEO_ENGINE2, .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, - .init = logical_xcs_ring_init, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd2_ring_buffer, }, [VECS] = { .name = "video enhancement ring", @@ -2179,7 +2184,8 @@ static const struct engine_info { .guc_id = GUC_VIDEOENHANCE_ENGINE, .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, - .init = logical_xcs_ring_init, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_vebox_ring_buffer, }, }; @@ -2202,20 +2208,16 @@ intel_engine_setup(struct drm_i915_private *dev_priv, } /** - * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers + * intel_engines_init() - allocate, populate and init the 
Engine Command Streamers * @dev: DRM device. * - * This function inits the engines for an Execlists submission style (the - * equivalent in the legacy ringbuffer submission world would be - * i915_gem_init_engines). It does it only for those engines that are present in - * the hardware. - * * Return: non-zero if the initialization failed. */ -int intel_logical_rings_init(struct drm_device *dev) +int intel_engines_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); unsigned int mask = 0; + int (*init)(struct intel_engine_cs *engine); unsigned int i; int ret; @@ -2226,10 +2228,15 @@ int intel_logical_rings_init(struct drm_device *dev) if (!HAS_ENGINE(dev_priv, i)) continue; - if (!intel_engines[i].init) + if (i915.enable_execlists) + init = intel_engines[i].init_execlists; + else + init = intel_engines[i].init_legacy; + + if (!init) continue; - ret = intel_engines[i].init(intel_engine_setup(dev_priv, i)); + ret = init(intel_engine_setup(dev_priv, i)); if (ret) goto cleanup; @@ -2250,8 +2257,12 @@ int intel_logical_rings_init(struct drm_device *dev) return 0; cleanup: - for (i = 0; i < I915_NUM_ENGINES; i++) - intel_logical_ring_cleanup(&dev_priv->engine[i]); + for (i = 0; i < I915_NUM_ENGINES; i++) { + if (i915.enable_execlists) + intel_logical_ring_cleanup(&dev_priv->engine[i]); + else + intel_cleanup_engine(&dev_priv->engine[i]); + } return ret; } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 2b8255c..aa8905c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -67,7 +67,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request); void intel_logical_ring_stop(struct intel_engine_cs *engine); void intel_logical_ring_cleanup(struct intel_engine_cs *engine); -int intel_logical_rings_init(struct drm_device *dev); +int intel_engines_init(struct drm_device *dev); int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); /** diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6db1947..16ced27 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2836,14 +2836,11 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, intel_ring_init_semaphores(dev_priv, engine); } -int intel_init_render_ring_buffer(struct drm_device *dev) +int intel_init_render_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine; + struct drm_i915_private *dev_priv = engine->i915; int ret; - engine = intel_engine_setup(dev_priv, RCS); - intel_ring_default_vfuncs(dev_priv, engine); engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; @@ -2877,7 +2874,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; - ret = intel_init_ring_buffer(dev, engine); + ret = intel_init_ring_buffer(&dev_priv->drm, engine); if (ret) return ret; @@ -2894,12 +2891,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev) return 0; } -int intel_init_bsd_ring_buffer(struct drm_device *dev) +int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine; - - engine = intel_engine_setup(dev_priv, VCS); + struct drm_i915_private *dev_priv = engine->i915; 
intel_ring_default_vfuncs(dev_priv, engine); @@ -2922,18 +2916,15 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; } - return intel_init_ring_buffer(dev, engine); + return intel_init_ring_buffer(&dev_priv->drm, engine); } /** * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3) */ -int intel_init_bsd2_ring_buffer(struct drm_device *dev) +int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine; - - engine = intel_engine_setup(dev_priv, VCS2); + struct drm_i915_private *dev_priv = engine->i915; intel_ring_default_vfuncs(dev_priv, engine); @@ -2941,15 +2932,12 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; - return intel_init_ring_buffer(dev, engine); + return intel_init_ring_buffer(&dev_priv->drm, engine); } -int intel_init_blt_ring_buffer(struct drm_device *dev) +int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine; - - engine = intel_engine_setup(dev_priv, BCS); + struct drm_i915_private *dev_priv = engine->i915; intel_ring_default_vfuncs(dev_priv, engine); @@ -2960,15 +2948,12 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) else engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - return intel_init_ring_buffer(dev, engine); + return intel_init_ring_buffer(&dev_priv->drm, engine); } -int intel_init_vebox_ring_buffer(struct drm_device *dev) +int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine; - - engine = intel_engine_setup(dev_priv, VECS); + struct drm_i915_private *dev_priv = engine->i915; intel_ring_default_vfuncs(dev_priv, engine); @@ -2983,7 +2968,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) engine->irq_disable = hsw_vebox_irq_disable; } - return intel_init_ring_buffer(dev, engine); + return intel_init_ring_buffer(&dev_priv->drm, engine); } int diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f8eeb50..a25eac1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -484,11 +484,11 @@ int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req); int intel_init_pipe_control(struct intel_engine_cs *engine, int size); void intel_fini_pipe_control(struct intel_engine_cs *engine); -int intel_init_render_ring_buffer(struct drm_device *dev); -int intel_init_bsd_ring_buffer(struct drm_device *dev); -int intel_init_bsd2_ring_buffer(struct drm_device *dev); -int intel_init_blt_ring_buffer(struct drm_device *dev); -int intel_init_vebox_ring_buffer(struct drm_device *dev); +int intel_init_render_ring_buffer(struct intel_engine_cs *engine); +int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); +int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine); +int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); +int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); u64 intel_ring_get_active_head(struct intel_engine_cs *engine); static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) -- cgit v0.10.2 From c78d60613422504f625a118fb8bff5f52d7b1598 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 13 Jul 2016 16:03:38 +0100 Subject: drm/i915: Make more use of the shared 
engine irq setup Use more of the shared engine setup data for legacy engine initialization. This time to simplify the irq initialization code. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 16ced27..62f8c77 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2790,6 +2790,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, static void intel_ring_init_irq(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift; + if (INTEL_GEN(dev_priv) >= 8) { engine->irq_enable = gen8_irq_enable; engine->irq_disable = gen8_irq_disable; @@ -2843,7 +2845,6 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) intel_ring_default_vfuncs(dev_priv, engine); - engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; if (HAS_L3_DPF(dev_priv)) engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; @@ -2902,10 +2903,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) if (IS_GEN6(dev_priv)) engine->write_tail = gen6_bsd_ring_write_tail; engine->flush = gen6_bsd_ring_flush; - if (INTEL_GEN(dev_priv) >= 8) - engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; - else + if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; } else { engine->mmio_base = BSD_RING_BASE; @@ -2929,8 +2927,6 @@ int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_bsd_ring_flush; - engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; return intel_init_ring_buffer(&dev_priv->drm, engine); } @@ -2942,10 +2938,7 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) intel_ring_default_vfuncs(dev_priv, engine); engine->flush = gen6_ring_flush; - if (INTEL_GEN(dev_priv) >= 8) - engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; - else + if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; return intel_init_ring_buffer(&dev_priv->drm, engine); @@ -2959,10 +2952,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) engine->flush = gen6_ring_flush; - if (INTEL_GEN(dev_priv) >= 8) { - engine->irq_enable_mask = - GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; - } else { + if (INTEL_GEN(dev_priv) < 8) { engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; engine->irq_enable = hsw_vebox_irq_enable; engine->irq_disable = hsw_vebox_irq_disable; -- cgit v0.10.2 From acd2784562ae506137575c31136bef34dc642a2e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 13 Jul 2016 16:03:39 +0100 Subject: drm/i915: Simplify intel_init_ring_buffer prototype Engine contains dev_priv so there is no need to pass it in. 
Signed-off-by: Tvrtko Ursulin Reviewed-by: Daniel Vetter Reviewed-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 62f8c77..3a8df75 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2168,21 +2168,19 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx, i915_gem_context_unreference(ctx); } -static int intel_init_ring_buffer(struct drm_device *dev, - struct intel_engine_cs *engine) +static int intel_init_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = engine->i915; struct intel_ringbuffer *ringbuf; int ret; WARN_ON(engine->buffer); - engine->i915 = dev_priv; INIT_LIST_HEAD(&engine->active_list); INIT_LIST_HEAD(&engine->request_list); INIT_LIST_HEAD(&engine->execlist_queue); INIT_LIST_HEAD(&engine->buffers); - i915_gem_batch_pool_init(dev, &engine->batch_pool); + i915_gem_batch_pool_init(&dev_priv->drm, &engine->batch_pool); memset(engine->semaphore.sync_seqno, 0, sizeof(engine->semaphore.sync_seqno)); @@ -2875,7 +2873,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; - ret = intel_init_ring_buffer(&dev_priv->drm, engine); + ret = intel_init_ring_buffer(engine); if (ret) return ret; @@ -2914,7 +2912,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; } - return intel_init_ring_buffer(&dev_priv->drm, engine); + return intel_init_ring_buffer(engine); } /** @@ -2928,7 +2926,7 @@ int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) engine->flush = gen6_bsd_ring_flush; - return intel_init_ring_buffer(&dev_priv->drm, engine); + return intel_init_ring_buffer(engine); } int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) @@ -2941,7 +2939,7 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - return intel_init_ring_buffer(&dev_priv->drm, engine); + return intel_init_ring_buffer(engine); } int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) @@ -2958,7 +2956,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) engine->irq_disable = hsw_vebox_irq_disable; } - return intel_init_ring_buffer(&dev_priv->drm, engine); + return intel_init_ring_buffer(engine); } int -- cgit v0.10.2 From 88d2ba2e95c85554e12b5a342bd93dbc2adf7546 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 13 Jul 2016 16:03:40 +0100 Subject: drm/i915: Move common engine setup into intel_engine_cs.c Common code deserves to be put in a separate file from legacy and execlists implementation for clarity and ease of maintenance. 
Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 684fc1c..75318eb 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -40,6 +40,7 @@ i915-y += i915_cmd_parser.o \ i915_gpu_error.o \ i915_trace_points.o \ intel_breadcrumbs.o \ + intel_engine_cs.o \ intel_lrc.o \ intel_mocs.o \ intel_ringbuffer.o \ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c new file mode 100644 index 0000000..80117b4 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -0,0 +1,162 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "i915_drv.h" +#include "intel_ringbuffer.h" +#include "intel_lrc.h" + +static const struct engine_info { + const char *name; + unsigned exec_id; + unsigned guc_id; + u32 mmio_base; + unsigned irq_shift; + int (*init_legacy)(struct intel_engine_cs *engine); + int (*init_execlists)(struct intel_engine_cs *engine); +} intel_engines[] = { + [RCS] = { + .name = "render ring", + .exec_id = I915_EXEC_RENDER, + .guc_id = GUC_RENDER_ENGINE, + .mmio_base = RENDER_RING_BASE, + .irq_shift = GEN8_RCS_IRQ_SHIFT, + .init_execlists = logical_render_ring_init, + .init_legacy = intel_init_render_ring_buffer, + }, + [BCS] = { + .name = "blitter ring", + .exec_id = I915_EXEC_BLT, + .guc_id = GUC_BLITTER_ENGINE, + .mmio_base = BLT_RING_BASE, + .irq_shift = GEN8_BCS_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_blt_ring_buffer, + }, + [VCS] = { + .name = "bsd ring", + .exec_id = I915_EXEC_BSD, + .guc_id = GUC_VIDEO_ENGINE, + .mmio_base = GEN6_BSD_RING_BASE, + .irq_shift = GEN8_VCS1_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd_ring_buffer, + }, + [VCS2] = { + .name = "bsd2 ring", + .exec_id = I915_EXEC_BSD, + .guc_id = GUC_VIDEO_ENGINE2, + .mmio_base = GEN8_BSD2_RING_BASE, + .irq_shift = GEN8_VCS2_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd2_ring_buffer, + }, + [VECS] = { + .name = "video enhancement ring", + .exec_id = I915_EXEC_VEBOX, + .guc_id = GUC_VIDEOENHANCE_ENGINE, + .mmio_base = VEBOX_RING_BASE, + .irq_shift = GEN8_VECS_IRQ_SHIFT, + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_vebox_ring_buffer, + }, +}; + +static struct intel_engine_cs * +intel_engine_setup(struct drm_i915_private *dev_priv, + enum intel_engine_id id) +{ + const struct engine_info *info = &intel_engines[id]; + struct intel_engine_cs *engine = &dev_priv->engine[id]; + + engine->id = id; + engine->i915 = dev_priv; + engine->name = info->name; + engine->exec_id = info->exec_id; + engine->hw_id = engine->guc_id = info->guc_id; + engine->mmio_base = info->mmio_base; + engine->irq_shift = info->irq_shift; + + return engine; +} + +/** + * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * @dev: DRM device. + * + * Return: non-zero if the initialization failed. + */ +int intel_engines_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + unsigned int mask = 0; + int (*init)(struct intel_engine_cs *engine); + unsigned int i; + int ret; + + WARN_ON(INTEL_INFO(dev_priv)->ring_mask & + GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES)); + + for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { + if (!HAS_ENGINE(dev_priv, i)) + continue; + + if (i915.enable_execlists) + init = intel_engines[i].init_execlists; + else + init = intel_engines[i].init_legacy; + + if (!init) + continue; + + ret = init(intel_engine_setup(dev_priv, i)); + if (ret) + goto cleanup; + + mask |= ENGINE_MASK(i); + } + + /* + * Catch failures to update intel_engines table when the new engines + * are added to the driver by a warning and disabling the forgotten + * engines. 
+ */ + if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) { + struct intel_device_info *info = + (struct intel_device_info *)&dev_priv->info; + info->ring_mask = mask; + } + + return 0; + +cleanup: + for (i = 0; i < I915_NUM_ENGINES; i++) { + if (i915.enable_execlists) + intel_logical_ring_cleanup(&dev_priv->engine[i]); + else + intel_cleanup_engine(&dev_priv->engine[i]); + } + + return ret; +} + diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2e13a3a..6e88d9a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2083,7 +2083,7 @@ error: return ret; } -static int logical_render_ring_init(struct intel_engine_cs *engine) +int logical_render_ring_init(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; int ret; @@ -2126,147 +2126,13 @@ static int logical_render_ring_init(struct intel_engine_cs *engine) return ret; } -static int logical_xcs_ring_init(struct intel_engine_cs *engine) +int logical_xcs_ring_init(struct intel_engine_cs *engine) { logical_ring_setup(engine); return logical_ring_init(engine); } -static const struct engine_info { - const char *name; - unsigned exec_id; - unsigned guc_id; - u32 mmio_base; - unsigned irq_shift; - int (*init_legacy)(struct intel_engine_cs *engine); - int (*init_execlists)(struct intel_engine_cs *engine); -} intel_engines[] = { - [RCS] = { - .name = "render ring", - .exec_id = I915_EXEC_RENDER, - .guc_id = GUC_RENDER_ENGINE, - .mmio_base = RENDER_RING_BASE, - .irq_shift = GEN8_RCS_IRQ_SHIFT, - .init_execlists = logical_render_ring_init, - .init_legacy = intel_init_render_ring_buffer, - }, - [BCS] = { - .name = "blitter ring", - .exec_id = I915_EXEC_BLT, - .guc_id = GUC_BLITTER_ENGINE, - .mmio_base = BLT_RING_BASE, - .irq_shift = GEN8_BCS_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_blt_ring_buffer, - }, - [VCS] = { - .name = "bsd ring", - .exec_id = I915_EXEC_BSD, - .guc_id = GUC_VIDEO_ENGINE, - .mmio_base = GEN6_BSD_RING_BASE, - .irq_shift = GEN8_VCS1_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_bsd_ring_buffer, - }, - [VCS2] = { - .name = "bsd2 ring", - .exec_id = I915_EXEC_BSD, - .guc_id = GUC_VIDEO_ENGINE2, - .mmio_base = GEN8_BSD2_RING_BASE, - .irq_shift = GEN8_VCS2_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_bsd2_ring_buffer, - }, - [VECS] = { - .name = "video enhancement ring", - .exec_id = I915_EXEC_VEBOX, - .guc_id = GUC_VIDEOENHANCE_ENGINE, - .mmio_base = VEBOX_RING_BASE, - .irq_shift = GEN8_VECS_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_vebox_ring_buffer, - }, -}; - -struct intel_engine_cs * -intel_engine_setup(struct drm_i915_private *dev_priv, - enum intel_engine_id id) -{ - const struct engine_info *info = &intel_engines[id]; - struct intel_engine_cs *engine = &dev_priv->engine[id]; - - engine->id = id; - engine->i915 = dev_priv; - engine->name = info->name; - engine->exec_id = info->exec_id; - engine->hw_id = engine->guc_id = info->guc_id; - engine->mmio_base = info->mmio_base; - engine->irq_shift = info->irq_shift; - - return engine; -} - -/** - * intel_engines_init() - allocate, populate and init the Engine Command Streamers - * @dev: DRM device. - * - * Return: non-zero if the initialization failed. 
- */ -int intel_engines_init(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - unsigned int mask = 0; - int (*init)(struct intel_engine_cs *engine); - unsigned int i; - int ret; - - WARN_ON(INTEL_INFO(dev_priv)->ring_mask & - GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES)); - - for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { - if (!HAS_ENGINE(dev_priv, i)) - continue; - - if (i915.enable_execlists) - init = intel_engines[i].init_execlists; - else - init = intel_engines[i].init_legacy; - - if (!init) - continue; - - ret = init(intel_engine_setup(dev_priv, i)); - if (ret) - goto cleanup; - - mask |= ENGINE_MASK(i); - } - - /* - * Catch failures to update intel_engines table when the new engines - * are added to the driver by a warning and disabling the forgotten - * engines. - */ - if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) { - struct intel_device_info *info = - (struct intel_device_info *)&dev_priv->info; - info->ring_mask = mask; - } - - return 0; - -cleanup: - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (i915.enable_execlists) - intel_logical_ring_cleanup(&dev_priv->engine[i]); - else - intel_cleanup_engine(&dev_priv->engine[i]); - } - - return ret; -} - static u32 make_rpcs(struct drm_i915_private *dev_priv) { diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index aa8905c..938e3ee 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -67,6 +67,9 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request); void intel_logical_ring_stop(struct intel_engine_cs *engine); void intel_logical_ring_cleanup(struct intel_engine_cs *engine); +int logical_render_ring_init(struct intel_engine_cs *engine); +int logical_xcs_ring_init(struct intel_engine_cs *engine); + int intel_engines_init(struct drm_device *dev); int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a25eac1..db7613e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -362,10 +362,6 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; -struct intel_engine_cs * -intel_engine_setup(struct drm_i915_private *dev_priv, - enum intel_engine_id id); - static inline bool intel_engine_initialized(const struct intel_engine_cs *engine) { -- cgit v0.10.2 From 019bf277634a13ba7bc409bde399ee676c096f33 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 13 Jul 2016 16:03:41 +0100 Subject: drm/i915: Pull out some more common engine init code Created two common helpers for engine setup and engine init phases respectively to help with code sharing. Signed-off-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1468422221-12132-1-git-send-email-tvrtko.ursulin@linux.intel.com Reviewed-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 80117b4..e3c9f04 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -160,3 +160,50 @@ cleanup: return ret; } +void intel_engine_init_hangcheck(struct intel_engine_cs *engine) +{ + memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); +} + +/** + * intel_engines_setup_common - setup engine state not requiring hw access + * @engine: Engine to setup. 
+ * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do not require hardware access. + * + * Typically done early in the submission mode specific engine setup stage. + */ +void intel_engine_setup_common(struct intel_engine_cs *engine) +{ + INIT_LIST_HEAD(&engine->active_list); + INIT_LIST_HEAD(&engine->request_list); + INIT_LIST_HEAD(&engine->buffers); + INIT_LIST_HEAD(&engine->execlist_queue); + spin_lock_init(&engine->execlist_lock); + + intel_engine_init_hangcheck(engine); + i915_gem_batch_pool_init(&engine->i915->drm, &engine->batch_pool); +} + +/** + * intel_engines_init_common - initialize engine state which might require hw access + * @engine: Engine to initialize. + * + * Initializes @engine@ structure members shared between legacy and execlists + * submission modes which do require hardware access. + * + * Typically done at later stages of submission mode specific engine setup. + * + * Returns zero on success or an error code on failure. + */ +int intel_engine_init_common(struct intel_engine_cs *engine) +{ + int ret; + + ret = intel_engine_init_breadcrumbs(engine); + if (ret) + return ret; + + return i915_cmd_parser_init_ring(engine); +} diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6e88d9a..b6af635 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2009,6 +2009,8 @@ logical_ring_setup(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; enum forcewake_domains fw_domains; + intel_engine_setup_common(engine); + /* Intentionally left blank. */ engine->buffer = NULL; @@ -2026,21 +2028,12 @@ logical_ring_setup(struct intel_engine_cs *engine) engine->fw_domains = fw_domains; - INIT_LIST_HEAD(&engine->active_list); - INIT_LIST_HEAD(&engine->request_list); - INIT_LIST_HEAD(&engine->buffers); - INIT_LIST_HEAD(&engine->execlist_queue); - spin_lock_init(&engine->execlist_lock); - tasklet_init(&engine->irq_tasklet, intel_lrc_irq_handler, (unsigned long)engine); logical_ring_init_platform_invariants(engine); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); - - intel_engine_init_hangcheck(engine); - i915_gem_batch_pool_init(&dev_priv->drm, &engine->batch_pool); } static int @@ -2049,11 +2042,7 @@ logical_ring_init(struct intel_engine_cs *engine) struct i915_gem_context *dctx = engine->i915->kernel_context; int ret; - ret = intel_engine_init_breadcrumbs(engine); - if (ret) - goto error; - - ret = i915_cmd_parser_init_ring(engine); + ret = intel_engine_init_common(engine); if (ret) goto error; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3a8df75..94c8ef4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -549,11 +549,6 @@ static bool stop_ring(struct intel_engine_cs *engine) return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0; } -void intel_engine_init_hangcheck(struct intel_engine_cs *engine) -{ - memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); -} - static int init_ring_common(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -2176,15 +2171,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) WARN_ON(engine->buffer); - INIT_LIST_HEAD(&engine->active_list); - INIT_LIST_HEAD(&engine->request_list); - INIT_LIST_HEAD(&engine->execlist_queue); - INIT_LIST_HEAD(&engine->buffers); - i915_gem_batch_pool_init(&dev_priv->drm, &engine->batch_pool); + 
intel_engine_setup_common(engine); + memset(engine->semaphore.sync_seqno, 0, sizeof(engine->semaphore.sync_seqno)); - ret = intel_engine_init_breadcrumbs(engine); + ret = intel_engine_init_common(engine); if (ret) goto error; @@ -2225,10 +2217,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) goto error; } - ret = i915_cmd_parser_init_ring(engine); - if (ret) - goto error; - return 0; error: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index db7613e..df7587a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -480,6 +480,9 @@ int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req); int intel_init_pipe_control(struct intel_engine_cs *engine, int size); void intel_fini_pipe_control(struct intel_engine_cs *engine); +void intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engine_init_common(struct intel_engine_cs *engine); + int intel_init_render_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine); -- cgit v0.10.2 From bb10d4ec3be4b069bfb61c60ca4f708f58f440f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:00:37 +0300 Subject: drm/i915: Ignore panel type from OpRegion on SKL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dell XPS 13 9350 apparently doesn't like it when we use the panel type from OpRegion. The OpRegion panel type (0) tells us to use low vswing for eDP, whereas the VBT panel type (2) tells us to use normal vswing. The problem is that low vswing results in some display flickers. Since no one seems to know how this stuff is supposed to be handled, let's just ignore the OpRegion panel type on SKL for now. v2: Print the panel type correctly in the debug output Reported-by: James Bottomley Cc: James Bottomley Cc: drm-intel-fixes@lists.freedesktop.org References: https://lists.freedesktop.org/archives/intel-gfx/2016-June/098826.html Fixes: a05628195a0d ("drm/i915: Get panel_type from OpRegion panel details") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468324837-29237-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter Tested-by: James Bottomley Signed-off-by: Ville Syrjälä diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index c27d5eb..adca262 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -1072,5 +1072,16 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) return -ENODEV; } + /* + * FIXME On Dell XPS 13 9350 the OpRegion panel type (0) gives us + * low vswing for eDP, whereas the VBT panel type (2) gives us normal + * vswing instead. Low vswing results in some display flickers, so + * let's simply ignore the OpRegion panel type on SKL for now. + */ + if (IS_SKYLAKE(dev_priv)) { + DRM_DEBUG_KMS("Ignoring OpRegion panel type (%d)\n", ret - 1); + return -ENODEV; + } + return ret - 1; } -- cgit v0.10.2 From 3fef3a5be3caa0710fd29f8ba4d5f08ba8f6ecda Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 5 Jul 2016 13:00:19 +0100 Subject: drm/i915: remove superfluous i915_gem_object_free_mmap_offset call This should already be handled by drm_gem_object_release, which is called later on. 
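For context, the release path in the DRM core already drops the mmap offset; a simplified sketch of drm_gem_object_release() as it looked around this time (the exact body may differ between kernel versions):

	void drm_gem_object_release(struct drm_gem_object *obj)
	{
		if (obj->filp)
			fput(obj->filp);

		/* drops the fake mmap offset for the object, if one was allocated */
		drm_gem_free_mmap_offset(obj);
	}

i915_gem_free_object() ends by calling drm_gem_object_release(&obj->base), so the explicit i915_gem_object_free_mmap_offset() call removed below is redundant.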
Cc: Chris Wilson Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1467720019-31876-1-git-send-email-matthew.auld@intel.com diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b788f97..ead0e48 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4901,7 +4901,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) if (discard_backing_storage(obj)) obj->madv = I915_MADV_DONTNEED; i915_gem_object_put_pages(obj); - i915_gem_object_free_mmap_offset(obj); BUG_ON(obj->pages); -- cgit v0.10.2 From b913b33c43db849778f044d4b9e74b167898a9bc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 09:10:31 +0100 Subject: drm/i915: Flush GT idle status upon reset Upon resetting the GPU, we force the engines to be idle by clearing their request lists. However, I neglected to clear the GT active status and so the next request following the reset was not marking the device as busy again. (We had to wait until any outstanding retire worker finally ran and cleared the active status.) Fixes: 67d97da34917 ("drm/i915: Only start retire worker when idle") Testcase: igt/pm_rps/reset Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1468397438-21226-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ead0e48..002ecc6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3169,6 +3169,8 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) } intel_ring_init_seqno(engine, engine->last_submitted_seqno); + + engine->i915->gt.active_engines &= ~intel_engine_flag(engine); } void i915_gem_reset(struct drm_device *dev) @@ -3186,6 +3188,7 @@ void i915_gem_reset(struct drm_device *dev) for_each_engine(engine, dev_priv) i915_gem_reset_engine_cleanup(engine); + mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); i915_gem_context_reset(dev); -- cgit v0.10.2 From 3a45b05c4517f36a65f63d53524c3edbe59e0827 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 09:10:32 +0100 Subject: drm/i915: Preserve current RPS frequency across init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Select idle frequency during initialisation, then reset the last known frequency when re-enabling. This allows us to preserve the user selected frequency across resets. 
v2: Stop CHV from overriding the user's choice in cherryview_enable_rps() Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1468397438-21226-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5a8ee0c..df72f8e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5149,6 +5149,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) } dev_priv->rps.idle_freq = dev_priv->rps.min_freq; + dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; /* Preserve min/max settings in case of re-init */ if (dev_priv->rps.max_freq_softlimit == 0) @@ -5165,6 +5166,18 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) } } +static void reset_rps(struct drm_i915_private *dev_priv, + void (*set)(struct drm_i915_private *, u8)) +{ + u8 freq = dev_priv->rps.cur_freq; + + /* force a reset */ + dev_priv->rps.power = -1; + dev_priv->rps.cur_freq = -1; + + set(dev_priv, freq); +} + /* See the Gen9_GT_PM_Programming_Guide doc for the below */ static void gen9_enable_rps(struct drm_i915_private *dev_priv) { @@ -5201,8 +5214,7 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) /* Leaning on the below call to gen6_set_rps to program/setup the * Up/Down EI & threshold registers, as well as the RP_CONTROL, * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -5348,8 +5360,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) /* 6: Ring frequency + overclocking (our driver does this later */ - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -5442,8 +5453,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) dev_priv->rps.max_freq = pcu_mbox & 0xff; } - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); rc6vids = 0; ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); @@ -5807,6 +5817,7 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->rps.min_freq); dev_priv->rps.idle_freq = dev_priv->rps.min_freq; + dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; /* Preserve min/max settings in case of re-init */ if (dev_priv->rps.max_freq_softlimit == 0) @@ -5871,6 +5882,7 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) "Odd GPU freq values\n"); dev_priv->rps.idle_freq = dev_priv->rps.min_freq; + dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; /* Preserve min/max settings in case of re-init */ if (dev_priv->rps.max_freq_softlimit == 0) @@ -5970,16 +5982,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - dev_priv->rps.idle_freq); - - valleyview_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, valleyview_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -6059,16 +6062,7 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - dev_priv->rps.idle_freq); - - valleyview_set_rps(dev_priv, dev_priv->rps.idle_freq); + reset_rps(dev_priv, valleyview_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -- cgit v0.10.2 From 773ea9a801328b042eb6376cd4530292625a3de1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 09:10:33 +0100 Subject: drm/i915: Perform static RPS frequency setup before userspace As these RPS frequency values are part of our userspace interface, they must be established before that userspace interface is registered. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1468397438-21226-3-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index df72f8e..54f739f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5102,35 +5102,31 @@ int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { - uint32_t rp_state_cap; - u32 ddcc_status = 0; - int ret; - /* All of these values are in units of 50MHz */ - dev_priv->rps.cur_freq = 0; + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_BROXTON(dev_priv)) { - rp_state_cap = I915_READ(BXT_RP_STATE_CAP); + u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; } else { - rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; } - /* hw_max = RP0 until we check for overclocking */ - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - ret = sandybridge_pcode_read(dev_priv, - HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, - &ddcc_status); - if (0 == ret) + u32 ddcc_status = 0; + + if (sandybridge_pcode_read(dev_priv, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status) == 0) dev_priv->rps.efficient_freq = clamp_t(u8, ((ddcc_status >> 8) & 0xff), @@ -5140,30 +5136,14 @@ static void 
gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is - the natural hardware unit for SKL */ + * the natural hardware unit for SKL + */ dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; } - - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; - - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - - if (dev_priv->rps.min_freq_softlimit == 0) { - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - dev_priv->rps.min_freq_softlimit = - max_t(int, dev_priv->rps.efficient_freq, - intel_freq_opcode(dev_priv, 450)); - else - dev_priv->rps.min_freq_softlimit = - dev_priv->rps.min_freq; - } } static void reset_rps(struct drm_i915_private *dev_priv, @@ -5183,8 +5163,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) { intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - gen6_init_rps_frequencies(dev_priv); - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { /* @@ -5301,9 +5279,6 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) /* 2a: Disable RC states. */ I915_WRITE(GEN6_RC_CONTROL, 0); - /* Initialize rps frequencies */ - gen6_init_rps_frequencies(dev_priv); - /* 2b: Program RC6 thresholds.*/ I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ @@ -5392,9 +5367,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* Initialize rps frequencies */ - gen6_init_rps_frequencies(dev_priv); - /* disable the counters and set deterministic thresholds */ I915_WRITE(GEN6_RC_CONTROL, 0); @@ -5778,8 +5750,6 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) vlv_init_gpll_ref_freq(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); switch ((val >> 6) & 3) { case 0: @@ -5815,18 +5785,6 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); - - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; - - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - - mutex_unlock(&dev_priv->rps.hw_lock); } static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) @@ -5837,8 +5795,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) vlv_init_gpll_ref_freq(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); - mutex_lock(&dev_priv->sb_lock); val = vlv_cck_read(dev_priv, CCK_FUSE_REG); mutex_unlock(&dev_priv->sb_lock); @@ -5880,18 +5836,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->rps.rp1_freq | dev_priv->rps.min_freq) & 1, "Odd GPU freq values\n"); - - dev_priv->rps.idle_freq = 
dev_priv->rps.min_freq; - dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; - - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - - mutex_unlock(&dev_priv->rps.hw_lock); } static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) @@ -6559,10 +6503,30 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) intel_runtime_pm_get(dev_priv); } + mutex_lock(&dev_priv->rps.hw_lock); + + /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) cherryview_init_gt_powersave(dev_priv); else if (IS_VALLEYVIEW(dev_priv)) valleyview_init_gt_powersave(dev_priv); + else + gen6_init_rps_frequencies(dev_priv); + + /* Derive initial user preferences/limits from the hardware limits */ + dev_priv->rps.idle_freq = dev_priv->rps.min_freq; + dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; + + dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + dev_priv->rps.min_freq_softlimit = + max_t(int, + dev_priv->rps.efficient_freq, + intel_freq_opcode(dev_priv, 450)); + + mutex_unlock(&dev_priv->rps.hw_lock); } void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) -- cgit v0.10.2 From 99ac9612799475bdd4e3a7a0f0f40a93d6c9eaa6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 09:10:34 +0100 Subject: drm/i915: Move overclocking detection to alongside RPS frequency detection Move the overclocking max frequency detection alongside the regular frequency detection, before we expose the undefined value to userspace. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1468397438-21226-4-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 54f739f..24b23a5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5343,7 +5343,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) static void gen6_enable_rps(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; + u32 rc6vids, rc6_mask = 0; u32 gtfifodbg; int rc6_mode; int ret; @@ -5417,14 +5417,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) if (ret) DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); - ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); - if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ - DRM_DEBUG_DRIVER("Overclocking supported. 
Max: %dMHz, Overclock max: %dMHz\n", - (dev_priv->rps.max_freq_softlimit & 0xff) * 50, - (pcu_mbox & 0xff) * 50); - dev_priv->rps.max_freq = pcu_mbox & 0xff; - } - reset_rps(dev_priv, gen6_set_rps); rc6vids = 0; @@ -6526,6 +6518,20 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->rps.efficient_freq, intel_freq_opcode(dev_priv, 450)); + /* After setting max-softlimit, find the overclock max freq */ + if (IS_GEN6(dev_priv) || + IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { + u32 params = 0; + + sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params); + if (params & BIT(31)) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (dev_priv->rps.max_freq & 0xff) * 50, + (params & 0xff) * 50); + dev_priv->rps.max_freq = params & 0xff; + } + } + mutex_unlock(&dev_priv->rps.hw_lock); } -- cgit v0.10.2 From 29ecd78d3b79746fc837b820accb062f6433d5fb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 09:10:35 +0100 Subject: drm/i915: Define a separate variable and control for RPS waitboost frequency To allow the user finer control over waitboosting, allow them to set the frequency we request for the boost. This also allows them to effectively disable the boosting by setting the boost request to a low frequency. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1468397438-21226-5-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 844fea7..d1ff4cb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1381,6 +1381,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); seq_printf(m, "Min freq: %d MHz\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + seq_printf(m, "Boost freq: %d MHz\n", + intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); seq_printf(m, "Max freq: %d MHz\n", intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); seq_printf(m, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 65d69e5..7e28a0a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1170,6 +1170,7 @@ struct intel_gen6_power_mgmt { u8 max_freq_softlimit; /* Max frequency permitted by the driver */ u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ u8 min_freq; /* AKA RPn. Minimum frequency */ + u8 boost_freq; /* Frequency to request when wait boosting */ u8 idle_freq; /* Frequency to request when we are idle */ u8 efficient_freq; /* AKA RPe. 
Pre-determined balanced frequency */ u8 rp1_freq; /* "less than" RP0 power/freqency */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1c2aec3..c8ed367 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1105,9 +1105,10 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay = dev_priv->rps.cur_freq; min = dev_priv->rps.min_freq_softlimit; max = dev_priv->rps.max_freq_softlimit; - - if (client_boost) { - new_delay = dev_priv->rps.max_freq_softlimit; + if (client_boost || any_waiters(dev_priv)) + max = dev_priv->rps.max_freq; + if (client_boost && new_delay < dev_priv->rps.boost_freq) { + new_delay = dev_priv->rps.boost_freq; adj = 0; } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { if (adj > 0) @@ -1122,7 +1123,7 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay = dev_priv->rps.efficient_freq; adj = 0; } - } else if (any_waiters(dev_priv)) { + } else if (client_boost || any_waiters(dev_priv)) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d61829e..8c045ff 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -318,6 +318,41 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, return snprintf(buf, PAGE_SIZE, "%d\n", ret); } +static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) +{ + struct drm_minor *minor = dev_to_drm_minor(kdev); + struct drm_i915_private *dev_priv = to_i915(minor->dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); +} + +static ssize_t gt_boost_freq_mhz_store(struct device *kdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_minor *minor = dev_to_drm_minor(kdev); + struct drm_device *dev = minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 val; + ssize_t ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + + /* Validate against (static) hardware limits */ + val = intel_freq_opcode(dev_priv, val); + if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq) + return -EINVAL; + + mutex_lock(&dev_priv->rps.hw_lock); + dev_priv->rps.boost_freq = val; + mutex_unlock(&dev_priv->rps.hw_lock); + + return count; +} + static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { @@ -465,6 +500,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); static DEVICE_ATTR(gt_cur_freq_mhz, S_IRUGO, gt_cur_freq_mhz_show, NULL); +static DEVICE_ATTR(gt_boost_freq_mhz, S_IRUGO, gt_boost_freq_mhz_show, gt_boost_freq_mhz_store); static DEVICE_ATTR(gt_max_freq_mhz, S_IRUGO | S_IWUSR, gt_max_freq_mhz_show, gt_max_freq_mhz_store); static DEVICE_ATTR(gt_min_freq_mhz, S_IRUGO | S_IWUSR, gt_min_freq_mhz_show, gt_min_freq_mhz_store); @@ -498,6 +534,7 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr static const struct attribute *gen6_attrs[] = { &dev_attr_gt_act_freq_mhz.attr, &dev_attr_gt_cur_freq_mhz.attr, + &dev_attr_gt_boost_freq_mhz.attr, &dev_attr_gt_max_freq_mhz.attr, &dev_attr_gt_min_freq_mhz.attr, &dev_attr_gt_RP0_freq_mhz.attr, @@ -509,6 +546,7 @@ static const struct attribute *gen6_attrs[] = { static const struct attribute *vlv_attrs[] = { 
&dev_attr_gt_act_freq_mhz.attr, &dev_attr_gt_cur_freq_mhz.attr, + &dev_attr_gt_boost_freq_mhz.attr, &dev_attr_gt_max_freq_mhz.attr, &dev_attr_gt_min_freq_mhz.attr, &dev_attr_gt_RP0_freq_mhz.attr, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 24b23a5..aab1e0b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4911,7 +4911,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, */ if (!(dev_priv->gt.awake && dev_priv->rps.enabled && - dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) + dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) return; /* Force a RPS boost (and don't count it against the client) if @@ -6532,6 +6532,9 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) } } + /* Finally allow us to boost to max by default */ + dev_priv->rps.boost_freq = dev_priv->rps.max_freq; + mutex_unlock(&dev_priv->rps.hw_lock); } -- cgit v0.10.2 From 62e1baa128f98006261308182fe3006d66b1bf61 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 09:10:36 +0100 Subject: drm/i915: Remove superfluous powersave work flushing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of flushing the outstanding enabling, remember the requested frequency to apply when the powersave work runs. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1468397438-21226-6-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d1ff4cb..90aef45 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1205,8 +1205,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - if (IS_GEN5(dev)) { u16 rgvswctl = I915_READ16(MEMSWCTL); u16 rgvstat = I915_READ16(MEMSTAT_ILK); @@ -1898,8 +1896,6 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); if (ret) goto out; @@ -4952,20 +4948,11 @@ i915_max_freq_get(void *data, u64 *val) { struct drm_device *dev = data; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; if (INTEL_INFO(dev)->gen < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); - if (ret) - return ret; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - return 0; } @@ -4980,8 +4967,6 @@ i915_max_freq_set(void *data, u64 val) if (INTEL_INFO(dev)->gen < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); @@ -5019,20 +5004,11 @@ i915_min_freq_get(void *data, u64 *val) { struct drm_device *dev = data; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); - if (ret) - return ret; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - return 0; } @@ -5044,11 +5020,9 @@ 
i915_min_freq_set(void *data, u64 val) u32 hw_max, hw_min; int ret; - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 8c045ff..d47281b 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -271,8 +271,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct drm_i915_private *dev_priv = to_i915(dev); int ret; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->rps.hw_lock); @@ -303,19 +301,10 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct drm_minor *minor = dev_to_drm_minor(kdev); struct drm_device *dev = minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - intel_runtime_pm_get(dev_priv); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq); - mutex_unlock(&dev_priv->rps.hw_lock); - intel_runtime_pm_put(dev_priv); - - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.cur_freq)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -324,7 +313,8 @@ static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribu struct drm_i915_private *dev_priv = to_i915(minor->dev); return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); + intel_gpu_freq(dev_priv, + dev_priv->rps.boost_freq)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -360,9 +350,9 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, struct drm_device *dev = minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); - return snprintf(buf, PAGE_SIZE, - "%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -370,15 +360,10 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute struct drm_minor *minor = dev_to_drm_minor(kdev); struct drm_device *dev = minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - mutex_lock(&dev_priv->rps.hw_lock); - ret = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -395,8 +380,6 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (ret) return ret; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->rps.hw_lock); @@ -438,15 +421,10 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute struct drm_minor *minor = dev_to_drm_minor(kdev); struct drm_device *dev = minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - 
flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); - mutex_unlock(&dev_priv->rps.hw_lock); - - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return snprintf(buf, PAGE_SIZE, "%d\n", + intel_gpu_freq(dev_priv, + dev_priv->rps.min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, @@ -463,8 +441,6 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, if (ret) return ret; - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->rps.hw_lock); -- cgit v0.10.2 From b7137e0cf1e55b5b0cb88fbd85425a1bc0d24c3a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 09:10:37 +0100 Subject: drm/i915: Defer enabling rc6 til after we submit the first batch/context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some hardware requires a valid render context before it can initiate rc6 power gating of the GPU; the default state of the GPU is not sufficient and may lead to undefined behaviour. The first execution of any batch will load the "golden render state", at which point it is safe to enable rc6. As we do not forcibly load the kernel context at resume, we have to hook into the batch submission to be sure that the render state is setup before enabling rc6. However, since we don't enable powersaving until that first batch, we queued a delayed task in order to guarantee that the batch is indeed submitted. v2: Rearrange intel_disable_gt_powersave() to match. v3: Apply user specified cur_freq (or idle_freq if not set). v4: Give in, and supply a delayed work to autoenable rc6 v5: Mika suggested a couple of better names for delayed_resume_work v6: Rebalance rpm_put around the autoenable task Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468397438-21226-7-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b9a8117..c6cc01f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1343,7 +1343,7 @@ void i915_driver_unload(struct drm_device *dev) i915_destroy_error_state(dev); /* Flush any outstanding unpin_work. */ - flush_workqueue(dev_priv->wq); + drain_workqueue(dev_priv->wq); intel_guc_fini(dev); i915_gem_fini(dev); @@ -1458,8 +1458,6 @@ static int i915_drm_suspend(struct drm_device *dev) intel_guc_suspend(dev); - intel_suspend_gt_powersave(dev_priv); - intel_display_suspend(dev); intel_dp_mst_suspend(dev); @@ -1652,6 +1650,7 @@ static int i915_drm_resume(struct drm_device *dev) intel_opregion_notify_adapter(dev_priv, PCI_D0); + intel_autoenable_gt_powersave(dev_priv); drm_kms_helper_poll_enable(dev); enable_rpm_wakeref_asserts(dev_priv); @@ -1778,8 +1777,6 @@ int i915_reset(struct drm_i915_private *dev_priv) unsigned reset_counter; int ret; - intel_reset_gt_powersave(dev_priv); - mutex_lock(&dev->struct_mutex); /* Clear any previous failed attempts at recovery. Time to try again. */ @@ -1835,8 +1832,7 @@ int i915_reset(struct drm_i915_private *dev_priv) * previous concerns that it doesn't respond well to some forms * of re-init after reset. 
*/ - if (INTEL_INFO(dev)->gen > 5) - intel_enable_gt_powersave(dev_priv); + intel_autoenable_gt_powersave(dev_priv); return 0; @@ -2459,7 +2455,6 @@ static int intel_runtime_resume(struct device *device) * we can do is to hope that things will still work (and disable RPM). */ i915_gem_init_swizzling(dev); - gen6_update_ring_freq(dev_priv); intel_runtime_pm_enable_interrupts(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7e28a0a..a13b098 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1188,7 +1188,7 @@ struct intel_gen6_power_mgmt { bool client_boost; bool enabled; - struct delayed_work delayed_resume_work; + struct delayed_work autoenable_work; unsigned boosts; struct intel_rps_client semaphores, mmioflips; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 002ecc6..cf0e8aa 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2842,6 +2842,7 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine) intel_runtime_pm_get_noresume(dev_priv); dev_priv->gt.awake = true; + intel_enable_gt_powersave(dev_priv); i915_update_gfx_val(dev_priv); if (INTEL_GEN(dev_priv) >= 6) gen6_rps_busy(dev_priv); @@ -4979,6 +4980,8 @@ i915_gem_suspend(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; + intel_suspend_gt_powersave(dev_priv); + mutex_lock(&dev->struct_mutex); ret = i915_gem_wait_for_idle(dev_priv); if (ret) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index be3b2ca..88589b5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15456,7 +15456,6 @@ void intel_modeset_init_hw(struct drm_device *dev) dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; intel_init_clock_gating(dev); - intel_enable_gt_powersave(dev_priv); } /* diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6c8485c..c036dfd 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1690,10 +1690,11 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv); void intel_gpu_ips_teardown(void); void intel_init_gt_powersave(struct drm_i915_private *dev_priv); void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); +void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); -void intel_reset_gt_powersave(struct drm_i915_private *dev_priv); void gen6_update_ring_freq(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index aab1e0b..c77ec10 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6536,6 +6536,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->rps.boost_freq = dev_priv->rps.max_freq; mutex_unlock(&dev_priv->rps.hw_lock); + + intel_autoenable_gt_powersave(dev_priv); } void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) @@ -6549,13 +6551,6 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) intel_runtime_pm_put(dev_priv); 
} -static void gen6_suspend_rps(struct drm_i915_private *dev_priv) -{ - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - gen6_disable_rps_interrupts(dev_priv); -} - /** * intel_suspend_gt_powersave - suspend PM work and helper threads * @dev_priv: i915 device @@ -6569,50 +6564,63 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) < 6) return; - gen6_suspend_rps(dev_priv); + if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) + intel_runtime_pm_put(dev_priv); - /* Force GPU to min freq during suspend */ - gen6_rps_idle(dev_priv); + /* gen6_rps_idle() will be called later to disable interrupts */ +} + +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) +{ + dev_priv->rps.enabled = true; /* force disabling */ + intel_disable_gt_powersave(dev_priv); + + gen6_reset_rps_interrupts(dev_priv); } void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { - if (IS_IRONLAKE_M(dev_priv)) { - ironlake_disable_drps(dev_priv); - } else if (INTEL_INFO(dev_priv)->gen >= 6) { - intel_suspend_gt_powersave(dev_priv); + if (!READ_ONCE(dev_priv->rps.enabled)) + return; - mutex_lock(&dev_priv->rps.hw_lock); - if (INTEL_INFO(dev_priv)->gen >= 9) { - gen9_disable_rc6(dev_priv); - gen9_disable_rps(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) - cherryview_disable_rps(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_disable_rps(dev_priv); - else - gen6_disable_rps(dev_priv); + mutex_lock(&dev_priv->rps.hw_lock); - dev_priv->rps.enabled = false; - mutex_unlock(&dev_priv->rps.hw_lock); + if (INTEL_GEN(dev_priv) >= 9) { + gen9_disable_rc6(dev_priv); + gen9_disable_rps(dev_priv); + } else if (IS_CHERRYVIEW(dev_priv)) { + cherryview_disable_rps(dev_priv); + } else if (IS_VALLEYVIEW(dev_priv)) { + valleyview_disable_rps(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 6) { + gen6_disable_rps(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_disable_drps(dev_priv); } + + dev_priv->rps.enabled = false; + mutex_unlock(&dev_priv->rps.hw_lock); } -static void intel_gen6_powersave_work(struct work_struct *work) +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, - rps.delayed_resume_work.work); + /* We shouldn't be disabling as we submit, so this should be less + * racy than it appears! 
+ */ + if (READ_ONCE(dev_priv->rps.enabled)) + return; - mutex_lock(&dev_priv->rps.hw_lock); + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + return; - gen6_reset_rps_interrupts(dev_priv); + mutex_lock(&dev_priv->rps.hw_lock); if (IS_CHERRYVIEW(dev_priv)) { cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rps(dev_priv); - } else if (INTEL_INFO(dev_priv)->gen >= 9) { + } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) @@ -6620,9 +6628,12 @@ static void intel_gen6_powersave_work(struct work_struct *work) } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); __gen6_update_ring_freq(dev_priv); - } else { + } else if (INTEL_GEN(dev_priv) >= 6) { gen6_enable_rps(dev_priv); __gen6_update_ring_freq(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_enable_drps(dev_priv); + intel_init_emon(dev_priv); } WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); @@ -6632,18 +6643,47 @@ static void intel_gen6_powersave_work(struct work_struct *work) WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); dev_priv->rps.enabled = true; + mutex_unlock(&dev_priv->rps.hw_lock); +} - gen6_enable_rps_interrupts(dev_priv); +static void __intel_autoenable_gt_powersave(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), rps.autoenable_work.work); + struct intel_engine_cs *rcs; + struct drm_i915_gem_request *req; - mutex_unlock(&dev_priv->rps.hw_lock); + if (READ_ONCE(dev_priv->rps.enabled)) + goto out; + + rcs = &dev_priv->engine[RCS]; + if (rcs->last_context) + goto out; + + if (!rcs->init_context) + goto out; + mutex_lock(&dev_priv->drm.struct_mutex); + + req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); + if (IS_ERR(req)) + goto unlock; + + if (!i915.enable_execlists && i915_switch_context(req) == 0) + rcs->init_context(req); + + /* Mark the device busy, calling intel_enable_gt_powersave() */ + i915_add_request_no_flush(req); + +unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); +out: intel_runtime_pm_put(dev_priv); } -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) +void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) { - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) + if (READ_ONCE(dev_priv->rps.enabled)) return; if (IS_IRONLAKE_M(dev_priv)) { @@ -6664,21 +6704,13 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) * paths, so the _noresume version is enough (and in case of * runtime resume it's necessary). 
*/ - if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, - round_jiffies_up_relative(HZ))) + if (queue_delayed_work(dev_priv->wq, + &dev_priv->rps.autoenable_work, + round_jiffies_up_relative(HZ))) intel_runtime_pm_get_noresume(dev_priv); } } -void intel_reset_gt_powersave(struct drm_i915_private *dev_priv) -{ - if (INTEL_INFO(dev_priv)->gen < 6) - return; - - gen6_suspend_rps(dev_priv); - dev_priv->rps.enabled = false; -} - static void ibx_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -7785,8 +7817,8 @@ void intel_pm_setup(struct drm_device *dev) mutex_init(&dev_priv->rps.hw_lock); spin_lock_init(&dev_priv->rps.client_lock); - INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, - intel_gen6_powersave_work); + INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, + __intel_autoenable_gt_powersave); INIT_LIST_HEAD(&dev_priv->rps.clients); INIT_LIST_HEAD(&dev_priv->rps.semaphores.link); INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ff80a81..eeb4cbc 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -435,7 +435,7 @@ void intel_uncore_sanitize(struct drm_i915_private *dev_priv) i915.enable_rc6 = sanitize_rc6_option(dev_priv, i915.enable_rc6); /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_disable_gt_powersave(dev_priv); + intel_sanitize_gt_powersave(dev_priv); } static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, -- cgit v0.10.2 From fb7404e81555b670e44fed6a95c277fe0214f540 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 09:10:38 +0100 Subject: drm/i915: Hide gen6_update_ring_freq() This function is no longer used outside of intel_pm.c so we can stop exposing it and rename the __gen6_update_ring_freq() to take its place. 
Suggested-by: Mika Kuoppala Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1468397438-21226-8-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c036dfd..57738ba 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1695,7 +1695,6 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); -void gen6_update_ring_freq(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c77ec10..fa6b341 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5436,7 +5436,7 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void __gen6_update_ring_freq(struct drm_i915_private *dev_priv) +static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { int min_freq = 15; unsigned int gpu_freq; @@ -5520,16 +5520,6 @@ static void __gen6_update_ring_freq(struct drm_i915_private *dev_priv) } } -void gen6_update_ring_freq(struct drm_i915_private *dev_priv) -{ - if (!HAS_CORE_RING_FREQ(dev_priv)) - return; - - mutex_lock(&dev_priv->rps.hw_lock); - __gen6_update_ring_freq(dev_priv); - mutex_unlock(&dev_priv->rps.hw_lock); -} - static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) { u32 val, rp0; @@ -6624,13 +6614,13 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - __gen6_update_ring_freq(dev_priv); + gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); - __gen6_update_ring_freq(dev_priv); + gen6_update_ring_freq(dev_priv); } else if (INTEL_GEN(dev_priv) >= 6) { gen6_enable_rps(dev_priv); - __gen6_update_ring_freq(dev_priv); + gen6_update_ring_freq(dev_priv); } else if (IS_IRONLAKE_M(dev_priv)) { ironlake_enable_drps(dev_priv); intel_init_emon(dev_priv); -- cgit v0.10.2 From 28cf71ce3e206db1c3f30b3da31e7b48b2269e4c Mon Sep 17 00:00:00 2001 From: Lyude Date: Tue, 21 Jun 2016 17:03:41 -0400 Subject: drm/i915/vlv: Make intel_crt_reset() per-encoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This lets us call intel_crt_reset() in contexts where IRQs are disabled and we therefore can't hold the locks required to work with the connectors. 
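Moving the hook from drm_connector_funcs to drm_encoder_funcs keeps it wired into the normal reset path, since drm_mode_config_reset() walks the encoders as well as the connectors, roughly like this (simplified sketch of the core helper from this era; details may vary between kernel versions):

	/* drm_mode_config_reset() also resets planes, CRTCs and connectors */
	drm_for_each_encoder(encoder, dev)
		if (encoder->funcs->reset)
			encoder->funcs->reset(encoder);

while the per-encoder function can now additionally be called directly from places that cannot take the connector locks.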
Cc: stable@vger.kernel.org Cc: Ville Syrjälä Acked-by: Daniel Vetter Signed-off-by: Lyude Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 5819d52..9cd8829 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -717,11 +717,11 @@ static int intel_crt_set_property(struct drm_connector *connector, return 0; } -static void intel_crt_reset(struct drm_connector *connector) +static void intel_crt_reset(struct drm_encoder *encoder) { - struct drm_device *dev = connector->dev; + struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crt *crt = intel_attached_crt(connector); + struct intel_crt *crt = intel_encoder_to_crt(to_intel_encoder(encoder)); if (INTEL_INFO(dev)->gen >= 5) { u32 adpa; @@ -743,7 +743,6 @@ static void intel_crt_reset(struct drm_connector *connector) */ static const struct drm_connector_funcs intel_crt_connector_funcs = { - .reset = intel_crt_reset, .dpms = drm_atomic_helper_connector_dpms, .detect = intel_crt_detect, .fill_modes = drm_helper_probe_single_connector_modes, @@ -762,6 +761,7 @@ static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs }; static const struct drm_encoder_funcs intel_crt_enc_funcs = { + .reset = intel_crt_reset, .destroy = intel_encoder_destroy, }; @@ -904,5 +904,5 @@ void intel_crt_init(struct drm_device *dev) dev_priv->fdi_rx_config = I915_READ(FDI_RX_CTL(PIPE_A)) & fdi_config; } - intel_crt_reset(connector); + intel_crt_reset(&crt->base.base); } -- cgit v0.10.2 From 9504a89247595b6c066c68aea0c34af1fc78d021 Mon Sep 17 00:00:00 2001 From: Lyude Date: Tue, 21 Jun 2016 17:03:42 -0400 Subject: drm/i915/vlv: Reset the ADPA in vlv_display_power_well_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While VGA hotplugging worked(ish) before, it looks like that was mainly because we'd unintentionally enable it in valleyview_crt_detect_hotplug() when we did a force trigger. This doesn't work reliably enough because whenever the display powerwell on vlv gets disabled, the values set in VLV_ADPA get cleared and consequently VGA hotplugging gets disabled. This causes bugs such as one we found on an Intel NUC, where doing the following sequence of hotplugs: - Disconnect all monitors - Connect VGA - Disconnect VGA - Connect HDMI Would result in VGA hotplugging becoming disabled, due to the powerwells getting toggled in the process of connecting HDMI. Changes since v3: - Expose intel_crt_reset() through intel_drv.h and call that in vlv_display_power_well_init() instead of encoder->base.funcs->reset(&encoder->base); Changes since v2: - Use intel_encoder structs instead of drm_encoder structs Changes since v1: - Instead of handling the register writes ourself, we just reuse intel_crt_detect() - Instead of resetting the ADPA during display IRQ installation, we now reset them in vlv_display_power_well_init() Cc: stable@vger.kernel.org Acked-by: Daniel Vetter Signed-off-by: Lyude Reviewed-by: Ville Syrjälä [danvet: Rebase over dev_priv/drm_device embedding.] 
Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 9cd8829..d172930 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -717,7 +717,7 @@ static int intel_crt_set_property(struct drm_connector *connector, return 0; } -static void intel_crt_reset(struct drm_encoder *encoder) +void intel_crt_reset(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 57738ba..ae15e44 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1102,7 +1102,7 @@ void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, /* intel_crt.c */ void intel_crt_init(struct drm_device *dev); - +void intel_crt_reset(struct drm_encoder *encoder); /* intel_ddi.c */ void intel_ddi_clk_select(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 6b78295..b089ec8 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1078,6 +1078,7 @@ static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv) static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) { + struct intel_encoder *encoder; enum pipe pipe; /* @@ -1113,6 +1114,12 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) intel_hpd_init(dev_priv); + /* Re-enable the ADPA, if we have one */ + for_each_intel_encoder(&dev_priv->drm, encoder) { + if (encoder->type == INTEL_OUTPUT_ANALOG) + intel_crt_reset(&encoder->base); + } + i915_redisable_vga_power_on(&dev_priv->drm); } -- cgit v0.10.2 From b236d7c8421969ac0693fc571e47ee5c2a62fb90 Mon Sep 17 00:00:00 2001 From: Lyude Date: Tue, 21 Jun 2016 17:03:43 -0400 Subject: drm/i915/vlv: Disable HPD in valleyview_crt_detect_hotplug() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One of the things preventing us from using polling is the fact that calling valleyview_crt_detect_hotplug() when there's a VGA cable connected results in sending another hotplug. With polling enabled when HPD is disabled, this results in a scenario like this: - We enable power wells and reset the ADPA - output_poll_exec does force probe on VGA, triggering a hpd - HPD handler waits for poll to unlock dev->mode_config.mutex - output_poll_exec shuts off the ADPA, unlocks dev->mode_config.mutex - HPD handler runs, resets ADPA and brings us back to the start This results in an endless irq storm getting sent from the ADPA whenever a VGA connector gets detected in the middle of polling. 
Somewhat based off of the "drm/i915: Disable CRT HPD around force trigger" patch Ville Syrjälä sent a while back Cc: stable@vger.kernel.org Cc: Ville Syrjälä Signed-off-by: Lyude Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a13b098..2552936 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2957,6 +2957,8 @@ void intel_hpd_init(struct drm_i915_private *dev_priv); void intel_hpd_init_work(struct drm_i915_private *dev_priv); void intel_hpd_cancel_work(struct drm_i915_private *dev_priv); bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port); +bool intel_hpd_disable(struct drm_i915_private *dev_priv, enum hpd_pin pin); +void intel_hpd_enable(struct drm_i915_private *dev_priv, enum hpd_pin pin); /* i915_irq.c */ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index d172930..827b6ef 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -329,10 +329,25 @@ static bool valleyview_crt_detect_hotplug(struct drm_connector *connector) struct drm_device *dev = connector->dev; struct intel_crt *crt = intel_attached_crt(connector); struct drm_i915_private *dev_priv = to_i915(dev); + bool reenable_hpd; u32 adpa; bool ret; u32 save_adpa; + /* + * Doing a force trigger causes a hpd interrupt to get sent, which can + * get us stuck in a loop if we're polling: + * - We enable power wells and reset the ADPA + * - output_poll_exec does force probe on VGA, triggering a hpd + * - HPD handler waits for poll to unlock dev->mode_config.mutex + * - output_poll_exec shuts off the ADPA, unlocks + * dev->mode_config.mutex + * - HPD handler runs, resets ADPA and brings us back to the start + * + * Just disable HPD interrupts here to prevent this + */ + reenable_hpd = intel_hpd_disable(dev_priv, crt->base.hpd_pin); + save_adpa = adpa = I915_READ(crt->adpa_reg); DRM_DEBUG_KMS("trigger hotplug detect cycle: adpa=0x%x\n", adpa); @@ -357,6 +372,9 @@ static bool valleyview_crt_detect_hotplug(struct drm_connector *connector) DRM_DEBUG_KMS("valleyview hotplug adpa=0x%x, result %d\n", adpa, ret); + if (reenable_hpd) + intel_hpd_enable(dev_priv, crt->base.hpd_pin); + return ret; } diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 51434ec..57f50a1 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -510,3 +510,30 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv) cancel_work_sync(&dev_priv->hotplug.hotplug_work); cancel_delayed_work_sync(&dev_priv->hotplug.reenable_work); } + +bool intel_hpd_disable(struct drm_i915_private *dev_priv, enum hpd_pin pin) +{ + bool ret = false; + + if (pin == HPD_NONE) + return false; + + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->hotplug.stats[pin].state == HPD_ENABLED) { + dev_priv->hotplug.stats[pin].state = HPD_DISABLED; + ret = true; + } + spin_unlock_irq(&dev_priv->irq_lock); + + return ret; +} + +void intel_hpd_enable(struct drm_i915_private *dev_priv, enum hpd_pin pin) +{ + if (pin == HPD_NONE) + return; + + spin_lock_irq(&dev_priv->irq_lock); + dev_priv->hotplug.stats[pin].state = HPD_ENABLED; + spin_unlock_irq(&dev_priv->irq_lock); +} -- cgit v0.10.2 From 19625e85c6ec56038368aa72c44f5f55b221f0fc Mon Sep 17 00:00:00 2001 From: Lyude Date: Tue, 21 Jun 2016 17:03:44 -0400 Subject: drm/i915: Enable polling when we don't have hpd 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unfortunately, there are two situations where we lose hpd right now: - Runtime suspend - When we've shut off all of the power wells on Valleyview/Cherryview While it would be nice if this didn't cause issues, this has the ability to get us in some awkward states where a user won't be able to get their display to turn on. For instance, if we boot a Valleyview system without any monitors connected, it won't need any of its power wells and will thus shut them off. Since this causes us to lose HPD, this means that unless the user knows how to ssh into their machine and do a manual reprobe for monitors, none of the monitors they connect after booting will actually work. Eventually we should come up with a better fix than having to enable polling for this, since this makes rpm a lot less useful, but for now the infrastructure in i915 just isn't there yet to get hpd in these situations. Changes since v1: - Add comment explaining the addition of the if (!mode_config->poll_running) in intel_hpd_init() - Remove unneeded if (!dev->mode_config.poll_enabled) in i915_hpd_poll_init_work() - Call to drm_helper_hpd_irq_event() after we disable polling - Add cancel_work_sync() call to intel_hpd_cancel_work() Changes since v2: - Apparently dev->mode_config.poll_running doesn't actually reflect whether or not a poll is currently in progress, and is actually used for dynamic module parameter enabling/disabling. So now we instead keep track of our own poll_running variable in dev_priv->hotplug - Clean i915_hpd_poll_init_work() a little bit Changes since v3: - Remove the now-redundant connector loop in intel_hpd_init(), just rely on intel_hpd_poll_enable() for setting connector->polled correctly on each connector - Get rid of poll_running - Don't assign enabled in i915_hpd_poll_init_work before we actually lock dev->mode_config.mutex - Wrap enabled assignment in i915_hpd_poll_init_work() in READ_ONCE() for doc purposes - Do the same for dev_priv->hotplug.poll_enabled with WRITE_ONCE in intel_hpd_poll_enable() - Add some comments about racing not mattering in intel_hpd_poll_enable Changes since v4: - Rename intel_hpd_poll_enable() to intel_hpd_poll_init() - Drop the bool argument from intel_hpd_poll_init() - Remove redundant calls to intel_hpd_poll_init() - Rename poll_enable_work to poll_init_work - Add some kerneldoc for intel_hpd_poll_init() - Cross-reference intel_hpd_poll_init() in intel_hpd_init() - Just copy the loop from intel_hpd_init() in intel_hpd_poll_init() Changes since v5: - Minor kerneldoc nitpicks Cc: stable@vger.kernel.org Cc: Ville Syrjälä Reviewed-by: Daniel Vetter Signed-off-by: Lyude Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c6cc01f..1544012 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2409,6 +2409,9 @@ static int intel_runtime_suspend(struct device *device) assert_forcewakes_inactive(dev_priv); + if (!IS_VALLEYVIEW(dev_priv) || !IS_CHERRYVIEW(dev_priv)) + intel_hpd_poll_init(dev_priv); + DRM_DEBUG_KMS("Device suspended\n"); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2552936..1ec523d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -284,6 +284,9 @@ struct i915_hotplug { u32 short_port_mask; struct work_struct dig_port_work; + struct work_struct poll_init_work; + bool poll_enabled; + /* * if we get a HPD irq
from DP and a HPD irq from non-DP * the non-DP HPD could block the workqueue on a mode config diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ae15e44..e74d851 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1425,6 +1425,8 @@ int intel_dsi_dcs_init_backlight_funcs(struct intel_connector *intel_connector); /* intel_dvo.c */ void intel_dvo_init(struct drm_device *dev); +/* intel_hotplug.c */ +void intel_hpd_poll_init(struct drm_i915_private *dev_priv); /* legacy fbdev emulation in intel_fbdev.c */ diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 57f50a1..f48957e 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -452,20 +452,47 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, * * This is a separate step from interrupt enabling to simplify the locking rules * in the driver load and resume code. + * + * Also see: intel_hpd_poll_init(), which enables connector polling */ void intel_hpd_init(struct drm_i915_private *dev_priv) { - struct drm_device *dev = &dev_priv->drm; - struct drm_mode_config *mode_config = &dev->mode_config; - struct drm_connector *connector; int i; for_each_hpd_pin(i) { dev_priv->hotplug.stats[i].count = 0; dev_priv->hotplug.stats[i].state = HPD_ENABLED; } + + WRITE_ONCE(dev_priv->hotplug.poll_enabled, false); + schedule_work(&dev_priv->hotplug.poll_init_work); + + /* + * Interrupt setup is already guaranteed to be single-threaded, this is + * just to make the assert_spin_locked checks happy. + */ + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display.hpd_irq_setup) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); +} + +void i915_hpd_poll_init_work(struct work_struct *work) { + struct drm_i915_private *dev_priv = + container_of(work, struct drm_i915_private, + hotplug.poll_init_work); + struct drm_device *dev = &dev_priv->drm; + struct drm_mode_config *mode_config = &dev->mode_config; + struct drm_connector *connector; + bool enabled; + + mutex_lock(&dev->mode_config.mutex); + + enabled = READ_ONCE(dev_priv->hotplug.poll_enabled); + list_for_each_entry(connector, &mode_config->connector_list, head) { - struct intel_connector *intel_connector = to_intel_connector(connector); + struct intel_connector *intel_connector = + to_intel_connector(connector); connector->polled = intel_connector->polled; /* MST has a dynamic intel_connector->encoder and it's reprobing @@ -474,24 +501,62 @@ void intel_hpd_init(struct drm_i915_private *dev_priv) continue; if (!connector->polled && I915_HAS_HOTPLUG(dev) && - intel_connector->encoder->hpd_pin > HPD_NONE) - connector->polled = DRM_CONNECTOR_POLL_HPD; + intel_connector->encoder->hpd_pin > HPD_NONE) { + connector->polled = enabled ? + DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT : + DRM_CONNECTOR_POLL_HPD; + } } + if (enabled) + drm_kms_helper_poll_enable_locked(dev); + + mutex_unlock(&dev->mode_config.mutex); + /* - * Interrupt setup is already guaranteed to be single-threaded, this is - * just to make the assert_spin_locked checks happy. 
+ * We might have missed any hotplugs that happened while we were + * in the middle of disabling polling */ - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + if (!enabled) + drm_helper_hpd_irq_event(dev); +} + +/** + * intel_hpd_poll_init - enables/disables polling for connectors with hpd + * @dev_priv: i915 device instance + * @enabled: Whether to enable or disable polling + * + * This function enables polling for all connectors, regardless of whether or + * not they support hotplug detection. Under certain conditions HPD may not be + * functional. On most Intel GPUs, this happens when we enter runtime suspend. + * On Valleyview and Cherryview systems, this also happens when we shut off all + * of the powerwells. + * + * Since this function can get called in contexts where we're already holding + * dev->mode_config.mutex, we do the actual hotplug enabling in a seperate + * worker. + * + * Also see: intel_hpd_init(), which restores hpd handling. + */ +void intel_hpd_poll_init(struct drm_i915_private *dev_priv) +{ + WRITE_ONCE(dev_priv->hotplug.poll_enabled, true); + + /* + * We might already be holding dev->mode_config.mutex, so do this in a + * seperate worker + * As well, there's no issue if we race here since we always reschedule + * this worker anyway + */ + schedule_work(&dev_priv->hotplug.poll_init_work); } void intel_hpd_init_work(struct drm_i915_private *dev_priv) { INIT_WORK(&dev_priv->hotplug.hotplug_work, i915_hotplug_work_func); INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func); + INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work); INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work, intel_hpd_irq_storm_reenable_work); } @@ -508,6 +573,7 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv) cancel_work_sync(&dev_priv->hotplug.dig_port_work); cancel_work_sync(&dev_priv->hotplug.hotplug_work); + cancel_work_sync(&dev_priv->hotplug.poll_init_work); cancel_delayed_work_sync(&dev_priv->hotplug.reenable_work); } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index b089ec8..1c603bb 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1133,6 +1133,8 @@ static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv) synchronize_irq(dev_priv->drm.irq); intel_power_sequencer_reset(dev_priv); + + intel_hpd_poll_init(dev_priv); } static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv, -- cgit v0.10.2 From e7852a4b3a4fb6f6c18fdaff934580aa8521599a Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 25 May 2016 14:30:41 +0100 Subject: drm/i915: add missing condition for committing planes on crtc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The i915 driver checks for color management properties changes as part of a plane update. Therefore a color management update must imply a plane update, otherwise we never update the transformation matrixes and degamma/gamma LUTs. 
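Concretely, the atomic check path just needs to mark the planes as changed whenever the color management state changed; a minimal sketch (the surrounding color_mgmt_changed guard is assumed from context, the authoritative hunk is in the diff below):

	if (crtc_state->color_mgmt_changed) {	/* assumed guard */
		ret = intel_color_check(crtc, crtc_state);
		if (ret)
			return ret;

		/* color management is applied as part of the planes update */
		crtc_state->planes_changed = true;
	}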
v2: add comment about moving the commit of color management registers to an async worker v3: Commit color management register right after vblank v4: Move back color management commit condition together with planes commit v5: Trigger color management commit through the planes commit (Daniel) v6: Make plane change update more readable Fixes: 20a34e78f0d7 (drm/i915: Update color management during vblank evasion.) Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Daniel Vetter Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Lionel Landwerlin References: https://lkml.org/lkml/2016/7/14/614 Reviewed-and-tested-by: Mario Kleiner Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1464183041-8478-1-git-send-email-lionel.g.landwerlin@intel.com diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 88589b5..9337d3a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12016,6 +12016,12 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, ret = intel_color_check(crtc, crtc_state); if (ret) return ret; + + /* + * Changing color management on Intel hardware is + * handled as part of planes update. + */ + crtc_state->planes_changed = true; } ret = 0; -- cgit v0.10.2 From 0b8c0e9c3354953a5d6f90ece82961017053dc74 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 18:34:44 +0100 Subject: drm/i915/fbdev: Drain the suspend worker on retiring Since the suspend_work can arm itself if the console_lock() is currently held elsewhere, simply calling flush_work() doesn't guarantee that the work is idle upon return. To do so requires using cancel_work_sync(). Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1468431285-28264-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 86b00c6..ef17d88 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -768,7 +768,7 @@ void intel_fbdev_fini(struct drm_device *dev) if (!ifbdev) return; - flush_work(&dev_priv->fbdev_suspend_work); + cancel_work_sync(&dev_priv->fbdev_suspend_work); if (!current_is_async()) intel_fbdev_sync(ifbdev); -- cgit v0.10.2 From 6bc265424df02f8162f4a17a37e4982e1c64460e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Jul 2016 18:34:45 +0100 Subject: drm/i915/fbdev: Check for the framebuffer before use If the fbdev probing fails, and in our error path we fail to clear the dev_priv->fbdev, then we can try and use a dangling fbdev pointer, and in particular a NULL fb. This could also happen in pathological cases where we try to operate on the fbdev prior to it being probed. 
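The guard added here is the usual defensive pattern for these entry points; sketched:

	struct intel_fbdev *ifbdev = to_i915(dev)->fbdev;

	if (!ifbdev || !ifbdev->fb)
		return;	/* fbdev not set up, or probing failed */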
Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1468431285-28264-2-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter Reviewed-by: Mika Kuoppala diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index ef17d88..23129dc 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -782,7 +782,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous struct intel_fbdev *ifbdev = dev_priv->fbdev; struct fb_info *info; - if (!ifbdev) + if (!ifbdev || !ifbdev->fb) return; info = ifbdev->helper.fbdev; @@ -827,31 +827,28 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous void intel_fbdev_output_poll_changed(struct drm_device *dev) { - struct drm_i915_private *dev_priv = to_i915(dev); - if (dev_priv->fbdev) - drm_fb_helper_hotplug_event(&dev_priv->fbdev->helper); + struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; + + if (ifbdev && ifbdev->fb) + drm_fb_helper_hotplug_event(&ifbdev->helper); } void intel_fbdev_restore_mode(struct drm_device *dev) { - int ret; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_fbdev *ifbdev = dev_priv->fbdev; - struct drm_fb_helper *fb_helper; + struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; if (!ifbdev) return; intel_fbdev_sync(ifbdev); + if (!ifbdev->fb) + return; - fb_helper = &ifbdev->helper; - - ret = drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper); - if (ret) { + if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) { DRM_DEBUG("failed to restore crtc mode\n"); } else { - mutex_lock(&fb_helper->dev->struct_mutex); + mutex_lock(&dev->struct_mutex); intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); } } -- cgit v0.10.2 From 945657b461ed27621ec6d3ca2f1b9880b34843bd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 15 Jul 2016 14:56:19 +0100 Subject: drm/i915/evict: Always switch away from the current context Currently execlists is exempt from emitting a request to switch each ring away from the current context over to the dev_priv->kernel_context (for whatever reason, just under execlists the GGTT is unlikely to be as fragmented, however the switch may help in some extreme cases). Extract the switcher and enable it for execlsts as well, as we need to do so in a later patch to force the context switch before suspend. (And since for that switch we explicitly require the disposable kernel context, rename the extracted function.) 
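A sketch of how callers use the extracted helper (this is the pattern the next patch adds to the suspend path; error handling trimmed):

	/* emit a switch to the disposable kernel context on every engine */
	ret = i915_gem_switch_to_kernel_context(dev_priv);
	if (ret)
		return ret;

	/* once idle, every other context image is coherent in memory */
	ret = i915_gem_wait_for_idle(dev_priv);
	if (ret)
		return ret;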
Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1468590980-6186-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1ec523d..cd1ccc4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3525,6 +3525,7 @@ void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); int i915_switch_context(struct drm_i915_gem_request *req); +int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv); void i915_gem_context_free(struct kref *ctx_ref); struct drm_i915_gem_object * i915_gem_alloc_context_obj(struct drm_device *dev, size_t size); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3c97f0e..3b63616 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -926,6 +926,35 @@ int i915_switch_context(struct drm_i915_gem_request *req) return do_rcs_switch(req); } +int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + + for_each_engine(engine, dev_priv) { + struct drm_i915_gem_request *req; + int ret; + + if (engine->last_context == NULL) + continue; + + if (engine->last_context == dev_priv->kernel_context) + continue; + + req = i915_gem_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(req)) + return PTR_ERR(req); + + ret = 0; + if (!i915.enable_execlists) + ret = i915_switch_context(req); + i915_add_request_no_flush(req); + if (ret) + return ret; + } + + return 0; +} + static bool contexts_enabled(struct drm_device *dev) { return i915.enable_execlists || to_i915(dev)->hw_context_size; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 3c1280e..b1194c7 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -33,37 +33,6 @@ #include "intel_drv.h" #include "i915_trace.h" -static int switch_to_pinned_context(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - - if (i915.enable_execlists) - return 0; - - for_each_engine(engine, dev_priv) { - struct drm_i915_gem_request *req; - int ret; - - if (engine->last_context == NULL) - continue; - - if (engine->last_context == dev_priv->kernel_context) - continue; - - req = i915_gem_request_alloc(engine, dev_priv->kernel_context); - if (IS_ERR(req)) - return PTR_ERR(req); - - ret = i915_switch_context(req); - i915_add_request_no_flush(req); - if (ret) - return ret; - } - - return 0; -} - - static bool mark_free(struct i915_vma *vma, struct list_head *unwind) { @@ -184,7 +153,7 @@ none: struct drm_i915_private *dev_priv = to_i915(dev); if (i915_is_ggtt(vm)) { - ret = switch_to_pinned_context(dev_priv); + ret = i915_gem_switch_to_kernel_context(dev_priv); if (ret) return ret; } @@ -303,7 +272,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) struct drm_i915_private *dev_priv = to_i915(vm->dev); if (i915_is_ggtt(vm)) { - ret = switch_to_pinned_context(dev_priv); + ret = i915_gem_switch_to_kernel_context(dev_priv); if (ret) return ret; } -- cgit v0.10.2 From 5ab57c7020697942ea15f45ad14c69cecb164329 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 15 Jul 2016 14:56:20 +0100 Subject: drm/i915: Flush logical context image out to memory upon 
suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before suspend, and especially before building the hibernation image, we need the context image to be coherent in memory. To do this we require that we perform a context switch to a disposable context (i.e. the dev_priv->kernel_context) - when that switch is complete, all other context images will be complete. This leaves the kernel_context image as incomplete, but fortunately that is disposable and we can do a quick fixup of the logical state after resuming. v2: Share the nearly identical code to switch to the kernel context with eviction. v3: Explain why we need the switch and reset. Testcase: igt/gem_exec_suspend # bsw References: https://bugs.freedesktop.org/show_bug.cgi?id=96526 Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Tested-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468590980-6186-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1544012..c5b7b8e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1590,9 +1590,7 @@ static int i915_drm_resume(struct drm_device *dev) intel_csr_ucode_resume(dev_priv); - mutex_lock(&dev->struct_mutex); - i915_gem_restore_gtt_mappings(dev); - mutex_unlock(&dev->struct_mutex); + i915_gem_resume(dev); i915_restore_state(dev); intel_opregion_setup(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cd1ccc4..27d9b2c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3384,6 +3384,7 @@ void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_engines(struct drm_device *dev); int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv); int __must_check i915_gem_suspend(struct drm_device *dev); +void i915_gem_resume(struct drm_device *dev); void __i915_add_request(struct drm_i915_gem_request *req, struct drm_i915_gem_object *batch_obj, bool flush_caches); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cf0e8aa..4644a7e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4983,12 +4983,30 @@ i915_gem_suspend(struct drm_device *dev) intel_suspend_gt_powersave(dev_priv); mutex_lock(&dev->struct_mutex); + + /* We have to flush all the executing contexts to main memory so + * that they can saved in the hibernation image. To ensure the last + * context image is coherent, we have to switch away from it. That + * leaves the dev_priv->kernel_context still active when + * we actually suspend, and its image in memory may not match the GPU + * state. Fortunately, the kernel_context is disposable and we do + * not rely on its state. + */ + ret = i915_gem_switch_to_kernel_context(dev_priv); + if (ret) + goto err; + ret = i915_gem_wait_for_idle(dev_priv); if (ret) goto err; i915_gem_retire_requests(dev_priv); + /* Note that rather than stopping the engines, all we have to do + * is assert that every RING_HEAD == RING_TAIL (all execution complete) + * and similar for all logical context images (to ensure they are + * all ready for hibernation).
+ */ i915_gem_stop_engines(dev); i915_gem_context_lost(dev_priv); mutex_unlock(&dev->struct_mutex); @@ -5009,6 +5027,23 @@ err: return ret; } +void i915_gem_resume(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + + mutex_lock(&dev->struct_mutex); + i915_gem_restore_gtt_mappings(dev); + + /* As we didn't flush the kernel context before suspend, we cannot + * guarantee that the context image is complete. So let's just reset + * it and start again. + */ + if (i915.enable_execlists) + intel_lr_context_reset(dev_priv, dev_priv->kernel_context); + + mutex_unlock(&dev->struct_mutex); +} + void i915_gem_init_swizzling(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); -- cgit v0.10.2 From 1b7f2c8b0773d5ccbef43ef38a13ad33136c9679 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 18 Jul 2016 13:15:14 +0300 Subject: drm/i915: Treat eDP as always connected, again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit eDP should be treated as connected even if doesn't have an EDID. In that case we'll use the timings from the VBT. That used to be the case until commit f21a21983ef1 ("drm/i915: Splitting intel_dp_detect") broke things by considering even eDP disconnected if we fail to get an EDID for it. Fix things up again by treating eDP as always connected. Cc: Shubhangi Shrivastava Cc: Nathan D Ciobanu Cc: Sivakumar Thulasimani Cc: Ander Conselvan de Oliveira Cc: Larry Finger Reported-by: Larry Finger Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96675 Cc: drm-intel-fixes@lists.freedesktop.org Fixes: f21a21983ef1 ("drm/i915: Splitting intel_dp_detect") Signed-off-by: Ville Syrjälä Tested-by: Larry Finger Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468836914-16537-1-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0c5ba34..21b04c3 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4336,7 +4336,7 @@ intel_dp_detect(struct drm_connector *connector, bool force) intel_dp->detect_done = false; - if (intel_connector->detect_edid) + if (is_edp(intel_dp) || intel_connector->detect_edid) return connector_status_connected; else return connector_status_disconnected; -- cgit v0.10.2 From a87848750ebc2c174879f5acaa1183044287fd03 Mon Sep 17 00:00:00 2001 From: Bob Paauwe Date: Fri, 15 Jul 2016 14:59:02 +0100 Subject: drm/i915: Set legacy properties when using legacy gamma set IOCTL. (v2) The i915 driver is now using atomic properties and atomic commit to handle the legacy set gamma IOCTL. However, if the driver is configured without atomic (nuclear_pageflip = false), it won't update the legacy properties for degamma_lut, gamma_lut and ctm leaving them out of sync with the atomic version of the properties. Until the driver is full atomic, make sure we update the non-atomic version of the properties. v2: Update the comment with a FIXME. 
(Daniel) v3: Update arguments of the gamma_set vfunc (Lionel) v4: Fixed vfunc prototype (Lionel) igt-testcase: kms_pipe_color / legacy-gamma-reset-pipeX Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: stable@vger.kernel.org #v4.7 Signed-off-by: Bob Paauwe Signed-off-by: Lionel Landwerlin Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1468591142-2253-1-git-send-email-lionel.g.landwerlin@intel.com diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9337d3a..fb7d8fc5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13924,8 +13924,50 @@ out: #undef for_each_intel_crtc_masked +/* + * FIXME: Remove this once i915 is fully DRIVER_ATOMIC by calling + * drm_atomic_helper_legacy_gamma_set() directly. + */ +static int intel_atomic_legacy_gamma_set(struct drm_crtc *crtc, + u16 *red, u16 *green, u16 *blue, + uint32_t size) +{ + struct drm_device *dev = crtc->dev; + struct drm_mode_config *config = &dev->mode_config; + struct drm_crtc_state *state; + int ret; + + ret = drm_atomic_helper_legacy_gamma_set(crtc, red, green, blue, size); + if (ret) + return ret; + + /* + * Make sure we update the legacy properties so this works when + * atomic is not enabled. + */ + + state = crtc->state; + + drm_object_property_set_value(&crtc->base, + config->degamma_lut_property, + (state->degamma_lut) ? + state->degamma_lut->base.id : 0); + + drm_object_property_set_value(&crtc->base, + config->ctm_property, + (state->ctm) ? + state->ctm->base.id : 0); + + drm_object_property_set_value(&crtc->base, + config->gamma_lut_property, + (state->gamma_lut) ? + state->gamma_lut->base.id : 0); + + return 0; +} + static const struct drm_crtc_funcs intel_crtc_funcs = { - .gamma_set = drm_atomic_helper_legacy_gamma_set, + .gamma_set = intel_atomic_legacy_gamma_set, .set_config = drm_atomic_helper_set_config, .set_property = drm_atomic_helper_crtc_set_property, .destroy = intel_crtc_destroy, -- cgit v0.10.2 From 9e2793f6e4e2ca452457e459f013cc8e6b08a789 Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Thu, 14 Jul 2016 14:52:03 +0100 Subject: drm/i915: compile-time consistency check on __EXEC_OBJECT flags Two different sets of flag bits are stored in the 'flags' member of a 'struct drm_i915_gem_exec_object2', and they're defined in two different source files, increasing the risk of an accidental clash. Some flags in this field are supplied by the user; these are defined in i915_drm.h, and they start from the LSB and work up. Other flags are defined in i915_gem_execbuffer, for internal use within that file only; they start from the MSB and work down. So here we add a compile-time check that the two sets of flags do not overlap, which would cause all sorts of confusion. 
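The check itself is a single compile-time assertion; the idea, in brief (the real hunk follows):

	/* the top nibble is reserved for flags internal to execbuffer */
	#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28)

	/* breaks the build if a userspace flag ever grows into those bits */
	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);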
Signed-off-by: Dave Gordon Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468504324-12690-1-git-send-email-david.s.gordon@intel.com diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1978633..1bb1f25 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,10 +34,11 @@ #include #include -#define __EXEC_OBJECT_HAS_PIN (1<<31) -#define __EXEC_OBJECT_HAS_FENCE (1<<30) -#define __EXEC_OBJECT_NEEDS_MAP (1<<29) -#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_HAS_PIN (1<<31) +#define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_MAP (1<<29) +#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */ #define BATCH_OFFSET_BIAS (256*1024) @@ -1007,6 +1008,9 @@ validate_exec_list(struct drm_device *dev, unsigned invalid_flags; int i; + /* INTERNAL flags must not overlap with external ones */ + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); + invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; if (USES_FULL_PPGTT(dev)) invalid_flags |= EXEC_OBJECT_NEEDS_GTT; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d7e81a3..51b9360 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -698,12 +698,13 @@ struct drm_i915_gem_exec_object2 { */ __u64 offset; -#define EXEC_OBJECT_NEEDS_FENCE (1<<0) -#define EXEC_OBJECT_NEEDS_GTT (1<<1) -#define EXEC_OBJECT_WRITE (1<<2) +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) -#define EXEC_OBJECT_PINNED (1<<4) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) +#define EXEC_OBJECT_PINNED (1<<4) +/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ +#define __EXEC_OBJECT_UNKNOWN_FLAGS (-(EXEC_OBJECT_PINNED<<1)) __u64 flags; __u64 rsvd1; -- cgit v0.10.2 From 4bfa339aa40c2faa7aa7b7e488e6cd55c54b9055 Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Thu, 14 Jul 2016 14:52:04 +0100 Subject: drm/i915: refactor eb_get_batch() Precursor for fix to secure batch execution. We will need to be able to retrieve the batch VMA (as well as the batch itself) from the eb list, so this patch extracts that part of eb_get_batch() into a separate function, and moves both parts to a more logical place in the file, near where the eb list is created. Also, it may not be obvious, but the current execbuffer2 ioctl interface requires that the buffer object containing the batch-to-be-executed be the LAST entry in the exec2_list[] array (I expected it to be the first!). To clarify this, we can replace the rather obscure construct "list_entry(eb->vmas.prev, ...)" in the old version of eb_get_batch() with the equivalent but more explicit "list_last_entry(&eb->vmas,...)" in the new eb_get_batch_vma() and of course add an explanatory comment. 
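Seen from userspace, the constraint looks roughly like this (a hedged sketch; the bo handles, batch size and fd are placeholders, relocations, flags and error handling omitted):

	struct drm_i915_gem_exec_object2 objects[2] = {
		{ .handle = data_bo },   /* ordinary buffers come first */
		{ .handle = batch_bo },  /* the batch must be the LAST entry */
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = (uintptr_t)objects,
		.buffer_count = 2,
		.batch_len = batch_size,
	};

	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);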
Signed-off-by: Dave Gordon Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468504324-12690-2-git-send-email-david.s.gordon@intel.com diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1bb1f25..f6724ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -186,6 +186,35 @@ err: return ret; } +static inline struct i915_vma * +eb_get_batch_vma(struct eb_vmas *eb) +{ + /* The batch is always the LAST item in the VMA list */ + struct i915_vma *vma = list_last_entry(&eb->vmas, typeof(*vma), exec_list); + + return vma; +} + +static struct drm_i915_gem_object * +eb_get_batch(struct eb_vmas *eb) +{ + struct i915_vma *vma = eb_get_batch_vma(eb); + + /* + * SNA is doing fancy tricks with compressing batch buffers, which leads + * to negative relocation deltas. Usually that works out ok since the + * relocate address is still positive, except when the batch is placed + * very low in the GTT. Ensure this doesn't happen. + * + * Note that actual hangs have only been observed on gen7, but for + * paranoia do it everywhere. + */ + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + + return vma->obj; +} + static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) { if (eb->and < 0) { @@ -1341,26 +1370,6 @@ gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file) return file_priv->bsd_ring; } -static struct drm_i915_gem_object * -eb_get_batch(struct eb_vmas *eb) -{ - struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); - - /* - * SNA is doing fancy tricks with compressing batch buffers, which leads - * to negative relocation deltas. Usually that works out ok since the - * relocate address is still positive, except when the batch is placed - * very low in the GTT. Ensure this doesn't happen. - * - * Note that actual hangs have only been observed on gen7, but for - * paranoia do it everywhere. - */ - if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; - - return vma->obj; -} - #define I915_USER_RINGS (4) static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { -- cgit v0.10.2 From d1054ee492a89b134fb0ac527b0714c277ae9c0f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 16 Jul 2016 18:42:36 +0100 Subject: drm/i915: Handle ENOSPC after failing to insert a mappable node Even after adding individual page support for GTT mmaping, we can still fail to find any space within the mappable region, and drm_mm_insert_node() will then report ENOSPC. We have to then handle this error by using the shmem access to the pages. Fixes: b50a53715f09 ("drm/i915: Support for pread/pwrite ... objects") Testcase: igt/gem_concurrent_blit Signed-off-by: Chris Wilson Cc: Ankitprasad Sharma Cc: Tvrtko Ursulin diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4644a7e..e37f73d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1306,7 +1306,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * textures). Fallback to the shmem path in that case. 
*/ } - if (ret == -EFAULT) { + if (ret == -EFAULT || ret == -ENOSPC) { if (obj->phys_handle) ret = i915_gem_phys_pwrite(obj, args, file); else if (i915_gem_object_has_struct_page(obj)) -- cgit v0.10.2 From 3d466cd67e85fe11493001cf62b988afff018d33 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 15 Jul 2016 21:48:05 +0200 Subject: drm/i915: Fixup kerneldoc code snippets in intel_uncore.c We need :: before, blank lines around and indentation with 4 _additional_ spaces to make it work. Also, don't use @param in code snippets, it results in confusion. Acked-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468612088-9721-8-git-send-email-daniel.vetter@ffwll.ch diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index eeb4cbc..43f8339 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1618,8 +1618,10 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, * @timeout_ms: timeout in millisecond * * This routine waits until the target register @reg contains the expected - * @value after applying the @mask, i.e. it waits until - * (I915_READ_FW(@reg) & @mask) == @value + * @value after applying the @mask, i.e. it waits until :: + * + * (I915_READ_FW(reg) & mask) == value + * * Otherwise, the wait will timeout after @timeout_ms milliseconds. * * Note that this routine assumes the caller holds forcewake asserted, it is @@ -1652,8 +1654,10 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, * @timeout_ms: timeout in millisecond * * This routine waits until the target register @reg contains the expected - * @value after applying the @mask, i.e. it waits until - * (I915_READ(@reg) & @mask) == @value + * @value after applying the @mask, i.e. it waits until :: + * + * (I915_READ(reg) & mask) == value + * * Otherwise, the wait will timeout after @timeout_ms milliseconds. * * Returns 0 if the register matches the desired condition, or -ETIMEOUT. -- cgit v0.10.2 From 6e5248b53fda0dc61227b1c560897beb36d22225 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 15 Jul 2016 21:48:06 +0200 Subject: drm/i915: Clean up kerneldoc for intel_lrc.c Fairly minimal, there's still lots of functions without any docs, and which aren't static. But probably we want to first clean this up some more. - Drop the bogus const. Marking argument pointers themselves (instead of what they point at) as const provides roughly 0 value. And it's confusing, since the data the pointer points at _is_ being changed. - Remove kerneldoc for static functions. Keep comments where they seem valuable. - Indent and whitespace fixes. - Blockquote the bit field definitions of the descriptor for correct layouting. 
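Both kerneldoc cleanups converge on the same convention for literal blocks; a made-up example (frob_register() is not a real function) showing the "::" marker, the blank line and the four extra spaces of indentation that Sphinx needs:

	/**
	 * frob_register() - wait for a register to match an expected value
	 * @reg: register to poll
	 * @mask: mask applied before the comparison
	 * @value: expected value
	 *
	 * This waits until the register matches, i.e. until ::
	 *
	 *     (READ(reg) & mask) == value
	 *
	 * Returns 0 on success, -ETIMEDOUT otherwise.
	 */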
Acked-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468612088-9721-9-git-send-email-daniel.vetter@ffwll.ch diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b6af635..2e670f1 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -288,7 +288,6 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context - * * @ctx: Context to work on * @engine: Engine the descriptor will be used with * @@ -297,12 +296,13 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) * expensive to calculate, we'll just do it once and cache the result, * which remains valid until the context is unpinned. * - * This is what a descriptor looks like, from LSB to MSB: - * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) - * bits 12-31: LRCA, GTT address of (the HWSP of) this context - * bits 32-52: ctx ID, a globally unique tag - * bits 53-54: mbz, reserved for use by hardware - * bits 55-63: group ID, currently unused and set to 0 + * This is what a descriptor looks like, from LSB to MSB:: + * + * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) + * bits 12-31: LRCA, GTT address of (the HWSP of) this context + * bits 32-52: ctx ID, a globally unique tag + * bits 53-54: mbz, reserved for use by hardware + * bits 55-63: group ID, currently unused and set to 0 */ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, @@ -539,10 +539,7 @@ get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer, return status; } -/** - * intel_lrc_irq_handler() - handle Context Switch interrupts - * @data: tasklet handler passed in unsigned long - * +/* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. */ @@ -807,7 +804,7 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) } /** - * execlists_submission() - submit a batchbuffer for execution, Execlists style + * intel_execlists_submission() - submit a batchbuffer for execution, Execlists style * @params: execbuffer call parameters. * @args: execbuffer call arguments. * @vmas: list of vmas. @@ -1094,7 +1091,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) * code duplication. */ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, - uint32_t *const batch, + uint32_t *batch, uint32_t index) { uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES); @@ -1155,37 +1152,24 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, return 0; } -/** - * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA - * - * @engine: only applicable for RCS - * @wa_ctx: structure representing wa_ctx - * offset: specifies start of the batch, should be cache-aligned. This is updated - * with the offset value received as input. - * size: size of the batch in DWORDS but HW expects in terms of cachelines - * @batch: page in which WA are loaded - * @offset: This field specifies the start of the batch, it should be - * cache-aligned otherwise it is adjusted accordingly. 
- * Typically we only have one indirect_ctx and per_ctx batch buffer which are - * initialized at the beginning and shared across all contexts but this field - * helps us to have multiple batches at different offsets and select them based - * on a criteria. At the moment this batch always start at the beginning of the page - * and at this point we don't have multiple wa_ctx batch buffers. - * - * The number of WA applied are not known at the beginning; we use this field - * to return the no of DWORDS written. +/* + * Typically we only have one indirect_ctx and per_ctx batch buffer which are + * initialized at the beginning and shared across all contexts but this field + * helps us to have multiple batches at different offsets and select them based + * on a criteria. At the moment this batch always start at the beginning of the page + * and at this point we don't have multiple wa_ctx batch buffers. * - * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END - * so it adds NOOPs as padding to make it cacheline aligned. - * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together - * makes a complete batch buffer. + * The number of WA applied are not known at the beginning; we use this field + * to return the no of DWORDS written. * - * Return: non-zero if we exceed the PAGE_SIZE limit. + * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END + * so it adds NOOPs as padding to make it cacheline aligned. + * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together + * makes a complete batch buffer. */ - static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { uint32_t scratch_addr; @@ -1229,26 +1213,18 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); } -/** - * gen8_init_perctx_bb() - initialize per ctx batch with WA - * - * @engine: only applicable for RCS - * @wa_ctx: structure representing wa_ctx - * offset: specifies start of the batch, should be cache-aligned. - * size: size of the batch in DWORDS but HW expects in terms of cachelines - * @batch: page in which WA are loaded - * @offset: This field specifies the start of this batch. - * This batch is started immediately after indirect_ctx batch. Since we ensure - * that indirect_ctx ends on a cacheline this batch is aligned automatically. +/* + * This batch is started immediately after indirect_ctx batch. Since we ensure + * that indirect_ctx ends on a cacheline this batch is aligned automatically. * - * The number of DWORDS written are returned using this field. + * The number of DWORDS written are returned using this field. * * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. 
*/ static int gen8_init_perctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); @@ -1263,7 +1239,7 @@ static int gen8_init_perctx_bb(struct intel_engine_cs *engine, static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { int ret; @@ -1330,7 +1306,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, static int gen9_init_perctx_bb(struct intel_engine_cs *engine, struct i915_wa_ctx_bb *wa_ctx, - uint32_t *const batch, + uint32_t *batch, uint32_t *offset) { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); @@ -1916,9 +1892,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) /** * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer - * * @engine: Engine Command Streamer. - * */ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) { @@ -2365,19 +2339,6 @@ uint32_t intel_lr_context_size(struct intel_engine_cs *engine) return ret; } -/** - * execlists_context_deferred_alloc() - create the LRC specific bits of a context - * @ctx: LR context to create. - * @engine: engine to be used with the context. - * - * This function can be called more than once, with different engines, if we plan - * to use the context with them. The context backing objects and the ringbuffers - * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why - * the creation is a deferred call: it's better to make sure first that we need to use - * a given ring with the context. - * - * Return: non-zero on error. - */ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { -- cgit v0.10.2 From 62f90b38f3326206ea53f7cf4ea4616028419d60 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 15 Jul 2016 21:48:07 +0200 Subject: drm/i915: Update missing kerneldoc Not sure why so much slips through when 0day is catching these. Hopefully the much faster sphinx toolchain helps in unlazying people. Acked-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468612088-9721-10-git-send-email-daniel.vetter@ffwll.ch diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e37f73d..e40fab1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -916,7 +916,7 @@ fast_user_write(struct io_mapping *mapping, /** * This is the fast pwrite path, where we copy the data directly from the * user into the GTT, uncached. - * @dev: drm device pointer + * @i915: i915 device private data * @obj: i915 gem object * @args: pwrite arguments structure * @file: drm file pointer diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index f6acb5a..142bac9 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -97,6 +97,7 @@ static struct _balloon_info_ bl_info; /** * intel_vgt_deballoon - deballoon reserved graphics address trunks + * @dev_priv: i915 device private data * * This function is called to deallocate the ballooned-out graphic memory, when * driver is unloaded or when ballooning fails. 
@@ -138,7 +139,7 @@ static int vgt_balloon_space(struct drm_mm *mm, /** * intel_vgt_balloon - balloon out reserved graphics address trunks - * @dev: drm device + * @dev_priv: i915 device private data * * This function is called at the initialization stage, to balloon out the * graphic address space allocated to other vGPUs, by marking these spaces as diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 6a7ad3e..781e2f5 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1075,6 +1075,8 @@ out: /** * intel_fbc_enable: tries to enable FBC on the CRTC * @crtc: the CRTC + * @crtc_state: corresponding &drm_crtc_state for @crtc + * @plane_state: corresponding &drm_plane_state for the primary plane of @crtc * * This function checks if the given CRTC was chosen for FBC, then enables it if * possible. Notice that it doesn't activate FBC. It is valid to call diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index f48957e..5dc2c20 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -525,7 +525,6 @@ void i915_hpd_poll_init_work(struct work_struct *work) { /** * intel_hpd_poll_init - enables/disables polling for connectors with hpd * @dev_priv: i915 device instance - * @enabled: Whether to enable or disable polling * * This function enables polling for all connectors, regardless of whether or * not they support hotplug detection. Under certain conditions HPD may not be -- cgit v0.10.2 From e419899b7c19ef99e340e2b1ba585d82fd28c53b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 1 Jul 2016 16:40:04 +0300 Subject: drm/i915/gen9: Clean up MOCS table definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use named struct initializers for clarity. Also fix the target cache definition to reflect its role in GEN9 onwards. On GEN8 a TC value of 0 meant ELLC but on GEN9+ it means the TC and LRU controls are taken from the PTE. No functional change, igt/gem_mocs_settings still passing after this change. v2: (Chris) - Add back the hexa literals for the entries. Add note that igt/gem_mocs_settings still passes. CC: Rong R Yang CC: Yakui Zhao CC: Chris Wilson Signed-off-by: Imre Deak Acked-by: Zhao Yakui Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1467380406-11954-2-git-send-email-imre.deak@intel.com diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 3c1482b..d36e609 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -66,9 +66,10 @@ struct drm_i915_mocs_table { #define L3_WB 3 /* Target cache */ -#define ELLC 0 -#define LLC 1 -#define LLC_ELLC 2 +#define LE_TC_PAGETABLE 0 +#define LE_TC_LLC 1 +#define LE_TC_LLC_ELLC 2 +#define LE_TC_LLC_ELLC_ALT 3 /* * MOCS tables @@ -96,34 +97,67 @@ struct drm_i915_mocs_table { * end. 
*/ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - /* { 0x00000009, 0x0010 } */ - { (LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(0) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC)) }, - /* { 0x00000038, 0x0030 } */ - { (LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) }, - /* { 0x0000003b, 0x0030 } */ - { (LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) } + { /* 0x00000009 */ + .control_value = LE_CACHEABILITY(LE_UC) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + + /* 0x0010 */ + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), + }, + { + /* 0x00000038 */ + .control_value = LE_CACHEABILITY(LE_PAGETABLE) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + /* 0x0030 */ + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + }, + { + /* 0x0000003b */ + .control_value = LE_CACHEABILITY(LE_WB) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + /* 0x0030 */ + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + }, }; /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - /* { 0x00000009, 0x0010 } */ - { (LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(0) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC)) }, - /* { 0x00000038, 0x0030 } */ - { (LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) }, - /* { 0x0000003b, 0x0030 } */ - { (LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LLC_ELLC) | LE_LRUM(3) | - LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0)), - (L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB)) } + { + /* 0x00000009 */ + .control_value = LE_CACHEABILITY(LE_UC) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + + /* 0x0010 */ + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), + }, + { + /* 0x00000038 */ + .control_value = LE_CACHEABILITY(LE_PAGETABLE) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + + /* 0x0030 */ + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + }, + { + /* 0x0000003b */ + .control_value = LE_CACHEABILITY(LE_WB) | + LE_TGT_CACHE(LE_TC_LLC_ELLC) | + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | + LE_PFM(0) | LE_SCF(0), + + /* 0x0030 */ + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), + }, }; /** -- cgit v0.10.2 From 6bee14ed1e1136d700e5290c080e1145982ce43e Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 1 Jul 2016 16:40:05 +0300 Subject: drm/i915/bxt: Fix inadvertent CPU snooping due to incorrect MOCS config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting a write-back cache policy in the MOCS entry definition also implies snooping, which has a considerable overhead. 
This is unexpected for a few reasons: - From user-space's point of view since it didn't want a coherent surface (it didn't set the buffer as such via the set caching IOCTL). - There is a separate MOCS entry field for snooping (which we never set). - This MOCS table is about caching in (e)LLC and there is no (e)LLC on BXT. There is a separate table for L3 cache control. Considering the above the current behavior of snooping looks like an unintentional side-effect of the WB setting. Changing it to be LLC-UC gets rid of the snooping without any ill-effects. For a coherent surface the application would use a separate MOCS entry at index 1 and call the set caching IOCTL to setup the PTE entries for the corresponding buffer to be snooped. In the future we could also add a new MOCS entry for coherent surfaces. This resulted in 70% improvement in synthetic texturing benchmarks. Kudos to Valtteri Rantala, Eero Tamminen and Michael T Frederick and Ville who helped to narrow the source of problem to the kernel and to the snooping behaviour in particular. With a follow-up change to adjust the 3rd entry value igt/gem_mocs_settings is passing after this change. v2: - Rebase on v2 of patch 1/2. v3: - Set the entry as LLC uncached instead of PTE-passthrough. This way we also keep snooping disabled, but we also make the cacheability/ coherency setting indepent of the PTE which is managed by the kernel. (Chris) CC: Rong R Yang CC: Yakui Zhao CC: Valtteri Rantala CC: Eero Tamminen CC: Michael T Frederick CC: Ville Syrjälä CC: Chris Wilson Signed-off-by: Imre Deak Acked-by: Zhao Yakui Tested-by: Rong R Yang Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1467380406-11954-3-git-send-email-imre.deak@intel.com diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index d36e609..927825f 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -149,8 +149,8 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, { - /* 0x0000003b */ - .control_value = LE_CACHEABILITY(LE_WB) | + /* 0x00000039 */ + .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | LE_PFM(0) | LE_SCF(0), -- cgit v0.10.2 From 3373ce2eccd56651579b1864fecf98b46fd1cb67 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 1 Jul 2016 17:32:08 +0300 Subject: drm/i915: Give proper names to MOCS entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The purpose for each MOCS entry isn't well defined atm. Defining these is important to remove any uncertainty about the use of these entries for example in terms of performance and GPU/CPU coherency. Suggested by Ville. v4: - Rename I915_MOCS_AUTO to I915_MOCS_PTE. (Chris) CC: Rong R Yang CC: Yakui Zhao CC: Ville Syrjälä CC: Chris Wilson Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1467383528-16142-1-git-send-email-imre.deak@intel.com diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 927825f..2280c32 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -97,7 +97,8 @@ struct drm_i915_mocs_table { * end. 
*/ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - { /* 0x00000009 */ + [I915_MOCS_UNCACHED] = { + /* 0x00000009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0010 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x00000038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0030 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x0000003b */ .control_value = LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - { + [I915_MOCS_UNCACHED] = { /* 0x00000009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0010 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x00000038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0030 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x00000039 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 51b9360..33ce5ff 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -62,6 +62,30 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum i915_mocs_table_index { + /* + * Not cached anywhere, coherency between CPU and GPU accesses is + * guaranteed. + */ + I915_MOCS_UNCACHED, + /* + * Cacheability and coherency controlled by the kernel automatically + * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current + * usage of the surface (used for display scanout or not). + */ + I915_MOCS_PTE, + /* + * Cached in all GPU caches available on the platform. + * Coherency between CPU and GPU accesses to the surface is not + * guaranteed without extra synchronization. + */ + I915_MOCS_CACHED, +}; + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use -- cgit v0.10.2 From fe993bc9582ff23c9a81414acdfed8e55478e456 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 18 Jul 2016 16:27:57 -0700 Subject: drm/i915/guc: Revert "drm/i915/guc: enable GuC loading & submission by default" This reverts commit 041824ee25cfc535ba2d9a22c217df735ea2471e. We have latency issues that might impact the performance: #96606. and hangs and loading issues on resume after S4: #96526. This is also blocking a platform milestone so let's disable this for now while we make sure we don't have any more loading issue, or related basic hangs and it pass BAT for real in all platofmrs. In case BAT is wrong let's first fix BAT before re-enable it here. 
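For anyone who still wants to exercise GuC, the behaviour remains selectable at load time, e.g. by booting with i915.enable_guc_loading=1 i915.enable_guc_submission=1 (or 2 to make it mandatory); this patch only flips the defaults to 0, as the parameter descriptions in the diff below show.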
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96606 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96526 Cc: Chris Wilson Cc: Dave Gordon Cc: Tvrtko Ursulin Cc: Stable Cc: Jani Nikula Cc: Christophe Prigent Signed-off-by: Rodrigo Vivi Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468884477-30086-1-git-send-email-rodrigo.vivi@intel.com diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 8b13bfa..b6e404c 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -54,8 +54,8 @@ struct i915_params i915 __read_mostly = { .verbose_state_checks = 1, .nuclear_pageflip = 0, .edp_vswing = 0, - .enable_guc_loading = -1, - .enable_guc_submission = -1, + .enable_guc_loading = 0, + .enable_guc_submission = 0, .guc_log_level = -1, .enable_dp_mst = true, .inject_load_failure = 0, @@ -203,12 +203,12 @@ MODULE_PARM_DESC(edp_vswing, module_param_named_unsafe(enable_guc_loading, i915.enable_guc_loading, int, 0400); MODULE_PARM_DESC(enable_guc_loading, "Enable GuC firmware loading " - "(-1=auto [default], 0=never, 1=if available, 2=required)"); + "(-1=auto, 0=never [default], 1=if available, 2=required)"); module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, int, 0400); MODULE_PARM_DESC(enable_guc_submission, "Enable GuC submission " - "(-1=auto [default], 0=never, 1=if available, 2=required)"); + "(-1=auto, 0=never [default], 1=if available, 2=required)"); module_param_named(guc_log_level, i915.guc_log_level, int, 0400); MODULE_PARM_DESC(guc_log_level, -- cgit v0.10.2 From 05235c535404b79aa224bb63dfc7262aac95b7e4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 09:21:08 +0100 Subject: drm/i915: Move GEM request routines to i915_gem_request.c Migrate the request operations out of the main body of i915_gem.c and into their own C file for easier expansion. v2: Move __i915_add_request() across as well Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469002875-2335-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 75318eb..6092f0e 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -33,6 +33,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_gtt.o \ i915_gem.o \ i915_gem_render_state.o \ + i915_gem_request.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 27d9b2c..c97a755 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -61,6 +61,7 @@ #include "i915_gem.h" #include "i915_gem_gtt.h" #include "i915_gem_render_state.h" +#include "i915_gem_request.h" #include "intel_gvt.h" @@ -2365,171 +2366,6 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) (((__iter).curr += PAGE_SIZE) < (__iter).max) || \ ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0)) -/** - * Request queue structure. - * - * The request queue allows us to note sequence numbers that have been emitted - * and may be associated with active buffers to be retired. - * - * By keeping this list, we can avoid having to do questionable sequence - * number comparisons on buffer last_read|write_seqno. It also allows an - * emission time to be associated with the request for tracking how far ahead - * of the GPU the submission is. 
- * - * The requests are reference counted, so upon creation they should have an - * initial reference taken using kref_init - */ -struct drm_i915_gem_request { - struct kref ref; - - /** On Which ring this request was generated */ - struct drm_i915_private *i915; - struct intel_engine_cs *engine; - struct intel_signal_node signaling; - - /** GEM sequence number associated with the previous request, - * when the HWS breadcrumb is equal to this the GPU is processing - * this request. - */ - u32 previous_seqno; - - /** GEM sequence number associated with this request, - * when the HWS breadcrumb is equal or greater than this the GPU - * has finished processing this request. - */ - u32 seqno; - - /** Position in the ringbuffer of the start of the request */ - u32 head; - - /** - * Position in the ringbuffer of the start of the postfix. - * This is required to calculate the maximum available ringbuffer - * space without overwriting the postfix. - */ - u32 postfix; - - /** Position in the ringbuffer of the end of the whole request */ - u32 tail; - - /** Preallocate space in the ringbuffer for the emitting the request */ - u32 reserved_space; - - /** - * Context and ring buffer related to this request - * Contexts are refcounted, so when this request is associated with a - * context, we must increment the context's refcount, to guarantee that - * it persists while any request is linked to it. Requests themselves - * are also refcounted, so the request will only be freed when the last - * reference to it is dismissed, and the code in - * i915_gem_request_free() will then decrement the refcount on the - * context. - */ - struct i915_gem_context *ctx; - struct intel_ringbuffer *ringbuf; - - /** - * Context related to the previous request. - * As the contexts are accessed by the hardware until the switch is - * completed to a new context, the hardware may still be writing - * to the context object after the breadcrumb is visible. We must - * not unpin/unbind/prune that object whilst still active and so - * we keep the previous context pinned until the following (this) - * request is retired. - */ - struct i915_gem_context *previous_context; - - /** Batch buffer related to this request if any (used for - error state dump only) */ - struct drm_i915_gem_object *batch_obj; - - /** Time at which this request was emitted, in jiffies. */ - unsigned long emitted_jiffies; - - /** global list entry for this request */ - struct list_head list; - - struct drm_i915_file_private *file_priv; - /** file_priv list entry for this request */ - struct list_head client_list; - - /** process identifier submitting this request */ - struct pid *pid; - - /** - * The ELSP only accepts two elements at a time, so we queue - * context/tail pairs on a given queue (ring->execlist_queue) until the - * hardware is available. The queue serves a double purpose: we also use - * it to keep track of the up to 2 contexts currently in the hardware - * (usually one in execution and the other queued up by the GPU): We - * only remove elements from the head of the queue when the hardware - * informs us that an element has been completed. - * - * All accesses to the queue are mediated by a spinlock - * (ring->execlist_lock). - */ - - /** Execlist link in the submission queue.*/ - struct list_head execlist_link; - - /** Execlists no. of times this request has been sent to the ELSP */ - int elsp_submitted; - - /** Execlists context hardware id. 
*/ - unsigned ctx_hw_id; -}; - -struct drm_i915_gem_request * __must_check -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); -void i915_gem_request_free(struct kref *req_ref); -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file); - -static inline uint32_t -i915_gem_request_get_seqno(struct drm_i915_gem_request *req) -{ - return req ? req->seqno : 0; -} - -static inline struct intel_engine_cs * -i915_gem_request_get_engine(struct drm_i915_gem_request *req) -{ - return req ? req->engine : NULL; -} - -static inline struct drm_i915_gem_request * -i915_gem_request_reference(struct drm_i915_gem_request *req) -{ - if (req) - kref_get(&req->ref); - return req; -} - -static inline void -i915_gem_request_unreference(struct drm_i915_gem_request *req) -{ - kref_put(&req->ref, i915_gem_request_free); -} - -static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, - struct drm_i915_gem_request *src) -{ - if (src) - i915_gem_request_reference(src); - - if (*pdst) - i915_gem_request_unreference(*pdst); - - *pdst = src; -} - -/* - * XXX: i915_gem_request_completed should be here but currently needs the - * definition of i915_seqno_passed() which is below. It will be moved in - * a later patch when the call to i915_seqno_passed() is obsoleted... - */ - /* * A command that requires special handling by the command parser. */ @@ -3297,37 +3133,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, struct drm_i915_gem_object *new, unsigned frontbuffer_bits); -/** - * Returns true if seq1 is later than seq2. - */ -static inline bool -i915_seqno_passed(uint32_t seq1, uint32_t seq2) -{ - return (int32_t)(seq1 - seq2) >= 0; -} - -static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) -{ - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->previous_seqno); -} - -static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) -{ - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->seqno); -} - -bool __i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us); -static inline bool i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us) -{ - return (i915_gem_request_started(request) && - __i915_spin_request(request, state, timeout_us)); -} - -int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno); int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); struct drm_i915_gem_request * @@ -3385,18 +3190,6 @@ void i915_gem_cleanup_engines(struct drm_device *dev); int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv); int __must_check i915_gem_suspend(struct drm_device *dev); void i915_gem_resume(struct drm_device *dev); -void __i915_add_request(struct drm_i915_gem_request *req, - struct drm_i915_gem_object *batch_obj, - bool flush_caches); -#define i915_add_request(req) \ - __i915_add_request(req, NULL, true) -#define i915_add_request_no_flush(req) \ - __i915_add_request(req, NULL, false) -int __i915_wait_request(struct drm_i915_gem_request *req, - bool interruptible, - s64 *timeout, - struct intel_rps_client *rps); -int __must_check i915_wait_request(struct drm_i915_gem_request *req); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); int __must_check i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c 
b/drivers/gpu/drm/i915/i915_gem.c index e40fab1..6df1405 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1325,365 +1325,6 @@ put_rpm: return ret; } -static int -i915_gem_check_wedge(unsigned reset_counter, bool interruptible) -{ - if (__i915_terminally_wedged(reset_counter)) - return -EIO; - - if (__i915_reset_in_progress(reset_counter)) { - /* Non-interruptible callers can't handle -EAGAIN, hence return - * -EIO unconditionally for these. */ - if (!interruptible) - return -EIO; - - return -EAGAIN; - } - - return 0; -} - -static unsigned long local_clock_us(unsigned *cpu) -{ - unsigned long t; - - /* Cheaply and approximately convert from nanoseconds to microseconds. - * The result and subsequent calculations are also defined in the same - * approximate microseconds units. The principal source of timing - * error here is from the simple truncation. - * - * Note that local_clock() is only defined wrt to the current CPU; - * the comparisons are no longer valid if we switch CPUs. Instead of - * blocking preemption for the entire busywait, we can detect the CPU - * switch and use that as indicator of system load and a reason to - * stop busywaiting, see busywait_stop(). - */ - *cpu = get_cpu(); - t = local_clock() >> 10; - put_cpu(); - - return t; -} - -static bool busywait_stop(unsigned long timeout, unsigned cpu) -{ - unsigned this_cpu; - - if (time_after(local_clock_us(&this_cpu), timeout)) - return true; - - return this_cpu != cpu; -} - -bool __i915_spin_request(const struct drm_i915_gem_request *req, - int state, unsigned long timeout_us) -{ - unsigned cpu; - - /* When waiting for high frequency requests, e.g. during synchronous - * rendering split between the CPU and GPU, the finite amount of time - * required to set up the irq and wait upon it limits the response - * rate. By busywaiting on the request completion for a short while we - * can service the high frequency waits as quick as possible. However, - * if it is a slow request, we want to sleep as quickly as possible. - * The tradeoff between waiting and sleeping is roughly the time it - * takes to sleep on a request, on the order of a microsecond. - */ - - timeout_us += local_clock_us(&cpu); - do { - if (i915_gem_request_completed(req)) - return true; - - if (signal_pending_state(state, current)) - break; - - if (busywait_stop(timeout_us, cpu)) - break; - - cpu_relax_lowlatency(); - } while (!need_resched()); - - return false; -} - -/** - * __i915_wait_request - wait until execution of request has finished - * @req: duh! - * @interruptible: do an interruptible wait (normally yes) - * @timeout: in - how long to wait (NULL forever); out - how much time remaining - * @rps: RPS client - * - * Note: It is of utmost importance that the passed in seqno and reset_counter - * values have been read by the caller in an smp safe manner. Where read-side - * locks are involved, it is sufficient to read the reset_counter before - * unlocking the lock that protects the seqno. For lockless tricks, the - * reset_counter _must_ be read before, and an appropriate smp_rmb must be - * inserted. - * - * Returns 0 if the request was found within the alloted time. Else returns the - * errno with remaining time filled in timeout argument. - */ -int __i915_wait_request(struct drm_i915_gem_request *req, - bool interruptible, - s64 *timeout, - struct intel_rps_client *rps) -{ - int state = interruptible ? 
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - DEFINE_WAIT(reset); - struct intel_wait wait; - unsigned long timeout_remain; - s64 before = 0; /* Only to silence a compiler warning. */ - int ret = 0; - - might_sleep(); - - if (list_empty(&req->list)) - return 0; - - if (i915_gem_request_completed(req)) - return 0; - - timeout_remain = MAX_SCHEDULE_TIMEOUT; - if (timeout) { - if (WARN_ON(*timeout < 0)) - return -EINVAL; - - if (*timeout == 0) - return -ETIME; - - timeout_remain = nsecs_to_jiffies_timeout(*timeout); - - /* - * Record current time in case interrupted by signal, or wedged. - */ - before = ktime_get_raw_ns(); - } - - trace_i915_gem_request_wait_begin(req); - - /* This client is about to stall waiting for the GPU. In many cases - * this is undesirable and limits the throughput of the system, as - * many clients cannot continue processing user input/output whilst - * blocked. RPS autotuning may take tens of milliseconds to respond - * to the GPU load and thus incurs additional latency for the client. - * We can circumvent that by promoting the GPU frequency to maximum - * before we wait. This makes the GPU throttle up much more quickly - * (good for benchmarks and user experience, e.g. window animations), - * but at a cost of spending more power processing the workload - * (bad for battery). Not all clients even want their results - * immediately and for them we should just let the GPU select its own - * frequency to maximise efficiency. To prevent a single client from - * forcing the clocks too high for the whole system, we only allow - * each client to waitboost once in a busy period. - */ - if (INTEL_INFO(req->i915)->gen >= 6) - gen6_rps_boost(req->i915, rps, req->emitted_jiffies); - - /* Optimistic spin for the next ~jiffie before touching IRQs */ - if (i915_spin_request(req, state, 5)) - goto complete; - - set_current_state(state); - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_wait_init(&wait, req->seqno); - if (intel_engine_add_wait(req->engine, &wait)) - /* In order to check that we haven't missed the interrupt - * as we enabled it, we need to kick ourselves to do a - * coherent check on the seqno before we sleep. - */ - goto wakeup; - - for (;;) { - if (signal_pending_state(state, current)) { - ret = -ERESTARTSYS; - break; - } - - timeout_remain = io_schedule_timeout(timeout_remain); - if (timeout_remain == 0) { - ret = -ETIME; - break; - } - - if (intel_wait_complete(&wait)) - break; - - set_current_state(state); - -wakeup: - /* Carefully check if the request is complete, giving time - * for the seqno to be visible following the interrupt. - * We also have to check in case we are kicked by the GPU - * reset in order to drop the struct_mutex. - */ - if (__i915_request_irq_complete(req)) - break; - - /* Only spin if we know the GPU is processing this request */ - if (i915_spin_request(req, state, 2)) - break; - } - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_engine_remove_wait(req->engine, &wait); - __set_current_state(TASK_RUNNING); -complete: - trace_i915_gem_request_wait_end(req); - - if (timeout) { - s64 tres = *timeout - (ktime_get_raw_ns() - before); - - *timeout = tres < 0 ? 0 : tres; - - /* - * Apparently ktime isn't accurate enough and occasionally has a - * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch - * things up to make the test happy. We allow up to 1 jiffy. - * - * This is a regrssion from the timespec->ktime conversion. 
- */ - if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) - *timeout = 0; - } - - if (rps && req->seqno == req->engine->last_submitted_seqno) { - /* The GPU is now idle and this client has stalled. - * Since no other client has submitted a request in the - * meantime, assume that this client is the only one - * supplying work to the GPU but is unable to keep that - * work supplied because it is waiting. Since the GPU is - * then never kept fully busy, RPS autoclocking will - * keep the clocks relatively low, causing further delays. - * Compensate by giving the synchronous client credit for - * a waitboost next time. - */ - spin_lock(&req->i915->rps.client_lock); - list_del_init(&rps->link); - spin_unlock(&req->i915->rps.client_lock); - } - - return ret; -} - -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv; - - WARN_ON(!req || !file || req->file_priv); - - if (!req || !file) - return -EINVAL; - - if (req->file_priv) - return -EINVAL; - - file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - req->file_priv = file_priv; - list_add_tail(&req->client_list, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - - req->pid = get_pid(task_pid(current)); - - return 0; -} - -static inline void -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) -{ - struct drm_i915_file_private *file_priv = request->file_priv; - - if (!file_priv) - return; - - spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; - spin_unlock(&file_priv->mm.lock); - - put_pid(request->pid); - request->pid = NULL; -} - -static void i915_gem_request_retire(struct drm_i915_gem_request *request) -{ - trace_i915_gem_request_retire(request); - - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. - */ - request->ringbuf->last_retired_head = request->postfix; - - list_del_init(&request->list); - i915_gem_request_remove_from_client(request); - - if (request->previous_context) { - if (i915.enable_execlists) - intel_lr_context_unpin(request->previous_context, - request->engine); - } - - i915_gem_context_unreference(request->ctx); - i915_gem_request_unreference(request); -} - -static void -__i915_gem_request_retire__upto(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - struct drm_i915_gem_request *tmp; - - lockdep_assert_held(&engine->i915->drm.struct_mutex); - - if (list_empty(&req->list)) - return; - - do { - tmp = list_first_entry(&engine->request_list, - typeof(*tmp), list); - - i915_gem_request_retire(tmp); - } while (tmp != req); - - WARN_ON(i915_verify_lists(engine->dev)); -} - -/** - * Waits for a request to be signaled, and cleans up the - * request and object lists appropriately for that event. - * @req: request to wait on - */ -int -i915_wait_request(struct drm_i915_gem_request *req) -{ - struct drm_i915_private *dev_priv = req->i915; - bool interruptible; - int ret; - - interruptible = dev_priv->mm.interruptible; - - BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex)); - - ret = __i915_wait_request(req, interruptible, NULL, NULL); - if (ret) - return ret; - - /* If the GPU hung, we want to keep the requests to find the guilty. 
*/ - if (!i915_reset_in_progress(&dev_priv->gpu_error)) - __i915_gem_request_retire__upto(req); - - return 0; -} - /** * Ensures that all rendering to the object has completed and the object is * safe to unbind from the GTT or access from the CPU. @@ -1740,7 +1381,7 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj, i915_gem_object_retire__write(obj); if (!i915_reset_in_progress(&req->i915->gpu_error)) - __i915_gem_request_retire__upto(req); + i915_gem_request_retire_upto(req); } /* A nonblocking variant of the above wait. This is a highly dangerous routine @@ -2761,193 +2402,6 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) drm_gem_object_unreference(&obj->base); } -static int -i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) -{ - struct intel_engine_cs *engine; - int ret; - - /* Carefully retire all requests without writing to the rings */ - for_each_engine(engine, dev_priv) { - ret = intel_engine_idle(engine); - if (ret) - return ret; - } - i915_gem_retire_requests(dev_priv); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { - while (intel_kick_waiters(dev_priv) || - intel_kick_signalers(dev_priv)) - yield(); - } - - /* Finally reset hw state */ - for_each_engine(engine, dev_priv) - intel_ring_init_seqno(engine, seqno); - - return 0; -} - -int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - ret = i915_gem_init_seqno(dev_priv, seqno - 1); - if (ret) - return ret; - - /* Carefully set the last_seqno value so that wrap - * detection still works - */ - dev_priv->next_seqno = seqno; - dev_priv->last_seqno = seqno - 1; - if (dev_priv->last_seqno == 0) - dev_priv->last_seqno--; - - return 0; -} - -int -i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) -{ - /* reserve 0 for non-seqno */ - if (dev_priv->next_seqno == 0) { - int ret = i915_gem_init_seqno(dev_priv, 0); - if (ret) - return ret; - - dev_priv->next_seqno = 1; - } - - *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; - return 0; -} - -static void i915_gem_mark_busy(const struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - dev_priv->gt.active_engines |= intel_engine_flag(engine); - if (dev_priv->gt.awake) - return; - - intel_runtime_pm_get_noresume(dev_priv); - dev_priv->gt.awake = true; - - intel_enable_gt_powersave(dev_priv); - i915_update_gfx_val(dev_priv); - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_busy(dev_priv); - - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - -/* - * NB: This function is not allowed to fail. Doing so would mean the the - * request is not being tracked for completion but the work itself is - * going to happen on the hardware. This would be a Bad Thing(tm). - */ -void __i915_add_request(struct drm_i915_gem_request *request, - struct drm_i915_gem_object *obj, - bool flush_caches) -{ - struct intel_engine_cs *engine; - struct intel_ringbuffer *ringbuf; - u32 request_start; - u32 reserved_tail; - int ret; - - if (WARN_ON(request == NULL)) - return; - - engine = request->engine; - ringbuf = request->ringbuf; - - /* - * To ensure that this call will not fail, space for its emissions - * should already have been reserved in the ring buffer. 
Let the ring - * know that it is time to use that space up. - */ - request_start = intel_ring_get_tail(ringbuf); - reserved_tail = request->reserved_space; - request->reserved_space = 0; - - /* - * Emit any outstanding flushes - execbuf can fail to emit the flush - * after having emitted the batchbuffer command. Hence we need to fix - * things up similar to emitting the lazy request. The difference here - * is that the flush _must_ happen before the next request, no matter - * what. - */ - if (flush_caches) { - if (i915.enable_execlists) - ret = logical_ring_flush_all_caches(request); - else - ret = intel_ring_flush_all_caches(request); - /* Not allowed to fail! */ - WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); - } - - trace_i915_gem_request_add(request); - - request->head = request_start; - - /* Whilst this request exists, batch_obj will be on the - * active_list, and so will hold the active reference. Only when this - * request is retired will the the batch_obj be moved onto the - * inactive_list and lose its active reference. Hence we do not need - * to explicitly hold another reference here. - */ - request->batch_obj = obj; - - /* Seal the request and mark it as pending execution. Note that - * we may inspect this state, without holding any locks, during - * hangcheck. Hence we apply the barrier to ensure that we do not - * see a more recent value in the hws than we are tracking. - */ - request->emitted_jiffies = jiffies; - request->previous_seqno = engine->last_submitted_seqno; - smp_store_mb(engine->last_submitted_seqno, request->seqno); - list_add_tail(&request->list, &engine->request_list); - - /* Record the position of the start of the request so that - * should we detect the updated seqno part-way through the - * GPU processing the request, we never over-estimate the - * position of the head. - */ - request->postfix = intel_ring_get_tail(ringbuf); - - if (i915.enable_execlists) - ret = engine->emit_request(request); - else { - ret = engine->add_request(request); - - request->tail = intel_ring_get_tail(ringbuf); - } - /* Not allowed to fail! */ - WARN(ret, "emit|add_request failed: %d!\n", ret); - /* Sanity check that the reserved size was large enough. */ - ret = intel_ring_get_tail(ringbuf) - request_start; - if (ret < 0) - ret += ringbuf->size; - WARN_ONCE(ret > reserved_tail, - "Not enough space reserved (%d bytes) " - "for adding the request (%d bytes)\n", - reserved_tail, ret); - - i915_gem_mark_busy(engine); -} - static bool i915_context_is_banned(const struct i915_gem_context *ctx) { unsigned long elapsed; @@ -2979,101 +2433,6 @@ static void i915_set_reset_status(struct i915_gem_context *ctx, } } -void i915_gem_request_free(struct kref *req_ref) -{ - struct drm_i915_gem_request *req = container_of(req_ref, - typeof(*req), ref); - kmem_cache_free(req->i915->requests, req); -} - -static inline int -__i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx, - struct drm_i915_gem_request **req_out) -{ - struct drm_i915_private *dev_priv = engine->i915; - unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); - struct drm_i915_gem_request *req; - int ret; - - if (!req_out) - return -EINVAL; - - *req_out = NULL; - - /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex - * and restart. 
- */ - ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); - if (ret) - return ret; - - req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); - if (req == NULL) - return -ENOMEM; - - ret = i915_gem_get_seqno(engine->i915, &req->seqno); - if (ret) - goto err; - - kref_init(&req->ref); - req->i915 = dev_priv; - req->engine = engine; - req->ctx = ctx; - i915_gem_context_reference(req->ctx); - - /* - * Reserve space in the ring buffer for all the commands required to - * eventually emit this request. This is to guarantee that the - * i915_add_request() call can't fail. Note that the reserve may need - * to be redone if the request is not actually submitted straight - * away, e.g. because a GPU scheduler has deferred it. - */ - req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; - - if (i915.enable_execlists) - ret = intel_logical_ring_alloc_request_extras(req); - else - ret = intel_ring_alloc_request_extras(req); - if (ret) - goto err_ctx; - - *req_out = req; - return 0; - -err_ctx: - i915_gem_context_unreference(ctx); -err: - kmem_cache_free(dev_priv->requests, req); - return ret; -} - -/** - * i915_gem_request_alloc - allocate a request structure - * - * @engine: engine that we wish to issue the request on. - * @ctx: context that the request will be associated with. - * This can be NULL if the request is not directly related to - * any specific user context, in which case this function will - * choose an appropriate context to use. - * - * Returns a pointer to the allocated request if successful, - * or an error code if not. - */ -struct drm_i915_gem_request * -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) -{ - struct drm_i915_gem_request *req; - int err; - - if (ctx == NULL) - ctx = engine->i915->kernel_context; - err = __i915_gem_request_alloc(engine, ctx, &req); - return err ? ERR_PTR(err) : req; -} - struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { @@ -3147,14 +2506,14 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) * implicit references on things like e.g. ppgtt address spaces through * the request. 
*/ - while (!list_empty(&engine->request_list)) { + if (!list_empty(&engine->request_list)) { struct drm_i915_gem_request *request; - request = list_first_entry(&engine->request_list, - struct drm_i915_gem_request, - list); + request = list_last_entry(&engine->request_list, + struct drm_i915_gem_request, + list); - i915_gem_request_retire(request); + i915_gem_request_retire_upto(request); } /* Having flushed all requests from all queues, we know that all @@ -3222,7 +2581,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) if (!i915_gem_request_completed(request)) break; - i915_gem_request_retire(request); + i915_gem_request_retire_upto(request); } /* Move any buffers on the active list that are no longer referenced diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c new file mode 100644 index 0000000..9e9aa6b --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -0,0 +1,658 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_drv.h" + +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file) +{ + struct drm_i915_private *dev_private; + struct drm_i915_file_private *file_priv; + + WARN_ON(!req || !file || req->file_priv); + + if (!req || !file) + return -EINVAL; + + if (req->file_priv) + return -EINVAL; + + dev_private = req->i915; + file_priv = file->driver_priv; + + spin_lock(&file_priv->mm.lock); + req->file_priv = file_priv; + list_add_tail(&req->client_list, &file_priv->mm.request_list); + spin_unlock(&file_priv->mm.lock); + + req->pid = get_pid(task_pid(current)); + + return 0; +} + +static inline void +i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) +{ + struct drm_i915_file_private *file_priv = request->file_priv; + + if (!file_priv) + return; + + spin_lock(&file_priv->mm.lock); + list_del(&request->client_list); + request->file_priv = NULL; + spin_unlock(&file_priv->mm.lock); + + put_pid(request->pid); + request->pid = NULL; +} + +static void i915_gem_request_retire(struct drm_i915_gem_request *request) +{ + trace_i915_gem_request_retire(request); + list_del_init(&request->list); + + /* We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. 
+ * + * Note this requires that we are always called in request + * completion order. + */ + request->ringbuf->last_retired_head = request->postfix; + + i915_gem_request_remove_from_client(request); + + if (request->previous_context) { + if (i915.enable_execlists) + intel_lr_context_unpin(request->previous_context, + request->engine); + } + + i915_gem_context_unreference(request->ctx); + i915_gem_request_unreference(request); +} + +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) +{ + struct intel_engine_cs *engine = req->engine; + struct drm_i915_gem_request *tmp; + + lockdep_assert_held(&req->i915->drm.struct_mutex); + + if (list_empty(&req->list)) + return; + + do { + tmp = list_first_entry(&engine->request_list, + typeof(*tmp), list); + + i915_gem_request_retire(tmp); + } while (tmp != req); + + WARN_ON(i915_verify_lists(engine->dev)); +} + +static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible) +{ + if (__i915_terminally_wedged(reset_counter)) + return -EIO; + + if (__i915_reset_in_progress(reset_counter)) { + /* Non-interruptible callers can't handle -EAGAIN, hence return + * -EIO unconditionally for these. + */ + if (!interruptible) + return -EIO; + + return -EAGAIN; + } + + return 0; +} + +static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) +{ + struct intel_engine_cs *engine; + int ret; + + /* Carefully retire all requests without writing to the rings */ + for_each_engine(engine, dev_priv) { + ret = intel_engine_idle(engine); + if (ret) + return ret; + } + i915_gem_retire_requests(dev_priv); + + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ + if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { + while (intel_kick_waiters(dev_priv) || + intel_kick_signalers(dev_priv)) + yield(); + } + + /* Finally reset hw state */ + for_each_engine(engine, dev_priv) + intel_ring_init_seqno(engine, seqno); + + return 0; +} + +int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + int ret; + + if (seqno == 0) + return -EINVAL; + + /* HWS page needs to be set less than what we + * will inject to ring + */ + ret = i915_gem_init_seqno(dev_priv, seqno - 1); + if (ret) + return ret; + + /* Carefully set the last_seqno value so that wrap + * detection still works + */ + dev_priv->next_seqno = seqno; + dev_priv->last_seqno = seqno - 1; + if (dev_priv->last_seqno == 0) + dev_priv->last_seqno--; + + return 0; +} + +static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) +{ + /* reserve 0 for non-seqno */ + if (unlikely(dev_priv->next_seqno == 0)) { + int ret; + + ret = i915_gem_init_seqno(dev_priv, 0); + if (ret) + return ret; + + dev_priv->next_seqno = 1; + } + + *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; + return 0; +} + +static inline int +__i915_gem_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx, + struct drm_i915_gem_request **req_out) +{ + struct drm_i915_private *dev_priv = engine->i915; + unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error); + struct drm_i915_gem_request *req; + int ret; + + if (!req_out) + return -EINVAL; + + *req_out = NULL; + + /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex + * and restart. 
+ */ + ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); + if (ret) + return ret; + + req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); + if (!req) + return -ENOMEM; + + ret = i915_gem_get_seqno(dev_priv, &req->seqno); + if (ret) + goto err; + + kref_init(&req->ref); + req->i915 = dev_priv; + req->engine = engine; + req->ctx = ctx; + i915_gem_context_reference(ctx); + + /* + * Reserve space in the ring buffer for all the commands required to + * eventually emit this request. This is to guarantee that the + * i915_add_request() call can't fail. Note that the reserve may need + * to be redone if the request is not actually submitted straight + * away, e.g. because a GPU scheduler has deferred it. + */ + req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; + + if (i915.enable_execlists) + ret = intel_logical_ring_alloc_request_extras(req); + else + ret = intel_ring_alloc_request_extras(req); + if (ret) + goto err_ctx; + + *req_out = req; + return 0; + +err_ctx: + i915_gem_context_unreference(ctx); +err: + kmem_cache_free(dev_priv->requests, req); + return ret; +} + +/** + * i915_gem_request_alloc - allocate a request structure + * + * @engine: engine that we wish to issue the request on. + * @ctx: context that the request will be associated with. + * This can be NULL if the request is not directly related to + * any specific user context, in which case this function will + * choose an appropriate context to use. + * + * Returns a pointer to the allocated request if successful, + * or an error code if not. + */ +struct drm_i915_gem_request * +i915_gem_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + struct drm_i915_gem_request *req; + int err; + + if (!ctx) + ctx = engine->i915->kernel_context; + err = __i915_gem_request_alloc(engine, ctx, &req); + return err ? ERR_PTR(err) : req; +} + +static void i915_gem_mark_busy(const struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + dev_priv->gt.active_engines |= intel_engine_flag(engine); + if (dev_priv->gt.awake) + return; + + intel_runtime_pm_get_noresume(dev_priv); + dev_priv->gt.awake = true; + + intel_enable_gt_powersave(dev_priv); + i915_update_gfx_val(dev_priv); + if (INTEL_GEN(dev_priv) >= 6) + gen6_rps_busy(dev_priv); + + queue_delayed_work(dev_priv->wq, + &dev_priv->gt.retire_work, + round_jiffies_up_relative(HZ)); +} + +/* + * NB: This function is not allowed to fail. Doing so would mean the the + * request is not being tracked for completion but the work itself is + * going to happen on the hardware. This would be a Bad Thing(tm). + */ +void __i915_add_request(struct drm_i915_gem_request *request, + struct drm_i915_gem_object *obj, + bool flush_caches) +{ + struct intel_engine_cs *engine; + struct intel_ringbuffer *ringbuf; + u32 request_start; + u32 reserved_tail; + int ret; + + if (WARN_ON(!request)) + return; + + engine = request->engine; + ringbuf = request->ringbuf; + + /* + * To ensure that this call will not fail, space for its emissions + * should already have been reserved in the ring buffer. Let the ring + * know that it is time to use that space up. + */ + request_start = intel_ring_get_tail(ringbuf); + reserved_tail = request->reserved_space; + request->reserved_space = 0; + + /* + * Emit any outstanding flushes - execbuf can fail to emit the flush + * after having emitted the batchbuffer command. Hence we need to fix + * things up similar to emitting the lazy request. 
The difference here + * is that the flush _must_ happen before the next request, no matter + * what. + */ + if (flush_caches) { + if (i915.enable_execlists) + ret = logical_ring_flush_all_caches(request); + else + ret = intel_ring_flush_all_caches(request); + /* Not allowed to fail! */ + WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); + } + + trace_i915_gem_request_add(request); + + request->head = request_start; + + /* Whilst this request exists, batch_obj will be on the + * active_list, and so will hold the active reference. Only when this + * request is retired will the the batch_obj be moved onto the + * inactive_list and lose its active reference. Hence we do not need + * to explicitly hold another reference here. + */ + request->batch_obj = obj; + + /* Seal the request and mark it as pending execution. Note that + * we may inspect this state, without holding any locks, during + * hangcheck. Hence we apply the barrier to ensure that we do not + * see a more recent value in the hws than we are tracking. + */ + request->emitted_jiffies = jiffies; + request->previous_seqno = engine->last_submitted_seqno; + smp_store_mb(engine->last_submitted_seqno, request->seqno); + list_add_tail(&request->list, &engine->request_list); + + /* Record the position of the start of the request so that + * should we detect the updated seqno part-way through the + * GPU processing the request, we never over-estimate the + * position of the head. + */ + request->postfix = intel_ring_get_tail(ringbuf); + + if (i915.enable_execlists) { + ret = engine->emit_request(request); + } else { + ret = engine->add_request(request); + + request->tail = intel_ring_get_tail(ringbuf); + } + /* Not allowed to fail! */ + WARN(ret, "emit|add_request failed: %d!\n", ret); + /* Sanity check that the reserved size was large enough. */ + ret = intel_ring_get_tail(ringbuf) - request_start; + if (ret < 0) + ret += ringbuf->size; + WARN_ONCE(ret > reserved_tail, + "Not enough space reserved (%d bytes) " + "for adding the request (%d bytes)\n", + reserved_tail, ret); + + i915_gem_mark_busy(engine); +} + +static unsigned long local_clock_us(unsigned int *cpu) +{ + unsigned long t; + + /* Cheaply and approximately convert from nanoseconds to microseconds. + * The result and subsequent calculations are also defined in the same + * approximate microseconds units. The principal source of timing + * error here is from the simple truncation. + * + * Note that local_clock() is only defined wrt to the current CPU; + * the comparisons are no longer valid if we switch CPUs. Instead of + * blocking preemption for the entire busywait, we can detect the CPU + * switch and use that as indicator of system load and a reason to + * stop busywaiting, see busywait_stop(). + */ + *cpu = get_cpu(); + t = local_clock() >> 10; + put_cpu(); + + return t; +} + +static bool busywait_stop(unsigned long timeout, unsigned int cpu) +{ + unsigned int this_cpu; + + if (time_after(local_clock_us(&this_cpu), timeout)) + return true; + + return this_cpu != cpu; +} + +bool __i915_spin_request(const struct drm_i915_gem_request *req, + int state, unsigned long timeout_us) +{ + unsigned int cpu; + + /* When waiting for high frequency requests, e.g. during synchronous + * rendering split between the CPU and GPU, the finite amount of time + * required to set up the irq and wait upon it limits the response + * rate. By busywaiting on the request completion for a short while we + * can service the high frequency waits as quick as possible. 
However, + * if it is a slow request, we want to sleep as quickly as possible. + * The tradeoff between waiting and sleeping is roughly the time it + * takes to sleep on a request, on the order of a microsecond. + */ + + timeout_us += local_clock_us(&cpu); + do { + if (i915_gem_request_completed(req)) + return true; + + if (signal_pending_state(state, current)) + break; + + if (busywait_stop(timeout_us, cpu)) + break; + + cpu_relax_lowlatency(); + } while (!need_resched()); + + return false; +} + +/** + * __i915_wait_request - wait until execution of request has finished + * @req: duh! + * @interruptible: do an interruptible wait (normally yes) + * @timeout: in - how long to wait (NULL forever); out - how much time remaining + * @rps: client to charge for RPS boosting + * + * Note: It is of utmost importance that the passed in seqno and reset_counter + * values have been read by the caller in an smp safe manner. Where read-side + * locks are involved, it is sufficient to read the reset_counter before + * unlocking the lock that protects the seqno. For lockless tricks, the + * reset_counter _must_ be read before, and an appropriate smp_rmb must be + * inserted. + * + * Returns 0 if the request was found within the alloted time. Else returns the + * errno with remaining time filled in timeout argument. + */ +int __i915_wait_request(struct drm_i915_gem_request *req, + bool interruptible, + s64 *timeout, + struct intel_rps_client *rps) +{ + int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + DEFINE_WAIT(reset); + struct intel_wait wait; + unsigned long timeout_remain; + int ret = 0; + + might_sleep(); + + if (list_empty(&req->list)) + return 0; + + if (i915_gem_request_completed(req)) + return 0; + + timeout_remain = MAX_SCHEDULE_TIMEOUT; + if (timeout) { + if (WARN_ON(*timeout < 0)) + return -EINVAL; + + if (*timeout == 0) + return -ETIME; + + /* Record current time in case interrupted, or wedged */ + timeout_remain = nsecs_to_jiffies_timeout(*timeout); + *timeout += ktime_get_raw_ns(); + } + + trace_i915_gem_request_wait_begin(req); + + /* This client is about to stall waiting for the GPU. In many cases + * this is undesirable and limits the throughput of the system, as + * many clients cannot continue processing user input/output whilst + * blocked. RPS autotuning may take tens of milliseconds to respond + * to the GPU load and thus incurs additional latency for the client. + * We can circumvent that by promoting the GPU frequency to maximum + * before we wait. This makes the GPU throttle up much more quickly + * (good for benchmarks and user experience, e.g. window animations), + * but at a cost of spending more power processing the workload + * (bad for battery). Not all clients even want their results + * immediately and for them we should just let the GPU select its own + * frequency to maximise efficiency. To prevent a single client from + * forcing the clocks too high for the whole system, we only allow + * each client to waitboost once in a busy period. 
+ */ + if (INTEL_GEN(req->i915) >= 6) + gen6_rps_boost(req->i915, rps, req->emitted_jiffies); + + /* Optimistic spin for the next ~jiffie before touching IRQs */ + if (i915_spin_request(req, state, 5)) + goto complete; + + set_current_state(state); + add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + + intel_wait_init(&wait, req->seqno); + if (intel_engine_add_wait(req->engine, &wait)) + /* In order to check that we haven't missed the interrupt + * as we enabled it, we need to kick ourselves to do a + * coherent check on the seqno before we sleep. + */ + goto wakeup; + + for (;;) { + if (signal_pending_state(state, current)) { + ret = -ERESTARTSYS; + break; + } + + timeout_remain = io_schedule_timeout(timeout_remain); + if (timeout_remain == 0) { + ret = -ETIME; + break; + } + + if (intel_wait_complete(&wait)) + break; + + set_current_state(state); + +wakeup: + /* Carefully check if the request is complete, giving time + * for the seqno to be visible following the interrupt. + * We also have to check in case we are kicked by the GPU + * reset in order to drop the struct_mutex. + */ + if (__i915_request_irq_complete(req)) + break; + + /* Only spin if we know the GPU is processing this request */ + if (i915_spin_request(req, state, 2)) + break; + } + remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + + intel_engine_remove_wait(req->engine, &wait); + __set_current_state(TASK_RUNNING); +complete: + trace_i915_gem_request_wait_end(req); + + if (timeout) { + *timeout -= ktime_get_raw_ns(); + if (*timeout < 0) + *timeout = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regrssion from the timespec->ktime conversion. + */ + if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) + *timeout = 0; + } + + if (rps && req->seqno == req->engine->last_submitted_seqno) { + /* The GPU is now idle and this client has stalled. + * Since no other client has submitted a request in the + * meantime, assume that this client is the only one + * supplying work to the GPU but is unable to keep that + * work supplied because it is waiting. Since the GPU is + * then never kept fully busy, RPS autoclocking will + * keep the clocks relatively low, causing further delays. + * Compensate by giving the synchronous client credit for + * a waitboost next time. + */ + spin_lock(&req->i915->rps.client_lock); + list_del_init(&rps->link); + spin_unlock(&req->i915->rps.client_lock); + } + + return ret; +} + +/** + * Waits for a request to be signaled, and cleans up the + * request and object lists appropriately for that event. + */ +int i915_wait_request(struct drm_i915_gem_request *req) +{ + int ret; + + GEM_BUG_ON(!req); + lockdep_assert_held(&req->i915->drm.struct_mutex); + + ret = __i915_wait_request(req, req->i915->mm.interruptible, NULL, NULL); + if (ret) + return ret; + + /* If the GPU hung, we want to keep the requests to find the guilty. 
*/ + if (!i915_reset_in_progress(&req->i915->gpu_error)) + i915_gem_request_retire_upto(req); + + return 0; +} + +void i915_gem_request_free(struct kref *req_ref) +{ + struct drm_i915_gem_request *req = + container_of(req_ref, typeof(*req), ref); + kmem_cache_free(req->i915->requests, req); +} diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h new file mode 100644 index 0000000..ea700be --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -0,0 +1,238 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef I915_GEM_REQUEST_H +#define I915_GEM_REQUEST_H + +/** + * Request queue structure. + * + * The request queue allows us to note sequence numbers that have been emitted + * and may be associated with active buffers to be retired. + * + * By keeping this list, we can avoid having to do questionable sequence + * number comparisons on buffer last_read|write_seqno. It also allows an + * emission time to be associated with the request for tracking how far ahead + * of the GPU the submission is. + * + * The requests are reference counted, so upon creation they should have an + * initial reference taken using kref_init + */ +struct drm_i915_gem_request { + struct kref ref; + + /** On Which ring this request was generated */ + struct drm_i915_private *i915; + + /** + * Context and ring buffer related to this request + * Contexts are refcounted, so when this request is associated with a + * context, we must increment the context's refcount, to guarantee that + * it persists while any request is linked to it. Requests themselves + * are also refcounted, so the request will only be freed when the last + * reference to it is dismissed, and the code in + * i915_gem_request_free() will then decrement the refcount on the + * context. + */ + struct i915_gem_context *ctx; + struct intel_engine_cs *engine; + struct intel_ringbuffer *ringbuf; + struct intel_signal_node signaling; + + /** GEM sequence number associated with the previous request, + * when the HWS breadcrumb is equal to this the GPU is processing + * this request. + */ + u32 previous_seqno; + + /** GEM sequence number associated with this request, + * when the HWS breadcrumb is equal or greater than this the GPU + * has finished processing this request. 
+ */ + u32 seqno; + + /** Position in the ringbuffer of the start of the request */ + u32 head; + + /** + * Position in the ringbuffer of the start of the postfix. + * This is required to calculate the maximum available ringbuffer + * space without overwriting the postfix. + */ + u32 postfix; + + /** Position in the ringbuffer of the end of the whole request */ + u32 tail; + + /** Preallocate space in the ringbuffer for the emitting the request */ + u32 reserved_space; + + /** + * Context related to the previous request. + * As the contexts are accessed by the hardware until the switch is + * completed to a new context, the hardware may still be writing + * to the context object after the breadcrumb is visible. We must + * not unpin/unbind/prune that object whilst still active and so + * we keep the previous context pinned until the following (this) + * request is retired. + */ + struct i915_gem_context *previous_context; + + /** Batch buffer related to this request if any (used for + * error state dump only). + */ + struct drm_i915_gem_object *batch_obj; + + /** Time at which this request was emitted, in jiffies. */ + unsigned long emitted_jiffies; + + /** global list entry for this request */ + struct list_head list; + + struct drm_i915_file_private *file_priv; + /** file_priv list entry for this request */ + struct list_head client_list; + + /** process identifier submitting this request */ + struct pid *pid; + + /** + * The ELSP only accepts two elements at a time, so we queue + * context/tail pairs on a given queue (ring->execlist_queue) until the + * hardware is available. The queue serves a double purpose: we also use + * it to keep track of the up to 2 contexts currently in the hardware + * (usually one in execution and the other queued up by the GPU): We + * only remove elements from the head of the queue when the hardware + * informs us that an element has been completed. + * + * All accesses to the queue are mediated by a spinlock + * (ring->execlist_lock). + */ + + /** Execlist link in the submission queue.*/ + struct list_head execlist_link; + + /** Execlists no. of times this request has been sent to the ELSP */ + int elsp_submitted; + + /** Execlists context hardware id. */ + unsigned int ctx_hw_id; +}; + +struct drm_i915_gem_request * __must_check +i915_gem_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); +void i915_gem_request_free(struct kref *req_ref); +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file); +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); + +static inline u32 +i915_gem_request_get_seqno(struct drm_i915_gem_request *req) +{ + return req ? req->seqno : 0; +} + +static inline struct intel_engine_cs * +i915_gem_request_get_engine(struct drm_i915_gem_request *req) +{ + return req ? 
req->engine : NULL; +} + +static inline struct drm_i915_gem_request * +i915_gem_request_reference(struct drm_i915_gem_request *req) +{ + if (req) + kref_get(&req->ref); + return req; +} + +static inline void +i915_gem_request_unreference(struct drm_i915_gem_request *req) +{ + kref_put(&req->ref, i915_gem_request_free); +} + +static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, + struct drm_i915_gem_request *src) +{ + if (src) + i915_gem_request_reference(src); + + if (*pdst) + i915_gem_request_unreference(*pdst); + + *pdst = src; +} + +void __i915_add_request(struct drm_i915_gem_request *req, + struct drm_i915_gem_object *batch_obj, + bool flush_caches); +#define i915_add_request(req) \ + __i915_add_request(req, NULL, true) +#define i915_add_request_no_flush(req) \ + __i915_add_request(req, NULL, false) + +struct intel_rps_client; + +int __i915_wait_request(struct drm_i915_gem_request *req, + bool interruptible, + s64 *timeout, + struct intel_rps_client *rps); +int __must_check i915_wait_request(struct drm_i915_gem_request *req); + +static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); + +/** + * Returns true if seq1 is later than seq2. + */ +static inline bool i915_seqno_passed(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + +static inline bool +i915_gem_request_started(const struct drm_i915_gem_request *req) +{ + return i915_seqno_passed(intel_engine_get_seqno(req->engine), + req->previous_seqno); +} + +static inline bool +i915_gem_request_completed(const struct drm_i915_gem_request *req) +{ + return i915_seqno_passed(intel_engine_get_seqno(req->engine), + req->seqno); +} + +bool __i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us); +static inline bool i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us) +{ + return (i915_gem_request_started(request) && + __i915_spin_request(request, state, timeout_us)); +} + +#endif /* I915_GEM_REQUEST_H */ -- cgit v0.10.2 From 9b5f4e5ed6fd58390ecad3772b80936357f1aba6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 09:21:09 +0100 Subject: drm/i915: Retire oldest completed request before allocating next In order to keep the memory allocated for requests reasonably tight, try to reuse the oldest request (so long as it is completed and has no external references) for the next allocation. v2: Throw in a comment to hopefully make sure no one mistakes the optimistic retirement of the oldest request for simply stealing it. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1469002875-2335-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 9e9aa6b..5cbb11e 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -226,6 +226,14 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) return ret; + /* Move the oldest request to the slab-cache (if not in use!) 
*/ + if (!list_empty(&engine->request_list)) { + req = list_first_entry(&engine->request_list, + typeof(*req), list); + if (i915_gem_request_completed(req)) + i915_gem_request_retire(req); + } + req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); if (!req) return -ENOMEM; -- cgit v0.10.2 From c4b0930bf418d5fab1d75ab462f0a7ff155f1b33 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 09:21:10 +0100 Subject: drm/i915: Mark all current requests as complete before resetting them Following a GPU reset upon hang, we retire all the requests and then mark them all as complete. If we mark them as complete first, we both keep the normal retirement order (completed first then retired) and provide a small optimisation for concurrent lookups. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1469002875-2335-3-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6df1405..61729d6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2486,6 +2486,12 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) i915_gem_object_retire__read(obj, engine->id); } + /* Mark all pending requests as complete so that any concurrent + * (lockless) lookup doesn't try and wait upon the request as we + * reset it. + */ + intel_ring_init_seqno(engine, engine->last_submitted_seqno); + /* * Clear the execlists queue up before freeing the requests, as those * are the ones that keep the context and ringbuffer backing objects @@ -2528,8 +2534,6 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) intel_ring_update_space(buffer); } - intel_ring_init_seqno(engine, engine->last_submitted_seqno); - engine->i915->gt.active_engines &= ~intel_engine_flag(engine); } -- cgit v0.10.2 From 04769652c8c7ad4779a4b943dcf889a2020226f4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 09:21:11 +0100 Subject: drm/i915: Derive GEM requests from dma-fence dma-buf provides a generic fence class for interoperation between drivers. Internally we use the request structure as a fence, and so with only a little bit of interfacing we can rebase those requests on top of dma-buf fences. This will allow us, in the future, to pass those fences back to userspace or between drivers. v2: The fence_context needs to be globally unique, not just unique to this device. Signed-off-by: Chris Wilson Cc: Jesse Barnes Cc: Daniel Vetter Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1469002875-2335-4-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 90aef45..55fd3d9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -768,7 +768,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data) if (req->pid) task = pid_task(req->pid, PIDTYPE_PID); seq_printf(m, " %x @ %d: %s [%d]\n", - req->seqno, + req->fence.seqno, (int) (jiffies - req->emitted_jiffies), task ? task->comm : "", task ? 
task->pid : -1); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 5cbb11e..6528536 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -24,6 +24,95 @@ #include "i915_drv.h" +static const char *i915_fence_get_driver_name(struct fence *fence) +{ + return "i915"; +} + +static const char *i915_fence_get_timeline_name(struct fence *fence) +{ + /* Timelines are bound by eviction to a VM. However, since + * we only have a global seqno at the moment, we only have + * a single timeline. Note that each timeline will have + * multiple execution contexts (fence contexts) as we allow + * engines within a single timeline to execute in parallel. + */ + return "global"; +} + +static bool i915_fence_signaled(struct fence *fence) +{ + return i915_gem_request_completed(to_request(fence)); +} + +static bool i915_fence_enable_signaling(struct fence *fence) +{ + if (i915_fence_signaled(fence)) + return false; + + intel_engine_enable_signaling(to_request(fence)); + return true; +} + +static signed long i915_fence_wait(struct fence *fence, + bool interruptible, + signed long timeout_jiffies) +{ + s64 timeout_ns, *timeout; + int ret; + + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) { + timeout_ns = jiffies_to_nsecs(timeout_jiffies); + timeout = &timeout_ns; + } else { + timeout = NULL; + } + + ret = __i915_wait_request(to_request(fence), + interruptible, timeout, + NULL); + if (ret == -ETIME) + return 0; + + if (ret < 0) + return ret; + + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) + timeout_jiffies = nsecs_to_jiffies(timeout_ns); + + return timeout_jiffies; +} + +static void i915_fence_value_str(struct fence *fence, char *str, int size) +{ + snprintf(str, size, "%u", fence->seqno); +} + +static void i915_fence_timeline_value_str(struct fence *fence, char *str, + int size) +{ + snprintf(str, size, "%u", + intel_engine_get_seqno(to_request(fence)->engine)); +} + +static void i915_fence_release(struct fence *fence) +{ + struct drm_i915_gem_request *req = to_request(fence); + + kmem_cache_free(req->i915->requests, req); +} + +const struct fence_ops i915_fence_ops = { + .get_driver_name = i915_fence_get_driver_name, + .get_timeline_name = i915_fence_get_timeline_name, + .enable_signaling = i915_fence_enable_signaling, + .signaled = i915_fence_signaled, + .wait = i915_fence_wait, + .release = i915_fence_release, + .fence_value_str = i915_fence_value_str, + .timeline_value_str = i915_fence_timeline_value_str, +}; + int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, struct drm_file *file) { @@ -211,6 +300,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, struct drm_i915_private *dev_priv = engine->i915; unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error); struct drm_i915_gem_request *req; + u32 seqno; int ret; if (!req_out) @@ -238,11 +328,17 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, if (!req) return -ENOMEM; - ret = i915_gem_get_seqno(dev_priv, &req->seqno); + ret = i915_gem_get_seqno(dev_priv, &seqno); if (ret) goto err; - kref_init(&req->ref); + spin_lock_init(&req->lock); + fence_init(&req->fence, + &i915_fence_ops, + &req->lock, + engine->fence_context, + seqno); + req->i915 = dev_priv; req->engine = engine; req->ctx = ctx; @@ -385,7 +481,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ request->emitted_jiffies = jiffies; request->previous_seqno = engine->last_submitted_seqno; - smp_store_mb(engine->last_submitted_seqno, 
request->seqno); + smp_store_mb(engine->last_submitted_seqno, request->fence.seqno); list_add_tail(&request->list, &engine->request_list); /* Record the position of the start of the request so that @@ -556,7 +652,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, set_current_state(state); add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - intel_wait_init(&wait, req->seqno); + intel_wait_init(&wait, req->fence.seqno); if (intel_engine_add_wait(req->engine, &wait)) /* In order to check that we haven't missed the interrupt * as we enabled it, we need to kick ourselves to do a @@ -617,7 +713,7 @@ complete: *timeout = 0; } - if (rps && req->seqno == req->engine->last_submitted_seqno) { + if (rps && req->fence.seqno == req->engine->last_submitted_seqno) { /* The GPU is now idle and this client has stalled. * Since no other client has submitted a request in the * meantime, assume that this client is the only one @@ -657,10 +753,3 @@ int i915_wait_request(struct drm_i915_gem_request *req) return 0; } - -void i915_gem_request_free(struct kref *req_ref) -{ - struct drm_i915_gem_request *req = - container_of(req_ref, typeof(*req), ref); - kmem_cache_free(req->i915->requests, req); -} diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index ea700be..6f2c820 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -25,6 +25,10 @@ #ifndef I915_GEM_REQUEST_H #define I915_GEM_REQUEST_H +#include + +#include "i915_gem.h" + /** * Request queue structure. * @@ -36,11 +40,11 @@ * emission time to be associated with the request for tracking how far ahead * of the GPU the submission is. * - * The requests are reference counted, so upon creation they should have an - * initial reference taken using kref_init + * The requests are reference counted. */ struct drm_i915_gem_request { - struct kref ref; + struct fence fence; + spinlock_t lock; /** On Which ring this request was generated */ struct drm_i915_private *i915; @@ -66,12 +70,6 @@ struct drm_i915_gem_request { */ u32 previous_seqno; - /** GEM sequence number associated with this request, - * when the HWS breadcrumb is equal or greater than this the GPU - * has finished processing this request. - */ - u32 seqno; - /** Position in the ringbuffer of the start of the request */ u32 head; @@ -140,10 +138,16 @@ struct drm_i915_gem_request { unsigned int ctx_hw_id; }; +extern const struct fence_ops i915_fence_ops; + +static inline bool fence_is_i915(struct fence *fence) +{ + return fence->ops == &i915_fence_ops; +} + struct drm_i915_gem_request * __must_check i915_gem_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx); -void i915_gem_request_free(struct kref *req_ref); int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, struct drm_file *file); void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); @@ -151,7 +155,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); static inline u32 i915_gem_request_get_seqno(struct drm_i915_gem_request *req) { - return req ? req->seqno : 0; + return req ? 
req->fence.seqno : 0; } static inline struct intel_engine_cs * @@ -161,17 +165,24 @@ i915_gem_request_get_engine(struct drm_i915_gem_request *req) } static inline struct drm_i915_gem_request * +to_request(struct fence *fence) +{ + /* We assume that NULL fence/request are interoperable */ + BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0); + GEM_BUG_ON(fence && !fence_is_i915(fence)); + return container_of(fence, struct drm_i915_gem_request, fence); +} + +static inline struct drm_i915_gem_request * i915_gem_request_reference(struct drm_i915_gem_request *req) { - if (req) - kref_get(&req->ref); - return req; + return to_request(fence_get(&req->fence)); } static inline void i915_gem_request_unreference(struct drm_i915_gem_request *req) { - kref_put(&req->ref, i915_gem_request_free); + fence_put(&req->fence); } static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, @@ -223,7 +234,7 @@ static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->seqno); + req->fence.seqno); } bool __i915_spin_request(const struct drm_i915_gem_request *request, diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9d73d22..6daaf4e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1182,7 +1182,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, } erq = &error->ring[i].requests[count++]; - erq->seqno = request->seqno; + erq->seqno = request->fence.seqno; erq->jiffies = request->emitted_jiffies; erq->tail = request->postfix; } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 2112e02..1cc5de1 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -506,7 +506,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, rq->engine); wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; - wqi->fence_id = rq->seqno; + wqi->fence_id = rq->fence.seqno; kunmap_atomic(base); } @@ -601,7 +601,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) client->b_fail += 1; guc->submissions[engine_id] += 1; - guc->last_seqno[engine_id] = rq->seqno; + guc->last_seqno[engine_id] = rq->fence.seqno; return b_ret; } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 534154e..007112d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -465,7 +465,7 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry->dev = from->i915->drm.primary->index; __entry->sync_from = from->id; __entry->sync_to = to_req->engine->id; - __entry->seqno = i915_gem_request_get_seqno(req); + __entry->seqno = req->fence.seqno; ), TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u", @@ -488,9 +488,9 @@ TRACE_EVENT(i915_gem_ring_dispatch, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; __entry->flags = flags; - intel_engine_enable_signaling(req); + fence_enable_sw_signaling(&req->fence); ), TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", @@ -533,7 +533,7 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; ), TP_printk("dev=%u, ring=%u, seqno=%u", @@ -595,7 +595,7 @@ 
TRACE_EVENT(i915_gem_request_wait_begin, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->seqno; + __entry->seqno = req->fence.seqno; __entry->blocking = mutex_is_locked(&req->i915->drm.struct_mutex); ), diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b074f3d..32ada41 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -436,6 +436,7 @@ static int intel_breadcrumbs_signaler(void *arg) */ intel_engine_remove_wait(engine, &request->signaling.wait); + fence_signal(&request->fence); /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may @@ -482,7 +483,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) } request->signaling.wait.tsk = b->signaler; - request->signaling.wait.seqno = request->seqno; + request->signaling.wait.seqno = request->fence.seqno; i915_gem_request_reference(request); /* First add ourselves into the list of waiters, but register our @@ -504,8 +505,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) p = &b->signals.rb_node; while (*p) { parent = *p; - if (i915_seqno_passed(request->seqno, - to_signaler(parent)->seqno)) { + if (i915_seqno_passed(request->fence.seqno, + to_signaler(parent)->fence.seqno)) { p = &parent->rb_right; first = false; } else { diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e3c9f04..f4a35ec 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -182,6 +182,8 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) INIT_LIST_HEAD(&engine->execlist_queue); spin_lock_init(&engine->execlist_lock); + engine->fence_context = fence_context_alloc(1); + intel_engine_init_hangcheck(engine); i915_gem_batch_pool_init(&engine->i915->drm, &engine->batch_pool); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2e670f1..860dba2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1803,7 +1803,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT); intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, request->seqno); + intel_logical_ring_emit(ringbuf, request->fence.seqno); intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); intel_logical_ring_emit(ringbuf, MI_NOOP); return intel_logical_ring_advance_and_submit(request); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 94c8ef4..af0bd71 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1348,7 +1348,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, PIPE_CONTROL_CS_STALL); intel_ring_emit(signaller, lower_32_bits(gtt_offset)); intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - intel_ring_emit(signaller, signaller_req->seqno); + intel_ring_emit(signaller, signaller_req->fence.seqno); intel_ring_emit(signaller, 0); intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | MI_SEMAPHORE_TARGET(waiter->hw_id)); @@ -1386,7 +1386,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, intel_ring_emit(signaller, lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT); intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - 
intel_ring_emit(signaller, signaller_req->seqno); + intel_ring_emit(signaller, signaller_req->fence.seqno); intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | MI_SEMAPHORE_TARGET(waiter->hw_id)); intel_ring_emit(signaller, 0); @@ -1419,7 +1419,7 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req, if (i915_mmio_reg_valid(mbox_reg)) { intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit_reg(signaller, mbox_reg); - intel_ring_emit(signaller, signaller_req->seqno); + intel_ring_emit(signaller, signaller_req->fence.seqno); } } @@ -1455,7 +1455,7 @@ gen6_add_request(struct drm_i915_gem_request *req) intel_ring_emit(engine, MI_STORE_DWORD_INDEX); intel_ring_emit(engine, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, req->seqno); + intel_ring_emit(engine, req->fence.seqno); intel_ring_emit(engine, MI_USER_INTERRUPT); __intel_ring_advance(engine); @@ -1704,7 +1704,7 @@ i9xx_add_request(struct drm_i915_gem_request *req) intel_ring_emit(engine, MI_STORE_DWORD_INDEX); intel_ring_emit(engine, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, req->seqno); + intel_ring_emit(engine, req->fence.seqno); intel_ring_emit(engine, MI_USER_INTERRUPT); __intel_ring_advance(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index df7587a..5cbafc0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -146,6 +146,7 @@ struct intel_engine_cs { unsigned int exec_id; unsigned int hw_id; unsigned int guc_id; /* XXX same as hw_id? */ + u64 fence_context; u32 mmio_base; unsigned int irq_shift; struct intel_ringbuffer *buffer; -- cgit v0.10.2 From 42df271439f77a06f4f0034d819fbb6f9c36f834 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 09:21:12 +0100 Subject: drm/i915: Disable waitboosting for fence_wait() We want to restrict waitboosting to known process contexts, where we can track which clients are receiving waitboosts and prevent excessive power wasting. For fence_wait() we do not have any client tracking and so that leaves it open to abuse. v2: Hide the IS_ERR_OR_NULL testing for special clients Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1469002875-2335-5-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 6528536..f483e60 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -70,7 +70,7 @@ static signed long i915_fence_wait(struct fence *fence, ret = __i915_wait_request(to_request(fence), interruptible, timeout, - NULL); + NO_WAITBOOST); if (ret == -ETIME) return 0; @@ -642,7 +642,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, * forcing the clocks too high for the whole system, we only allow * each client to waitboost once in a busy period. */ - if (INTEL_GEN(req->i915) >= 6) + if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6) gen6_rps_boost(req->i915, rps, req->emitted_jiffies); /* Optimistic spin for the next ~jiffie before touching IRQs */ @@ -713,7 +713,8 @@ complete: *timeout = 0; } - if (rps && req->fence.seqno == req->engine->last_submitted_seqno) { + if (IS_RPS_USER(rps) && + req->fence.seqno == req->engine->last_submitted_seqno) { /* The GPU is now idle and this client has stalled. 
* Since no other client has submitted a request in the * meantime, assume that this client is the only one diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 6f2c820..0a01d01 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -206,6 +206,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, __i915_add_request(req, NULL, false) struct intel_rps_client; +#define NO_WAITBOOST ERR_PTR(-1) +#define IS_RPS_CLIENT(p) (!IS_ERR(p)) +#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p)) int __i915_wait_request(struct drm_i915_gem_request *req, bool interruptible, -- cgit v0.10.2 From 197be2ae8b0f3e5f7893b806d7aa01acdb3b45d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 09:21:13 +0100 Subject: drm/i915: Disable waitboosting for mmioflips/semaphores Since commit a6f766f39751 ("drm/i915: Limit ring synchronisation (sw sempahores) RPS boosts") and commit bcafc4e38b6a ("drm/i915: Limit mmio flip RPS boosts") we have limited the waitboosting for semaphores and flips. Ideally we do not want to boost in either of these instances as no userspace consumer is waiting upon the results (though a userspace producer may be stalled trying to submit an execbuf - but in this case the producer is being throttled due to the engine being saturated with work). With the introduction of NO_WAITBOOST in the previous patch, we can finally disable these needless boosts. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1469002875-2335-6-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 55fd3d9..618f8cf 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2465,13 +2465,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) list_empty(&file_priv->rps.link) ? "" : ", active"); rcu_read_unlock(); } - seq_printf(m, "Semaphore boosts: %d%s\n", - dev_priv->rps.semaphores.boosts, - list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active"); - seq_printf(m, "MMIO flip boosts: %d%s\n", - dev_priv->rps.mmioflips.boosts, - list_empty(&dev_priv->rps.mmioflips.link) ? 
"" : ", active"); - seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts); + seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts); spin_unlock(&dev_priv->rps.client_lock); mutex_unlock(&dev->filelist_mutex); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c97a755..e163a94 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1195,8 +1195,6 @@ struct intel_gen6_power_mgmt { struct delayed_work autoenable_work; unsigned boosts; - struct intel_rps_client semaphores, mmioflips; - /* manual wa residency calculations */ struct intel_rps_ei up_ei, down_ei; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 61729d6..079e09c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2849,7 +2849,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, ret = __i915_wait_request(from_req, i915->mm.interruptible, NULL, - &i915->rps.semaphores); + NO_WAITBOOST); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fb7d8fc5..51fbca7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11473,7 +11473,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w) if (work->flip_queued_req) WARN_ON(__i915_wait_request(work->flip_queued_req, false, NULL, - &dev_priv->rps.mmioflips)); + NO_WAITBOOST)); /* For framebuffer backed by dmabuf, wait for fence */ resv = i915_gem_object_get_dmabuf_resv(obj); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fa6b341..a1bf5f8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7810,8 +7810,6 @@ void intel_pm_setup(struct drm_device *dev) INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, __intel_autoenable_gt_powersave); INIT_LIST_HEAD(&dev_priv->rps.clients); - INIT_LIST_HEAD(&dev_priv->rps.semaphores.link); - INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link); dev_priv->pm.suspended = false; atomic_set(&dev_priv->pm.wakeref_count, 0); -- cgit v0.10.2 From 30bc06c0fa99a09d1132a2a5dec4bdf27426916f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 09:21:14 +0100 Subject: drm/i915: Mark imported dma-buf objects as being coherent A foreign dma-buf does not share our cache domain tracking, and we rely on the producer ensuring cache coherency. Marking them as being in the CPU domain is incorrect. v2: Add commentary about the GTT domain. This is not the best place for it, but pending an actual overhaul of our domain tracking and explaining each one, this comment should help the next reader... Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469002875-2335-7-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 80bbe43..7d08ac0 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -300,6 +300,16 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); obj->base.import_attach = attach; + /* We use GTT as shorthand for a coherent domain, one that is + * neither in the GPU cache nor in the CPU cache, where all + * writes are immediately visible in memory. (That's not strictly + * true, but it's close! 
There are internal buffers such as the + * write-combined buffer or a delay through the chipset for GTT + * writes that do require us to treat GTT as a separate cache domain.) + */ + obj->base.read_domains = I915_GEM_DOMAIN_GTT; + obj->base.write_domain = 0; + return &obj->base; fail_detach: -- cgit v0.10.2 From c13d87ea53851880ab117f23a3e4fde1645da472 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 09:21:15 +0100 Subject: drm/i915: Wait on external rendering for GEM objects When transitioning to the GTT or CPU domain we wait on all rendering from i915 to complete (with the optimisation of allowing concurrent read access by both the GPU and client). We don't yet ensure all rendering from third parties (tracked by implicit fences on the dma-buf) is complete. Since implicitly tracked rendering by third parties will ignore our cache-domain tracking, we have to always wait upon rendering from third-parties when transitioning to direct access to the backing store. We still rely on clients notifying us of cache domain changes (i.e. they need to move to the GTT read or write domain after doing a CPU access before letting the third party render again). v2: This introduces a potential WARN_ON into i915_gem_object_free() as the current i915_vma_unbind() calls i915_gem_object_wait_rendering(). To hit this path we first need to render with the GPU, have a dma-buf attached with an unsignaled fence and then interrupt the wait. It does get fixed later in the series (when i915_vma_unbind() only waits on the active VMA and not all, including third-party, rendering. To offset that risk, use the __i915_vma_unbind_no_wait hack. Testcase: igt/prime_vgem/basic-fence-read Testcase: igt/prime_vgem/basic-fence-mmap Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469002875-2335-8-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 079e09c..37868cc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,10 +29,12 @@ #include #include #include "i915_drv.h" +#include "i915_gem_dmabuf.h" #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_mocs.h" +#include #include #include #include @@ -511,6 +513,10 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, if (WARN_ON(!i915_gem_object_has_struct_page(obj))) return -EINVAL; + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; + if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This @@ -518,9 +524,6 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, * anyway again before the next pread happens. */ *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, obj->cache_level); - ret = i915_gem_object_wait_rendering(obj, true); - if (ret) - return ret; } ret = i915_gem_object_get_pages(obj); @@ -1132,15 +1135,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev, obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + ret = i915_gem_object_wait_rendering(obj, false); + if (ret) + return ret; + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { /* If we're not in the cpu write domain, set ourself into the gtt * write domain and manually flush cachelines (if required). 
This * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ needs_clflush_after = cpu_write_needs_clflush(obj); - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; } /* Same trick applies to invalidate partially written cachelines read * before writing. */ @@ -1335,11 +1339,9 @@ int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, bool readonly) { + struct reservation_object *resv; int ret, i; - if (!obj->active) - return 0; - if (readonly) { if (obj->last_write_req != NULL) { ret = i915_wait_request(obj->last_write_req); @@ -1366,6 +1368,16 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, GEM_BUG_ON(obj->active); } + resv = i915_gem_object_get_dmabuf_resv(obj); + if (resv) { + long err; + + err = reservation_object_wait_timeout_rcu(resv, !readonly, true, + MAX_SCHEDULE_TIMEOUT); + if (err < 0) + return err; + } + return 0; } @@ -3402,13 +3414,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) struct i915_vma *vma; int ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) - return 0; - ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; + if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) + return 0; + /* Flush and acquire obj->pages so that we are coherent through * direct access in memory with previous cached writes through * shmemfs and that our cache domain tracking remains valid. @@ -3752,13 +3764,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) uint32_t old_write_domain, old_read_domains; int ret; - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) - return 0; - ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return 0; + i915_gem_object_flush_gtt_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -4238,7 +4250,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) int ret; vma->pin_count = 0; - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind_no_wait(vma); if (WARN_ON(ret == -ERESTARTSYS)) { bool was_interruptible; -- cgit v0.10.2 From e8a261ea639998d79dee78220098b25f38801bcb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 13:31:49 +0100 Subject: drm/i915: Rename request reference/unreference to get/put Now that we derive requests from struct fence, swap over to its nomenclature for references. It's shorter and more idiomatic across the kernel. 
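As a rough illustration of the resulting call pattern (the get/put helpers are the ones renamed by this patch; the surrounding function and its use of the request are invented for the example):

	/* Hypothetical caller, not part of the patch: hold a reference across
	 * some asynchronous use of the request and drop it afterwards.
	 */
	static void sample_use_request(struct drm_i915_gem_request *req)
	{
		req = i915_gem_request_get(req);	/* fence_get() -> kref_get() */
		/* ... hand req off, wait on it, etc ... */
		i915_gem_request_put(req);		/* fence_put() -> kref_put() */
	}

The mechanical conversion applied throughout the driver is: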
s/i915_gem_request_reference/i915_gem_request_get/ s/i915_gem_request_unreference/i915_gem_request_put/ Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1469005202-9659-2-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1469017917-15134-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 37868cc..d825db7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1422,7 +1422,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (req == NULL) return 0; - requests[n++] = i915_gem_request_reference(req); + requests[n++] = i915_gem_request_get(req); } else { for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; @@ -1431,7 +1431,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (req == NULL) continue; - requests[n++] = i915_gem_request_reference(req); + requests[n++] = i915_gem_request_get(req); } } @@ -1444,7 +1444,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, for (i = 0; i < n; i++) { if (ret == 0) i915_gem_object_retire_request(obj, requests[i]); - i915_gem_request_unreference(requests[i]); + i915_gem_request_put(requests[i]); } return ret; @@ -2820,7 +2820,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (obj->last_read_req[i] == NULL) continue; - req[n++] = i915_gem_request_reference(obj->last_read_req[i]); + req[n++] = i915_gem_request_get(obj->last_read_req[i]); } mutex_unlock(&dev->struct_mutex); @@ -2830,7 +2830,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) ret = __i915_wait_request(req[i], true, args->timeout_ns > 0 ? 
&args->timeout_ns : NULL, to_rps_client(file)); - i915_gem_request_unreference(req[i]); + i915_gem_request_put(req[i]); } return ret; @@ -3845,14 +3845,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) target = request; } if (target) - i915_gem_request_reference(target); + i915_gem_request_get(target); spin_unlock(&file_priv->mm.lock); if (target == NULL) return 0; ret = __i915_wait_request(target, true, NULL, NULL); - i915_gem_request_unreference(target); + i915_gem_request_put(target); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f483e60..04ff97b 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -181,7 +181,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) } i915_gem_context_unreference(request->ctx); - i915_gem_request_unreference(request); + i915_gem_request_put(request); } void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 0a01d01..e06e81f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -174,13 +174,13 @@ to_request(struct fence *fence) } static inline struct drm_i915_gem_request * -i915_gem_request_reference(struct drm_i915_gem_request *req) +i915_gem_request_get(struct drm_i915_gem_request *req) { return to_request(fence_get(&req->fence)); } static inline void -i915_gem_request_unreference(struct drm_i915_gem_request *req) +i915_gem_request_put(struct drm_i915_gem_request *req) { fence_put(&req->fence); } @@ -189,10 +189,10 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, struct drm_i915_gem_request *src) { if (src) - i915_gem_request_reference(src); + i915_gem_request_get(src); if (*pdst) - i915_gem_request_unreference(*pdst); + i915_gem_request_put(*pdst); *pdst = src; } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 2314c88..ba16e04 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -78,7 +78,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj) if (req == NULL) continue; - requests[n++] = i915_gem_request_reference(req); + requests[n++] = i915_gem_request_get(req); } mutex_unlock(&dev->struct_mutex); @@ -89,7 +89,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj) mutex_lock(&dev->struct_mutex); for (i = 0; i < n; i++) - i915_gem_request_unreference(requests[i]); + i915_gem_request_put(requests[i]); } static void cancel_userptr(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 32ada41..f0b56e3 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -453,7 +453,7 @@ static int intel_breadcrumbs_signaler(void *arg) rb_erase(&request->signaling.node, &b->signals); spin_unlock(&b->lock); - i915_gem_request_unreference(request); + i915_gem_request_put(request); } else { if (kthread_should_stop()) break; @@ -484,7 +484,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) request->signaling.wait.tsk = b->signaler; request->signaling.wait.seqno = request->fence.seqno; - i915_gem_request_reference(request); + i915_gem_request_get(request); /* First add ourselves into the list of waiters, but register our * bottom-half as the signaller 
thread. As per usual, only the oldest diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 51fbca7..2f7af85 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10954,11 +10954,10 @@ static void intel_unpin_work_fn(struct work_struct *__work) mutex_lock(&dev->struct_mutex); intel_unpin_fb_obj(work->old_fb, primary->state->rotation); drm_gem_object_unreference(&work->pending_flip_obj->base); - - if (work->flip_queued_req) - i915_gem_request_assign(&work->flip_queued_req, NULL); mutex_unlock(&dev->struct_mutex); + i915_gem_request_put(work->flip_queued_req); + intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit); intel_fbc_post_update(crtc); drm_framebuffer_unreference(work->old_fb); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 860dba2..b23d4cc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -441,7 +441,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *engine) * will update tail past first request's workload */ cursor->elsp_submitted = req0->elsp_submitted; list_del(&req0->execlist_link); - i915_gem_request_unreference(req0); + i915_gem_request_put(req0); req0 = cursor; } else { if (IS_ENABLED(CONFIG_DRM_I915_GVT)) { @@ -514,7 +514,7 @@ execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id) execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT); list_del(&head_req->execlist_link); - i915_gem_request_unreference(head_req); + i915_gem_request_put(head_req); return 1; } @@ -632,11 +632,11 @@ static void execlists_context_queue(struct drm_i915_gem_request *request) WARN(tail_req->elsp_submitted != 0, "More than 2 already-submitted reqs queued\n"); list_del(&tail_req->execlist_link); - i915_gem_request_unreference(tail_req); + i915_gem_request_put(tail_req); } } - i915_gem_request_reference(request); + i915_gem_request_get(request); list_add_tail(&request->execlist_link, &engine->execlist_queue); request->ctx_hw_id = request->ctx->hw_id; if (num_elements == 0) @@ -904,7 +904,7 @@ void intel_execlists_cancel_requests(struct intel_engine_cs *engine) list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) { list_del(&req->execlist_link); - i915_gem_request_unreference(req); + i915_gem_request_put(req); } } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a1bf5f8..4e9846e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7775,7 +7775,7 @@ static void __intel_rps_boost_work(struct work_struct *work) if (!i915_gem_request_completed(req)) gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); - i915_gem_request_unreference(req); + i915_gem_request_put(req); kfree(boost); } @@ -7793,8 +7793,7 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) if (boost == NULL) return; - i915_gem_request_reference(req); - boost->req = req; + boost->req = i915_gem_request_get(req); INIT_WORK(&boost->work, __intel_rps_boost_work); queue_work(req->i915->wq, &boost->work); -- cgit v0.10.2 From 9a6feaf0d74f91eeef23d0ee72c5ce69a559b31b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 13:31:50 +0100 Subject: drm/i915: Rename i915_gem_context_reference/unreference() As these are wrappers around kref_get/kref_put() it is preferable to follow the naming convention and use the same verb get/put in our wrapper names for manipulating a reference to the context. 
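A minimal sketch of the intended usage after the rename (the last_context handling mirrors the hunks below; the wrapper function itself is invented for illustration):

	static void sample_switch_context(struct intel_engine_cs *engine,
					  struct i915_gem_context *to)
	{
		/* Caller holds struct_mutex, as required by i915_gem_context_put().
		 * i915_gem_context_get() returns the context so the reference can
		 * be taken and assigned in a single expression.
		 */
		if (engine->last_context)
			i915_gem_context_put(engine->last_context);
		engine->last_context = i915_gem_context_get(to);
	}

The substitutions applied are: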
s/i915_gem_context_reference/i915_gem_context_get/ s/i915_gem_context_unreference/i915_gem_context_put/ Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469005202-9659-3-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1469017917-15134-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e163a94..2131e7f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3338,12 +3338,14 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) return ctx; } -static inline void i915_gem_context_reference(struct i915_gem_context *ctx) +static inline struct i915_gem_context * +i915_gem_context_get(struct i915_gem_context *ctx) { kref_get(&ctx->ref); + return ctx; } -static inline void i915_gem_context_unreference(struct i915_gem_context *ctx) +static inline void i915_gem_context_put(struct i915_gem_context *ctx) { lockdep_assert_held(&ctx->i915->drm.struct_mutex); kref_put(&ctx->ref, i915_gem_context_free); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3b63616..7151791 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -305,7 +305,7 @@ __create_hw_context(struct drm_device *dev, return ctx; err_out: - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); return ERR_PTR(ret); } @@ -333,7 +333,7 @@ i915_gem_create_context(struct drm_device *dev, DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); idr_remove(&file_priv->context_idr, ctx->user_handle); - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); return ERR_CAST(ppgtt); } @@ -390,7 +390,7 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx, if (ce->state) i915_gem_object_ggtt_unpin(ce->state); - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); } } @@ -504,7 +504,7 @@ void i915_gem_context_fini(struct drm_device *dev) lockdep_assert_held(&dev->struct_mutex); - i915_gem_context_unreference(dctx); + i915_gem_context_put(dctx); dev_priv->kernel_context = NULL; ida_destroy(&dev_priv->context_hw_ida); @@ -515,7 +515,7 @@ static int context_idr_cleanup(int id, void *p, void *data) struct i915_gem_context *ctx = p; ctx->file_priv = ERR_PTR(-EBADF); - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); return 0; } @@ -827,10 +827,9 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) /* obj is kept alive until the next request by its active ref */ i915_gem_object_ggtt_unpin(from->engine[RCS].state); - i915_gem_context_unreference(from); + i915_gem_context_put(from); } - i915_gem_context_reference(to); - engine->last_context = to; + engine->last_context = i915_gem_context_get(to); /* GEN8 does *not* require an explicit reload if the PDPs have been * setup, and we do not wish to move them. 
@@ -914,10 +913,9 @@ int i915_switch_context(struct drm_i915_gem_request *req) } if (to != engine->last_context) { - i915_gem_context_reference(to); if (engine->last_context) - i915_gem_context_unreference(engine->last_context); - engine->last_context = to; + i915_gem_context_put(engine->last_context); + engine->last_context = i915_gem_context_get(to); } return 0; @@ -1014,7 +1012,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, } idr_remove(&file_priv->context_idr, ctx->user_handle); - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); mutex_unlock(&dev->struct_mutex); DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f6724ae..551dd4f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1509,7 +1509,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - i915_gem_context_reference(ctx); + i915_gem_context_get(ctx); if (ctx->ppgtt) vm = &ctx->ppgtt->base; @@ -1520,7 +1520,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, eb = eb_create(args); if (eb == NULL) { - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); mutex_unlock(&dev->struct_mutex); ret = -ENOMEM; goto pre_mutex_err; @@ -1664,7 +1664,7 @@ err_batch_unpin: err: /* the request owns the ref now */ - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); eb_destroy(eb); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 04ff97b..60a3a34 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -180,7 +180,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) request->engine); } - i915_gem_context_unreference(request->ctx); + i915_gem_context_put(request->ctx); i915_gem_request_put(request); } @@ -341,8 +341,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, req->i915 = dev_priv; req->engine = engine; - req->ctx = ctx; - i915_gem_context_reference(ctx); + req->ctx = i915_gem_context_get(ctx); /* * Reserve space in the ring buffer for all the commands required to @@ -364,7 +363,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, return 0; err_ctx: - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); err: kmem_cache_free(dev_priv->requests, req); return ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b23d4cc..6282fca 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -980,7 +980,6 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ret) goto unpin_map; - i915_gem_context_reference(ctx); ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state); intel_lr_context_descriptor_update(ctx, engine); @@ -992,6 +991,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (i915.enable_guc_submission) I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); + i915_gem_context_get(ctx); return 0; unpin_map: @@ -1023,7 +1023,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, ce->lrc_desc = 0; ce->lrc_reg_state = NULL; - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); } static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index af0bd71..3a7135d 100644 
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2139,7 +2139,7 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, if (ctx == ctx->i915->kernel_context) ce->initialised = true; - i915_gem_context_reference(ctx); + i915_gem_context_get(ctx); return 0; error: @@ -2160,7 +2160,7 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx, if (ce->state) i915_gem_object_ggtt_unpin(ce->state); - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); } static int intel_init_ring_buffer(struct intel_engine_cs *engine) -- cgit v0.10.2 From 03ac0642f67a3a888bf82b1042c07d5df2b52a89 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 13:31:51 +0100 Subject: drm/i915: Wrap drm_gem_object_lookup in i915_gem_object_lookup For symmetry with a forthcoming i915_gem_object_get() and i915_gem_object_put(). Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469005202-9659-4-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Dave Gordon Link: http://patchwork.freedesktop.org/patch/msgid/1469017917-15134-3-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2131e7f..47f244f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2286,7 +2286,25 @@ struct drm_i915_gem_object { } userptr; }; }; -#define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) + +static inline struct drm_i915_gem_object * +to_intel_bo(struct drm_gem_object *gem) +{ + /* Assert that to_intel_bo(NULL) == NULL */ + BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base)); + + return container_of(gem, struct drm_i915_gem_object, base); +} + +static inline struct drm_i915_gem_object * +i915_gem_object_lookup(struct drm_file *file, u32 handle) +{ + return to_intel_bo(drm_gem_object_lookup(file, handle)); +} + +__deprecated +extern struct drm_gem_object * +drm_gem_object_lookup(struct drm_file *file, u32 handle); static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d825db7..9704964 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -864,8 +864,8 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } @@ -1280,8 +1280,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (ret) goto put_rpm; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } @@ -1497,8 +1497,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } @@ -1546,8 +1546,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } @@ -1587,7 +1587,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file) { struct drm_i915_gem_mmap *args = data; - struct drm_gem_object *obj; + struct drm_i915_gem_object *obj; unsigned long addr; if (args->flags & ~(I915_MMAP_WC)) @@ -1596,19 +1596,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) return -ENODEV; - obj = drm_gem_object_lookup(file, args->handle); - if (obj == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; /* prime objects have no backing filp to GEM mmap * pages from. */ - if (!obj->filp) { - drm_gem_object_unreference_unlocked(obj); + if (!obj->base.filp) { + drm_gem_object_unreference_unlocked(&obj->base); return -EINVAL; } - addr = vm_mmap(obj->filp, 0, args->size, + addr = vm_mmap(obj->base.filp, 0, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, args->offset); if (args->flags & I915_MMAP_WC) { @@ -1616,7 +1616,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct vm_area_struct *vma; if (down_write_killable(&mm->mmap_sem)) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_unreference_unlocked(&obj->base); return -EINTR; } vma = find_vma(mm, addr); @@ -1628,9 +1628,9 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, up_write(&mm->mmap_sem); /* This may race, but that's ok, it only gets set */ - WRITE_ONCE(to_intel_bo(obj)->has_wc_mmap, true); + WRITE_ONCE(obj->has_wc_mmap, true); } - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_unreference_unlocked(&obj->base); if (IS_ERR((void *)addr)) return addr; @@ -1968,8 +1968,8 @@ i915_gem_mmap_gtt(struct drm_file *file, if (ret) return ret; - obj = to_intel_bo(drm_gem_object_lookup(file, handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file, handle); + if (!obj) { ret = -ENOENT; goto unlock; } @@ -2792,8 +2792,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (ret) return ret; - obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file, args->bo_handle); + if (!obj) { mutex_unlock(&dev->struct_mutex); return -ENOENT; } @@ -3596,8 +3596,8 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_caching *args = data; struct drm_i915_gem_object *obj; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; switch (obj->cache_level) { @@ -3657,8 +3657,8 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (ret) goto rpm_put; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } @@ -4026,8 +4026,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } @@ -4091,8 +4091,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle)); - if (&obj->base == NULL) { + obj = i915_gem_object_lookup(file_priv, args->handle); + if (!obj) { ret = -ENOENT; goto unlock; } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 8030199..46e80f3 100644 --- 
a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -166,8 +166,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int ret = 0; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; if (!i915_tiling_ok(dev, @@ -297,8 +297,8 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; - obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) return -ENOENT; mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2f7af85..77d3205 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15089,8 +15089,8 @@ intel_user_framebuffer_create(struct drm_device *dev, struct drm_i915_gem_object *obj; struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd; - obj = to_intel_bo(drm_gem_object_lookup(filp, mode_cmd.handles[0])); - if (&obj->base == NULL) + obj = i915_gem_object_lookup(filp, mode_cmd.handles[0]); + if (!obj) return ERR_PTR(-ENOENT); fb = intel_framebuffer_create(dev, &mode_cmd, obj); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 3212d88..5ca797b 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1122,9 +1122,8 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, } crtc = to_intel_crtc(drmmode_crtc); - new_bo = to_intel_bo(drm_gem_object_lookup(file_priv, - put_image_rec->bo_handle)); - if (&new_bo->base == NULL) { + new_bo = i915_gem_object_lookup(file_priv, put_image_rec->bo_handle); + if (!new_bo) { ret = -ENOENT; goto out_free; } -- cgit v0.10.2 From 25dc556a2a0a2747cf3651462076d86410bd6b90 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 13:31:52 +0100 Subject: drm/i915: Wrap drm_gem_object_reference in i915_gem_object_get Ultimately wraps kref_get(), so adopt its nomenclature for consistency with other subsystems. 
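A short sketch of why returning the object from the helper is convenient (the helper is the one added in this patch; the work structure and function are made up for the example):

	struct sample_flip_work {
		struct drm_i915_gem_object *obj;
	};

	static void sample_queue_flip(struct sample_flip_work *work,
				      struct drm_i915_gem_object *obj)
	{
		/* Take the reference and record the pointer in one expression;
		 * previously this required drm_gem_object_reference(&obj->base)
		 * followed by a separate assignment.
		 */
		work->obj = i915_gem_object_get(obj);
	}

The substitution applied is: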
s/drm_gem_object_reference/i915_gem_object_get/ Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469005202-9659-5-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Dave Gordon Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469017917-15134-4-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 47f244f..163d28e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2306,6 +2306,17 @@ __deprecated extern struct drm_gem_object * drm_gem_object_lookup(struct drm_file *file, u32 handle); +__attribute__((nonnull)) +static inline struct drm_i915_gem_object * +i915_gem_object_get(struct drm_i915_gem_object *obj) +{ + drm_gem_object_reference(&obj->base); + return obj; +} + +__deprecated +extern void drm_gem_object_reference(struct drm_gem_object *); + static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9704964..f886c0b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -289,7 +289,7 @@ drop_pages(struct drm_i915_gem_object *obj) struct i915_vma *vma, *next; int ret; - drm_gem_object_reference(&obj->base); + i915_gem_object_get(obj); list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) if (i915_vma_unbind(vma)) break; @@ -2361,7 +2361,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, /* Add a reference if we're newly entering the active list. */ if (obj->active == 0) - drm_gem_object_reference(&obj->base); + i915_gem_object_get(obj); obj->active |= intel_engine_flag(engine); list_move_tail(&obj->engine_list[engine->id], &engine->active_list); diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 7d08ac0..3a00ab3 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -278,8 +278,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, * Importing dmabuf exported from out own gem increases * refcount on gem itself instead of f_count of dmabuf. 
*/ - drm_gem_object_reference(&obj->base); - return &obj->base; + return &i915_gem_object_get(obj)->base; } } diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index b1194c7..5610394 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -183,7 +183,7 @@ found: exec_list); if (drm_mm_scan_remove_block(&vma->node)) { list_move(&vma->exec_list, &eviction_list); - drm_gem_object_reference(&vma->obj->base); + i915_gem_object_get(vma->obj); continue; } list_del_init(&vma->exec_list); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 551dd4f..28bcc99 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -123,7 +123,7 @@ eb_lookup_vmas(struct eb_vmas *eb, goto err; } - drm_gem_object_reference(&obj->base); + i915_gem_object_get(obj); list_add_tail(&obj->obj_exec_link, &objects); } spin_unlock(&file->table_lock); @@ -1236,7 +1236,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, vma = i915_gem_obj_to_ggtt(shadow_batch_obj); vma->exec_entry = shadow_exec_entry; vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; - drm_gem_object_reference(&shadow_batch_obj->base); + i915_gem_object_get(shadow_batch_obj); list_add_tail(&vma->exec_list, &eb->vmas); shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 6f10b42..3aa76d3 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -190,7 +190,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!can_release_pages(obj)) continue; - drm_gem_object_reference(&obj->base); + i915_gem_object_get(obj); /* For the unbound phase, this should be a no-op! */ list_for_each_entry_safe(vma, v, diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index ba16e04..c41bf74 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -622,8 +622,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj, obj->userptr.work = &work->work; obj->userptr.workers++; - work->obj = obj; - drm_gem_object_reference(&obj->base); + work->obj = i915_gem_object_get(obj); work->task = current; get_task_struct(work->task); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 77d3205..84904a2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11649,7 +11649,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, /* Reference the objects for the scheduled work. */ drm_framebuffer_reference(work->old_fb); - drm_gem_object_reference(&obj->base); crtc->primary->fb = fb; update_state_fb(crtc->primary); @@ -11657,7 +11656,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, intel_fbc_pre_update(intel_crtc, intel_crtc->config, to_intel_plane_state(primary->state)); - work->pending_flip_obj = obj; + work->pending_flip_obj = i915_gem_object_get(obj); ret = i915_mutex_lock_interruptible(dev); if (ret) -- cgit v0.10.2 From f8c417cdb1b83c41520980af0bdc86e3951850b8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 13:31:53 +0100 Subject: drm/i915: Rename drm_gem_object_unreference in preparation for lockless free Ultimately wraps kref_put(), so adopt its nomenclature for consistency with other subsystems. 
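As a sketch of the resulting call pattern (mirroring the hunks below rather than adding behaviour), each release site simply drops the &obj->base indirection:

    /* before */
    drm_gem_object_unreference(&obj->base);

    /* after */
    i915_gem_object_put(obj);

Locking rules are unchanged: callers still hold struct_mutex here; the _unlocked variant is renamed in the following patch.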
s/drm_gem_object_unreference/i915_gem_object_put/ Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469005202-9659-6-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469017917-15134-5-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 163d28e..ffc97c5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2317,6 +2317,16 @@ i915_gem_object_get(struct drm_i915_gem_object *obj) __deprecated extern void drm_gem_object_reference(struct drm_gem_object *); +__attribute__((nonnull)) +static inline void +i915_gem_object_put(struct drm_i915_gem_object *obj) +{ + drm_gem_object_unreference(&obj->base); +} + +__deprecated +extern void drm_gem_object_unreference(struct drm_gem_object *); + static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f886c0b..f23670f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -295,7 +295,7 @@ drop_pages(struct drm_i915_gem_object *obj) break; ret = i915_gem_object_put_pages(obj); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return ret; } @@ -887,7 +887,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, args->offset, args->data_ptr); out: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); return ret; @@ -1320,7 +1320,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, } out: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); put_rpm: @@ -1522,7 +1522,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); unref: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); return ret; @@ -1556,7 +1556,7 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, if (obj->pin_display) i915_gem_object_flush_cpu_write_domain(obj); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); return ret; @@ -1987,7 +1987,7 @@ i915_gem_mmap_gtt(struct drm_file *file, *offset = drm_vma_node_offset_addr(&obj->base.vma_node); out: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); return ret; @@ -2411,7 +2411,7 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) } i915_gem_request_assign(&obj->last_fenced_req, NULL); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); } static bool i915_context_is_banned(const struct i915_gem_context *ctx) @@ -2814,7 +2814,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto out; } - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); for (i = 0; i < I915_NUM_ENGINES; i++) { if (obj->last_read_req[i] == NULL) @@ -2835,7 +2835,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; out: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); return ret; } @@ -3665,7 +3665,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, ret = i915_gem_object_set_cache_level(obj, level); - 
drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); rpm_put: @@ -4057,7 +4057,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, } unref: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); return ret; @@ -4121,7 +4121,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, args->retained = obj->madv != __I915_MADV_PURGED; out: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); return ret; @@ -4952,6 +4952,6 @@ i915_gem_object_create_from_data(struct drm_device *dev, return obj; fail: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 3752d5d..3507b27 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -75,7 +75,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool) batch_pool_link); list_del(&obj->batch_pool_link); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); } } } @@ -121,7 +121,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, /* While we're looping, do some clean up */ if (tmp->madv == __I915_MADV_PURGED) { list_del(&tmp->batch_pool_link); - drm_gem_object_unreference(&tmp->base); + i915_gem_object_put(tmp); continue; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 7151791..2600939 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -176,7 +176,7 @@ void i915_gem_context_free(struct kref *ctx_ref) if (ce->ringbuf) intel_ringbuffer_free(ce->ringbuf); - drm_gem_object_unreference(&ce->state->base); + i915_gem_object_put(ce->state); } list_del(&ctx->link); @@ -216,7 +216,7 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); /* Failure shouldn't ever happen this early */ if (WARN_ON(ret)) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return ERR_PTR(ret); } } diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 5610394..81f7b43 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -191,17 +191,18 @@ found: /* Unbinding will emit any required flushes */ while (!list_empty(&eviction_list)) { - struct drm_gem_object *obj; + struct drm_i915_gem_object *obj; + vma = list_first_entry(&eviction_list, struct i915_vma, exec_list); - obj = &vma->obj->base; + obj = vma->obj; list_del_init(&vma->exec_list); if (ret == 0) ret = i915_vma_unbind(vma); - drm_gem_object_unreference(obj); + i915_gem_object_put(obj); } return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 28bcc99..6482ec2 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -176,7 +176,7 @@ err: struct drm_i915_gem_object, obj_exec_link); list_del_init(&obj->obj_exec_link); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); } /* * Objects already transfered to the vmas list will be unreferenced by @@ -264,7 +264,7 @@ static void eb_destroy(struct eb_vmas *eb) exec_list); list_del_init(&vma->exec_list); i915_gem_execbuffer_unreserve_vma(vma); - 
drm_gem_object_unreference(&vma->obj->base); + i915_gem_object_put(vma->obj); } kfree(eb); } @@ -873,7 +873,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); list_del_init(&vma->exec_list); i915_gem_execbuffer_unreserve_vma(vma); - drm_gem_object_unreference(&vma->obj->base); + i915_gem_object_put(vma->obj); } mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index f75bbd6..b2be467 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -70,7 +70,7 @@ static int render_state_init(struct render_state *so, return 0; free_gem: - drm_gem_object_unreference(&so->obj->base); + i915_gem_object_put(so->obj); return ret; } @@ -195,7 +195,7 @@ err_out: void i915_gem_render_state_fini(struct render_state *so) { i915_gem_object_ggtt_unpin(so->obj); - drm_gem_object_unreference(&so->obj->base); + i915_gem_object_put(so->obj); } int i915_gem_render_state_prepare(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 3aa76d3..afaa259 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -201,7 +201,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (i915_gem_object_put_pages(obj) == 0) count += obj->base.size >> PAGE_SHIFT; - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); } list_splice(&still_in_list, phase->list); } diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 66be299a1..310756c 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -716,6 +716,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, return obj; err: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return NULL; } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 46e80f3..2d9d190 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -268,7 +268,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } err: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); intel_runtime_pm_put(dev_priv); @@ -328,7 +328,7 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index c41bf74..cd4af22 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -119,7 +119,7 @@ static void cancel_userptr(struct work_struct *work) dev_priv->mm.interruptible = was_interruptible; } - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); } @@ -577,7 +577,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) } obj->userptr.workers--; - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); release_pages(pvec, pinned, 0); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1cc5de1..01c1c16 100644 
--- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -633,13 +633,13 @@ gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size) return NULL; if (i915_gem_object_get_pages(obj)) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return NULL; } if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return NULL; } @@ -661,7 +661,7 @@ static void gem_release_guc_obj(struct drm_i915_gem_object *obj) if (i915_gem_obj_is_pinned(obj)) i915_gem_object_ggtt_unpin(obj); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); } static void diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 84904a2..a18a79c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2488,7 +2488,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return true; out_unref_obj: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); return false; } @@ -10953,7 +10953,7 @@ static void intel_unpin_work_fn(struct work_struct *__work) mutex_lock(&dev->struct_mutex); intel_unpin_fb_obj(work->old_fb, primary->state->rotation); - drm_gem_object_unreference(&work->pending_flip_obj->base); + i915_gem_object_put(work->pending_flip_obj); mutex_unlock(&dev->struct_mutex); i915_gem_request_put(work->flip_queued_req); @@ -14851,7 +14851,7 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) drm_framebuffer_cleanup(fb); mutex_lock(&dev->struct_mutex); WARN_ON(!intel_fb->obj->framebuffer_references--); - drm_gem_object_unreference(&intel_fb->obj->base); + i915_gem_object_put(intel_fb->obj); mutex_unlock(&dev->struct_mutex); kfree(intel_fb); } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 23129dc..b1074a2 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -159,7 +159,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, fb = __intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); ret = PTR_ERR(fb); goto out; } diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 605c696..a6a8fba 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -662,7 +662,7 @@ fail: mutex_lock(&dev->struct_mutex); obj = guc_fw->guc_fw_obj; if (obj) - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); guc_fw->guc_fw_obj = NULL; mutex_unlock(&dev->struct_mutex); @@ -743,7 +743,7 @@ void intel_guc_fini(struct drm_device *dev) i915_guc_submission_fini(dev_priv); if (guc_fw->guc_fw_obj) - drm_gem_object_unreference(&guc_fw->guc_fw_obj->base); + i915_gem_object_put(guc_fw->guc_fw_obj); guc_fw->guc_fw_obj = NULL; mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6282fca..439aeab 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1367,7 +1367,7 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) if (ret) { DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n", ret); - drm_gem_object_unreference(&engine->wa_ctx.obj->base); + i915_gem_object_put(engine->wa_ctx.obj); return ret; } @@ -1378,7 +1378,7 @@ static void 
lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) { if (engine->wa_ctx.obj) { i915_gem_object_ggtt_unpin(engine->wa_ctx.obj); - drm_gem_object_unreference(&engine->wa_ctx.obj->base); + i915_gem_object_put(engine->wa_ctx.obj); engine->wa_ctx.obj = NULL; } } @@ -2382,7 +2382,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, error_ringbuf: intel_ringbuffer_free(ringbuf); error_deref_obj: - drm_gem_object_unreference(&ctx_obj->base); + i915_gem_object_put(ctx_obj); ce->ringbuf = NULL; ce->state = NULL; return ret; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 5ca797b..30d2f5d 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -308,7 +308,7 @@ static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) struct drm_i915_gem_object *obj = overlay->old_vid_bo; i915_gem_object_ggtt_unpin(obj); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); overlay->old_vid_bo = NULL; } @@ -322,7 +322,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay) return; i915_gem_object_ggtt_unpin(obj); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); overlay->vid_bo = NULL; overlay->crtc->overlay = NULL; @@ -1219,7 +1219,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, out_unlock: mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); - drm_gem_object_unreference_unlocked(&new_bo->base); + i915_gem_object_put(new_bo); out_free: kfree(params); @@ -1443,7 +1443,7 @@ out_unpin_bo: if (!OVERLAY_NEEDS_PHYSICAL(dev_priv)) i915_gem_object_ggtt_unpin(reg_bo); out_free_bo: - drm_gem_object_unreference(®_bo->base); + i915_gem_object_put(reg_bo); out_free: mutex_unlock(&dev_priv->drm.struct_mutex); kfree(overlay); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3a7135d..9c0a0b4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -641,7 +641,7 @@ void intel_fini_pipe_control(struct intel_engine_cs *engine) return; i915_gem_object_ggtt_unpin(engine->scratch.obj); - drm_gem_object_unreference(&engine->scratch.obj->base); + i915_gem_object_put(engine->scratch.obj); engine->scratch.obj = NULL; } @@ -672,7 +672,7 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size) return 0; err_unref: - drm_gem_object_unreference(&engine->scratch.obj->base); + i915_gem_object_put(engine->scratch.obj); err: return ret; } @@ -1312,7 +1312,7 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) if (dev_priv->semaphore_obj) { i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj); - drm_gem_object_unreference(&dev_priv->semaphore_obj->base); + i915_gem_object_put(dev_priv->semaphore_obj); dev_priv->semaphore_obj = NULL; } @@ -1898,7 +1898,7 @@ static void cleanup_status_page(struct intel_engine_cs *engine) kunmap(sg_page(obj->pages->sgl)); i915_gem_object_ggtt_unpin(obj); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); engine->status_page.obj = NULL; } @@ -1936,7 +1936,7 @@ static int init_status_page(struct intel_engine_cs *engine) ret = i915_gem_obj_ggtt_pin(obj, 4096, flags); if (ret) { err_unref: - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); return ret; } @@ -2039,7 +2039,7 @@ err_unpin: static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) { - drm_gem_object_unreference(&ringbuf->obj->base); + 
i915_gem_object_put(ringbuf->obj); ringbuf->obj = NULL; } @@ -2691,7 +2691,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK); if (ret != 0) { - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n"); i915.semaphores = 0; } else { -- cgit v0.10.2 From 34911fd30c6183036de67aa96e2c5a74cd43c9cf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 13:31:54 +0100 Subject: drm/i915: Rename drm_gem_object_unreference_unlocked in preparation for lockless free Whilst this ultimately wraps kref_put_mutex(), our goal here is the lockless variant, so keep the _unlocked() suffix until we need it no more. s/drm_gem_object_unreference_unlocked/i915_gem_object_put_unlocked/ Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469005202-9659-7-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469017917-15134-6-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ffc97c5..3ca1ace 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2327,6 +2327,16 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) __deprecated extern void drm_gem_object_unreference(struct drm_gem_object *); +__attribute__((nonnull)) +static inline void +i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj) +{ + drm_gem_object_unreference_unlocked(&obj->base); +} + +__deprecated +extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); + static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f23670f..536acd5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -410,7 +410,7 @@ i915_gem_create(struct drm_file *file, ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); if (ret) return ret; @@ -1604,7 +1604,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, * pages from. 
*/ if (!obj->base.filp) { - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return -EINVAL; } @@ -1616,7 +1616,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct vm_area_struct *vma; if (down_write_killable(&mm->mmap_sem)) { - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return -EINTR; } vma = find_vma(mm, addr); @@ -1630,7 +1630,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, /* This may race, but that's ok, it only gets set */ WRITE_ONCE(obj->has_wc_mmap, true); } - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); if (IS_ERR((void *)addr)) return addr; @@ -3615,7 +3615,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, break; } - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 2d9d190..e83fc2d 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -172,7 +172,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, if (!i915_tiling_ok(dev, args->stride, obj->base.size, args->tiling_mode)) { - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index cd4af22..ca8b82a 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -845,7 +845,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a18a79c..78beb7e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10442,7 +10442,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev, fb = intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return fb; } @@ -11756,7 +11756,7 @@ cleanup: crtc->primary->fb = old_fb; update_state_fb(crtc->primary); - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); drm_framebuffer_unreference(work->old_fb); spin_lock_irq(&dev->event_lock); @@ -15094,7 +15094,7 @@ intel_user_framebuffer_create(struct drm_device *dev, fb = intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) - drm_gem_object_unreference_unlocked(&obj->base); + i915_gem_object_put_unlocked(obj); return fb; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 30d2f5d..8654a32 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1460,7 +1460,7 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv) * hardware should be off already */ WARN_ON(dev_priv->overlay->active); - drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base); + i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo); kfree(dev_priv->overlay); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4e9846e..64d628c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ 
b/drivers/gpu/drm/i915/intel_pm.c @@ -5709,7 +5709,7 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) if (WARN_ON(!dev_priv->vlv_pctx)) return; - drm_gem_object_unreference_unlocked(&dev_priv->vlv_pctx->base); + i915_gem_object_put_unlocked(dev_priv->vlv_pctx); dev_priv->vlv_pctx = NULL; } -- cgit v0.10.2 From 406ea8d22f9aeee6b484b35241ea5195c3af66a6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 13:31:55 +0100 Subject: drm/i915: Treat ringbuffer writes as write to normal memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ringbuffers are now being written to either through LLC or WC paths, so treating them as simply iomem is no longer adequate. However, for the older !llc hardware, the hardware is documentated as treating the TAIL register update as serialising, so we can relax the barriers when filling the rings (but even if it were not, it is still an uncached register write and so serialising anyway.). For simplicity, let's ignore the iomem annotation. v2: Remove iomem from ringbuffer->virtual_address v3: And for good measure add iomem elsewhere to keep sparse happy Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä #v2 Link: http://patchwork.freedesktop.org/patch/msgid/1469005202-9659-8-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1469017917-15134-7-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 10f1e32..30da543 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3681,7 +3681,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) lockdep_assert_held(&vma->vm->dev->struct_mutex); if (WARN_ON(!vma->obj->map_and_fenceable)) - return ERR_PTR(-ENODEV); + return IO_ERR_PTR(-ENODEV); GEM_BUG_ON(!vma->is_ggtt); GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0); @@ -3692,7 +3692,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) vma->node.start, vma->node.size); if (ptr == NULL) - return ERR_PTR(-ENOMEM); + return IO_ERR_PTR(-ENOMEM); vma->iomap = ptr; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index aa5f31d..c4a6579 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -580,6 +580,7 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj, * Returns a valid iomapped pointer or ERR_PTR. */ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); +#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) /** * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c8ed367..093bfe1 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2860,6 +2860,7 @@ static struct intel_engine_cs * semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) { struct drm_i915_private *dev_priv = engine->i915; + void __iomem *vaddr; u32 cmd, ipehr, head; u64 offset = 0; int i, backwards; @@ -2898,6 +2899,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) */ head = I915_READ_HEAD(engine) & HEAD_ADDR; backwards = (INTEL_GEN(dev_priv) >= 8) ? 
5 : 4; + vaddr = (void __iomem *)engine->buffer->virtual_start; for (i = backwards; i; --i) { /* @@ -2908,7 +2910,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) head &= engine->buffer->size - 1; /* This here seems to blow up */ - cmd = ioread32(engine->buffer->virtual_start + head); + cmd = ioread32(vaddr + head); if (cmd == ipehr) break; @@ -2918,11 +2920,11 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) if (!i) return NULL; - *seqno = ioread32(engine->buffer->virtual_start + head + 4) + 1; + *seqno = ioread32(vaddr + head + 4) + 1; if (INTEL_GEN(dev_priv) >= 8) { - offset = ioread32(engine->buffer->virtual_start + head + 12); + offset = ioread32(vaddr + head + 12); offset <<= 32; - offset = ioread32(engine->buffer->virtual_start + head + 8); + offset |= ioread32(vaddr + head + 8); } return semaphore_wait_to_signaller_ring(engine, ipehr, offset); } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index b1074a2..6344999 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -189,7 +189,7 @@ static int intelfb_create(struct drm_fb_helper *helper, struct i915_vma *vma; struct drm_i915_gem_object *obj; bool prealloc = false; - void *vaddr; + void __iomem *vaddr; int ret; if (intel_fb && diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 938e3ee..aa3ac02 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -81,8 +81,9 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); */ static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) { - ringbuf->tail &= ringbuf->size - 1; + __intel_ringbuffer_advance(ringbuf); } + /** * intel_logical_ring_emit() - write a DWORD to the ringbuffer. * @ringbuf: Ringbuffer to write to. @@ -91,8 +92,7 @@ static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, u32 data) { - iowrite32(data, ringbuf->virtual_start + ringbuf->tail); - ringbuf->tail += 4; + __intel_ringbuffer_emit(ringbuf, data); } static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf, i915_reg_t reg) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9c0a0b4..afed24a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2021,7 +2021,8 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, /* Access through the GTT requires the device to be awake. 
*/ assert_rpm_wakelock_held(dev_priv); - addr = i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj)); + addr = (void __force *) + i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj)); if (IS_ERR(addr)) { ret = PTR_ERR(addr); goto err_unpin; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 5cbafc0..d1b2d9b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -84,7 +84,7 @@ struct intel_ring_hangcheck { struct intel_ringbuffer { struct drm_i915_gem_object *obj; - void __iomem *virtual_start; + void *virtual_start; struct i915_vma *vma; struct intel_engine_cs *engine; @@ -453,23 +453,35 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request); int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -static inline void intel_ring_emit(struct intel_engine_cs *engine, - u32 data) + +static inline void __intel_ringbuffer_emit(struct intel_ringbuffer *rb, + u32 data) +{ + *(uint32_t *)(rb->virtual_start + rb->tail) = data; + rb->tail += 4; +} + +static inline void __intel_ringbuffer_advance(struct intel_ringbuffer *rb) +{ + rb->tail &= rb->size - 1; +} + +static inline void intel_ring_emit(struct intel_engine_cs *engine, u32 data) { - struct intel_ringbuffer *ringbuf = engine->buffer; - iowrite32(data, ringbuf->virtual_start + ringbuf->tail); - ringbuf->tail += 4; + __intel_ringbuffer_emit(engine->buffer, data); } + static inline void intel_ring_emit_reg(struct intel_engine_cs *engine, i915_reg_t reg) { intel_ring_emit(engine, i915_mmio_reg_offset(reg)); } + static inline void intel_ring_advance(struct intel_engine_cs *engine) { - struct intel_ringbuffer *ringbuf = engine->buffer; - ringbuf->tail &= ringbuf->size - 1; + __intel_ringbuffer_advance(engine->buffer); } + int __intel_ring_space(int head, int tail, int size); void intel_ring_update_space(struct intel_ringbuffer *ringbuf); -- cgit v0.10.2 From f2f0ed718bc9d7587490ab0369a1091542d6c1f3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 13:31:56 +0100 Subject: drm/i915: Rename ring->virtual_start as ring->vaddr Just a different colour to better match virtual addresses elsewhere. s/ring->virtual_start/ring->vaddr/ Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469005202-9659-9-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Dave Gordon Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469017917-15134-8-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 093bfe1..7104dc1 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2899,7 +2899,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) */ head = I915_READ_HEAD(engine) & HEAD_ADDR; backwards = (INTEL_GEN(dev_priv) >= 8) ? 
5 : 4; - vaddr = (void __iomem *)engine->buffer->virtual_start; + vaddr = (void __iomem *)engine->buffer->vaddr; for (i = backwards; i; --i) { /* diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index afed24a..b9638e1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1972,14 +1972,14 @@ static int init_phys_status_page(struct intel_engine_cs *engine) void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf) { - GEM_BUG_ON(ringbuf->vma == NULL); - GEM_BUG_ON(ringbuf->virtual_start == NULL); + GEM_BUG_ON(!ringbuf->vma); + GEM_BUG_ON(!ringbuf->vaddr); if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen) i915_gem_object_unpin_map(ringbuf->obj); else i915_vma_unpin_iomap(ringbuf->vma); - ringbuf->virtual_start = NULL; + ringbuf->vaddr = NULL; i915_gem_object_ggtt_unpin(ringbuf->obj); ringbuf->vma = NULL; @@ -2029,7 +2029,7 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, } } - ringbuf->virtual_start = addr; + ringbuf->vaddr = addr; ringbuf->vma = i915_gem_obj_to_ggtt(obj); return 0; @@ -2391,8 +2391,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) GEM_BUG_ON(ringbuf->tail + remain_actual > ringbuf->size); /* Fill the tail with MI_NOOP */ - memset(ringbuf->virtual_start + ringbuf->tail, - 0, remain_actual); + memset(ringbuf->vaddr + ringbuf->tail, 0, remain_actual); ringbuf->tail = 0; ringbuf->space -= remain_actual; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index d1b2d9b..05bab8b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -84,7 +84,7 @@ struct intel_ring_hangcheck { struct intel_ringbuffer { struct drm_i915_gem_object *obj; - void *virtual_start; + void *vaddr; struct i915_vma *vma; struct intel_engine_cs *engine; @@ -457,7 +457,7 @@ int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); static inline void __intel_ringbuffer_emit(struct intel_ringbuffer *rb, u32 data) { - *(uint32_t *)(rb->virtual_start + rb->tail) = data; + *(uint32_t *)(rb->vaddr + rb->tail) = data; rb->tail += 4; } -- cgit v0.10.2 From 39df91905df3ba2d3a8e1270a30845970c9e2001 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jul 2016 13:31:57 +0100 Subject: drm/i915: Convert i915_semaphores_is_enabled over to early sanitize Rather than recomputing whether semaphores are enabled, we can do that computation once during early initialisation as the i915.semaphores module parameter is now read-only. 
s/i915_semaphores_is_enabled/i915.semaphores/ v2: Add the state to the debug dmesg as well Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469005202-9659-10-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469017917-15134-9-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 618f8cf..9aa62c5 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3220,7 +3220,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused) enum intel_engine_id id; int j, ret; - if (!i915_semaphore_is_enabled(dev_priv)) { + if (!i915.semaphores) { seq_puts(m, "Semaphores are disabled\n"); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c5b7b8e..83afdd0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -228,27 +228,6 @@ static void intel_detect_pch(struct drm_device *dev) pci_dev_put(pch); } -bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv) -{ - if (INTEL_GEN(dev_priv) < 6) - return false; - - if (i915.semaphores >= 0) - return i915.semaphores; - - /* TODO: make semaphores and Execlists play nicely together */ - if (i915.enable_execlists) - return false; - -#ifdef CONFIG_INTEL_IOMMU - /* Enable semaphores on SNB when IO remapping is off */ - if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) - return false; -#endif - - return true; -} - static int i915_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -324,7 +303,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = 1; break; case I915_PARAM_HAS_SEMAPHORES: - value = i915_semaphore_is_enabled(dev_priv); + value = i915.semaphores; break; case I915_PARAM_HAS_PRIME_VMAP_FLUSH: value = 1; @@ -999,6 +978,9 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) i915.enable_ppgtt = intel_sanitize_enable_ppgtt(dev_priv, i915.enable_ppgtt); DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); + + i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores); + DRM_DEBUG_DRIVER("use GPU sempahores? 
%s\n", yesno(i915.semaphores)); } /** diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3ca1ace..0f408ad 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2813,6 +2813,8 @@ extern int i915_resume_switcheroo(struct drm_device *dev); int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, int enable_ppgtt); +bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value); + /* i915_drv.c */ void __printf(3, 4) __i915_printk(struct drm_i915_private *dev_priv, const char *level, @@ -3641,7 +3643,6 @@ extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val); extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); -extern bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv); int i915_reg_read_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 536acd5..40047eb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2856,7 +2856,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, if (i915_gem_request_completed(from_req)) return 0; - if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) { + if (!i915.semaphores) { struct drm_i915_private *i915 = to_i915(obj->base.dev); ret = __i915_wait_request(from_req, i915->mm.interruptible, @@ -4537,6 +4537,27 @@ out: return ret; } +bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) +{ + if (INTEL_INFO(dev_priv)->gen < 6) + return false; + + /* TODO: make semaphores and Execlists play nicely together */ + if (i915.enable_execlists) + return false; + + if (value >= 0) + return value; + +#ifdef CONFIG_INTEL_IOMMU + /* Enable semaphores on SNB when IO remapping is off */ + if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) + return false; +#endif + + return true; +} + int i915_gem_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 2600939..bd13d08 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -556,7 +556,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) u32 flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ - i915_semaphore_is_enabled(dev_priv) ? + i915.semaphores ? 
hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 : 0; int len, ret; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 6daaf4e..4d39c72 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -863,7 +863,7 @@ static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv, struct intel_engine_cs *to; enum intel_engine_id id; - if (!i915_semaphore_is_enabled(dev_priv)) + if (!i915.semaphores) return; if (!error->semaphore_obj) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b9638e1..b844e69 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2679,7 +2679,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, struct drm_i915_gem_object *obj; int ret, i; - if (!i915_semaphore_is_enabled(dev_priv)) + if (!i915.semaphores) return; if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) { @@ -2700,7 +2700,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, } } - if (!i915_semaphore_is_enabled(dev_priv)) + if (!i915.semaphores) return; if (INTEL_GEN(dev_priv) >= 8) { @@ -2838,7 +2838,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) engine->init_context = intel_rcs_ctx_init; engine->add_request = gen8_render_add_request; engine->flush = gen8_render_ring_flush; - if (i915_semaphore_is_enabled(dev_priv)) + if (i915.semaphores) engine->semaphore.signal = gen8_rcs_signal; } else if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; -- cgit v0.10.2 From 4ba9c1f7c7b8ca8c1d77f65d408e589dc87b9a2d Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 20 Jul 2016 14:26:12 +0300 Subject: drm/i915/gen9: Add WaInPlaceDecompressionHang Add this workaround to prevent hang when in place compression is used. 
References: HSD#2135774 Cc: stable@vger.kernel.org Signed-off-by: Mika Kuoppala Reviewed-by: Arun Siluvery Signed-off-by: Joonas Lahtinen diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0f408ad..c97724d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2629,6 +2629,8 @@ struct drm_i915_cmd_table { #define SKL_REVID_D0 0x3 #define SKL_REVID_E0 0x4 #define SKL_REVID_F0 0x5 +#define SKL_REVID_G0 0x6 +#define SKL_REVID_H0 0x7 #define IS_SKL_REVID(p, since, until) (IS_SKYLAKE(p) && IS_REVID(p, since, until)) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8bfde75..ce14fe0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1686,6 +1686,9 @@ enum skl_disp_power_wells { #define GEN7_TLB_RD_ADDR _MMIO(0x4700) +#define GEN9_GAMT_ECO_REG_RW_IA _MMIO(0x4ab0) +#define GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS (1<<18) + #define GAMT_CHKN_BIT_REG _MMIO(0x4ab8) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b844e69..502bd7c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1104,6 +1104,11 @@ static int skl_init_workarounds(struct intel_engine_cs *engine) /* WaDisableGafsUnitClkGating:skl */ WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + /* WaInPlaceDecompressionHang:skl */ + if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + /* WaDisableLSQCROPERFforOCL:skl */ ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); if (ret) @@ -1173,6 +1178,11 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + /* WaInPlaceDecompressionHang:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + return 0; } @@ -1220,6 +1230,10 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) GEN7_HALF_SLICE_CHICKEN1, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + /* WaInPlaceDecompressionHang:kbl */ + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + /* WaDisableLSQCROPERFforOCL:kbl */ ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); if (ret) -- cgit v0.10.2 From 873e8171aebe9e633734cef18e04c64e4715a9a4 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 20 Jul 2016 14:26:13 +0300 Subject: drm/i915/gen9: Add WaDisableGatherAtSetShaderCommonSlice Add WaDisableGatherAtSetShaderCommonSlice for all gen9 as stated by bspec. The bspec told to put this workaround to the per ctx bb. Initial implementation and subsequent review were done based on bspec. Arun raised a suspicion that this would belong to indirect bb instead and he conducted more throughout investigation on the matter and indeed the documentation was wrong. 
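In the indirect context batch buffer the workaround reduces to a masked-bit register write emitted via MI_LOAD_REGISTER_IMM (sketched from the hunk below):

    /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl */
    wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
    wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2);
    wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(
                GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE));
    wa_ctx_emit(batch, index, MI_NOOP);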
v2: Move to indirect_ctx wa bb, as it is correct place (Arun) References: HSD#2135817 Cc: Arun Siluvery Cc: Matthew Auld Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Auld (v1) Reviewed-by: Arun Siluvery Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469013973-24104-1-git-send-email-mika.kuoppala@intel.com diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ce14fe0..f031231 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6132,6 +6132,7 @@ enum { # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1<<10) | (1<<26)) # define GEN9_RHWO_OPTIMIZATION_DISABLE (1<<14) #define COMMON_SLICE_CHICKEN2 _MMIO(0x7014) +# define GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE (1<<12) # define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8) # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1<<0) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 439aeab..daf1279 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1256,6 +1256,13 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, return ret; index = ret; + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl */ + wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); + wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2); + wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE( + GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE)); + wa_ctx_emit(batch, index, MI_NOOP); + /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) { -- cgit v0.10.2 From bbdc070a79b791bec0e374089bd0e7ecd949e6e8 Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Wed, 20 Jul 2016 18:16:05 +0100 Subject: drm/i915: rename macro parameter(ring) to (engine) 'ring' is an old deprecated term for a GPU engine. Here we make the terminology more consistent by renaming the 'ring' parameter of lots of macros that calculate addresses within the MMIO space of an engine. 
Signed-off-by: Dave Gordon Link: http://patchwork.freedesktop.org/patch/msgid/1469034967-15840-2-git-send-email-david.s.gordon@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f031231..9397dde 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,13 +186,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN9_GRDOM_GUC (1 << 5) #define GEN8_GRDOM_MEDIA2 (1 << 7) -#define RING_PP_DIR_BASE(ring) _MMIO((ring)->mmio_base+0x228) -#define RING_PP_DIR_BASE_READ(ring) _MMIO((ring)->mmio_base+0x518) -#define RING_PP_DIR_DCLV(ring) _MMIO((ring)->mmio_base+0x220) +#define RING_PP_DIR_BASE(engine) _MMIO((engine)->mmio_base+0x228) +#define RING_PP_DIR_BASE_READ(engine) _MMIO((engine)->mmio_base+0x518) +#define RING_PP_DIR_DCLV(engine) _MMIO((engine)->mmio_base+0x220) #define PP_DIR_DCLV_2G 0xffffffff -#define GEN8_RING_PDP_UDW(ring, n) _MMIO((ring)->mmio_base+0x270 + (n) * 8 + 4) -#define GEN8_RING_PDP_LDW(ring, n) _MMIO((ring)->mmio_base+0x270 + (n) * 8) +#define GEN8_RING_PDP_UDW(engine, n) _MMIO((engine)->mmio_base+0x270 + (n) * 8 + 4) +#define GEN8_RING_PDP_LDW(engine, n) _MMIO((engine)->mmio_base+0x270 + (n) * 8) #define GEN8_R_PWR_CLK_STATE _MMIO(0x20C8) #define GEN8_RPCS_ENABLE (1 << 31) @@ -1647,7 +1647,7 @@ enum skl_disp_power_wells { #define ARB_MODE_BWGTLB_DISABLE (1<<9) #define ARB_MODE_SWIZZLE_BDW (1<<1) #define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) -#define RING_FAULT_REG(ring) _MMIO(0x4094 + 0x100*(ring)->id) +#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100*(engine)->id) #define RING_FAULT_GTTSEL_MASK (1<<11) #define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) #define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) @@ -1845,7 +1845,7 @@ enum skl_disp_power_wells { #define GFX_MODE _MMIO(0x2520) #define GFX_MODE_GEN7 _MMIO(0x229c) -#define RING_MODE_GEN7(ring) _MMIO((ring)->mmio_base+0x29c) +#define RING_MODE_GEN7(engine) _MMIO((engine)->mmio_base+0x29c) #define GFX_RUN_LIST_ENABLE (1<<15) #define GFX_INTERRUPT_STEERING (1<<14) #define GFX_TLB_INVALIDATE_EXPLICIT (1<<13) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index aa3ac02..3828730 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -29,17 +29,17 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 /* Execlists regs */ -#define RING_ELSP(ring) _MMIO((ring)->mmio_base + 0x230) -#define RING_EXECLIST_STATUS_LO(ring) _MMIO((ring)->mmio_base + 0x234) -#define RING_EXECLIST_STATUS_HI(ring) _MMIO((ring)->mmio_base + 0x234 + 4) -#define RING_CONTEXT_CONTROL(ring) _MMIO((ring)->mmio_base + 0x244) +#define RING_ELSP(engine) _MMIO((engine)->mmio_base + 0x230) +#define RING_EXECLIST_STATUS_LO(engine) _MMIO((engine)->mmio_base + 0x234) +#define RING_EXECLIST_STATUS_HI(engine) _MMIO((engine)->mmio_base + 0x234 + 4) +#define RING_CONTEXT_CONTROL(engine) _MMIO((engine)->mmio_base + 0x244) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) -#define RING_CONTEXT_STATUS_BUF_BASE(ring) _MMIO((ring)->mmio_base + 0x370) -#define RING_CONTEXT_STATUS_BUF_LO(ring, i) _MMIO((ring)->mmio_base + 0x370 + (i) * 8) -#define RING_CONTEXT_STATUS_BUF_HI(ring, i) _MMIO((ring)->mmio_base + 0x370 + (i) * 8 + 4) -#define RING_CONTEXT_STATUS_PTR(ring) _MMIO((ring)->mmio_base + 0x3a0) +#define RING_CONTEXT_STATUS_BUF_BASE(engine) _MMIO((engine)->mmio_base + 0x370) +#define 
RING_CONTEXT_STATUS_BUF_LO(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8) +#define RING_CONTEXT_STATUS_BUF_HI(engine, i) _MMIO((engine)->mmio_base + 0x370 + (i) * 8 + 4) +#define RING_CONTEXT_STATUS_PTR(engine) _MMIO((engine)->mmio_base + 0x3a0) /* The docs specify that the write pointer wraps around after 5h, "After status * is written out to the last available status QW at offset 5h, this pointer diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 05bab8b..4671fb8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -31,23 +31,23 @@ struct intel_hw_status_page { struct drm_i915_gem_object *obj; }; -#define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base)) -#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val) +#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) +#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val) -#define I915_READ_START(ring) I915_READ(RING_START((ring)->mmio_base)) -#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val) +#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base)) +#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val) -#define I915_READ_HEAD(ring) I915_READ(RING_HEAD((ring)->mmio_base)) -#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val) +#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base)) +#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val) -#define I915_READ_CTL(ring) I915_READ(RING_CTL((ring)->mmio_base)) -#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val) +#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base)) +#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val) -#define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base)) -#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val) +#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base)) +#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val) -#define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base)) -#define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)->mmio_base), val) +#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base)) +#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val) /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. -- cgit v0.10.2 From 38a0f2db5b776246da35fe83c96f2c1ba87e4afa Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Wed, 20 Jul 2016 18:16:06 +0100 Subject: drm/i915: rename 'ring' where it refers to an engine or engine_id 'ring' is an old deprecated term for a GPU engine. Chris Wilson wants to use the name for what is currently known as an intel_ringbuffer, but it will be dreadfully confusing if some rings are ringbuffers but other rings are still engines. So this patch changes the names of a bunch of parameters called 'ring' to either 'engine' or 'engine_id' according to what they actually are. 
Signed-off-by: Dave Gordon Link: http://patchwork.freedesktop.org/patch/msgid/1469034967-15840-3-git-send-email-david.s.gordon@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 2280c32..bd46968 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -204,9 +204,9 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, return result; } -static i915_reg_t mocs_register(enum intel_engine_id ring, int index) +static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) { - switch (ring) { + switch (engine_id) { case RCS: return GEN9_GFX_MOCS(index); case VCS: @@ -218,7 +218,7 @@ static i915_reg_t mocs_register(enum intel_engine_id ring, int index) case VCS2: return GEN9_MFX1_MOCS(index); default: - MISSING_CASE(ring); + MISSING_CASE(engine_id); return INVALID_MMIO_REG; } } diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/intel_mocs.h index 4640299..a8bd9f7 100644 --- a/drivers/gpu/drm/i915/intel_mocs.h +++ b/drivers/gpu/drm/i915/intel_mocs.h @@ -54,6 +54,6 @@ int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req); void intel_mocs_init_l3cc_table(struct drm_device *dev); -int intel_mocs_init_engine(struct intel_engine_cs *ring); +int intel_mocs_init_engine(struct intel_engine_cs *engine); #endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 502bd7c..0b5d1de 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1595,7 +1595,7 @@ gen6_ring_sync(struct drm_i915_gem_request *waiter_req, } static void -gen5_seqno_barrier(struct intel_engine_cs *ring) +gen5_seqno_barrier(struct intel_engine_cs *engine) { /* MI_STORE are internally buffered by the GPU and not flushed * either by MI_FLUSH or SyncFlush or any other combination of diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4671fb8..0f80194 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -197,14 +197,14 @@ struct intel_engine_cs { u32 irq_keep_mask; /* always keep these interrupts */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ - void (*irq_enable)(struct intel_engine_cs *ring); - void (*irq_disable)(struct intel_engine_cs *ring); + void (*irq_enable)(struct intel_engine_cs *engine); + void (*irq_disable)(struct intel_engine_cs *engine); - int (*init_hw)(struct intel_engine_cs *ring); + int (*init_hw)(struct intel_engine_cs *engine); int (*init_context)(struct drm_i915_gem_request *req); - void (*write_tail)(struct intel_engine_cs *ring, + void (*write_tail)(struct intel_engine_cs *engine, u32 value); int __must_check (*flush)(struct drm_i915_gem_request *req, u32 invalidate_domains, @@ -216,14 +216,14 @@ struct intel_engine_cs { * seen value is good enough. Note that the seqno will always be * monotonic, even if not coherent. */ - void (*irq_seqno_barrier)(struct intel_engine_cs *ring); + void (*irq_seqno_barrier)(struct intel_engine_cs *engine); int (*dispatch_execbuffer)(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned dispatch_flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 #define I915_DISPATCH_RS 0x4 - void (*cleanup)(struct intel_engine_cs *ring); + void (*cleanup)(struct intel_engine_cs *engine); /* GEN8 signal/wait table - never trust comments! 
* signal to signal to signal to signal to signal to -- cgit v0.10.2 From f8ca0c07f68ba47f9c512019514af2922713b5ce Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Wed, 20 Jul 2016 18:16:07 +0100 Subject: drm/i915: rename & update eb_select_ring() 'ring' is an old deprecated term for a GPU engine, so we're trying to phase out all such terminology. eb_select_ring() not only has 'ring' (meaning engine) in its name, but it has an ugly calling convention whereby it returns an errno and stores a pointer-to-engine indirectly through an output parameter. As there is only one error it ever returns (-EINVAL), we can make it return the pointer directly, and have the caller pass back the error code -EINVAL if the pointer result is NULL. Thus we can replace - ret = eb_select_ring(dev_priv, file, args, &engine); - if (ret) - return ret; with + engine = eb_select_engine(dev_priv, file, args); + if (!engine) + return -EINVAL; for increased clarity and maybe save a few cycles too. Signed-off-by: Dave Gordon Link: http://patchwork.freedesktop.org/patch/msgid/1469034967-15840-4-git-send-email-david.s.gordon@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6482ec2..f8d8ae3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1380,24 +1380,24 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { [I915_EXEC_VEBOX] = VECS }; -static int -eb_select_ring(struct drm_i915_private *dev_priv, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args, - struct intel_engine_cs **ring) +static struct intel_engine_cs * +eb_select_engine(struct drm_i915_private *dev_priv, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) { unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; + struct intel_engine_cs *engine; if (user_ring_id > I915_USER_RINGS) { DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return -EINVAL; + return NULL; } if ((user_ring_id != I915_EXEC_BSD) && ((args->flags & I915_EXEC_BSD_MASK) != 0)) { DRM_DEBUG("execbuf with non bsd ring but with invalid " "bsd dispatch flags: %d\n", (int)(args->flags)); - return -EINVAL; + return NULL; } if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) { @@ -1412,20 +1412,20 @@ eb_select_ring(struct drm_i915_private *dev_priv, } else { DRM_DEBUG("execbuf with unknown bsd ring: %u\n", bsd_idx); - return -EINVAL; + return NULL; } - *ring = &dev_priv->engine[_VCS(bsd_idx)]; + engine = &dev_priv->engine[_VCS(bsd_idx)]; } else { - *ring = &dev_priv->engine[user_ring_map[user_ring_id]]; + engine = &dev_priv->engine[user_ring_map[user_ring_id]]; } - if (!intel_engine_initialized(*ring)) { + if (!intel_engine_initialized(engine)) { DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); - return -EINVAL; + return NULL; } - return 0; + return engine; } static int @@ -1467,9 +1467,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->flags & I915_EXEC_IS_PINNED) dispatch_flags |= I915_DISPATCH_PINNED; - ret = eb_select_ring(dev_priv, file, args, &engine); - if (ret) - return ret; + engine = eb_select_engine(dev_priv, file, args); + if (!engine) + return -EINVAL; if (args->buffer_count < 1) { DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); -- cgit v0.10.2 From 65fe29eeec71d3e50870c5048d2c79bc9a477a76 Mon Sep 17 00:00:00 2001 From: Tim Gore Date: Wed, 20 Jul 2016 11:00:25 +0100 Subject: drm/i915:gen9: 
restrict WaC6DisallowByGfxPause WaC6DisallowByGfxPause is currently applied unconditionally but is not required in all revisions. v2: extend application of workaround to agree with w/a database, which differs from the HSD. References: HSD#2133391 Signed-off-by: Tim Gore Reviewed-by: Sagar Arun Kamble Signed-off-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1469008825-19442-1-git-send-email-tim.gore@intel.com diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index a6a8fba..b883efd 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -349,7 +349,9 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) } /* WaC6DisallowByGfxPause*/ - I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); + if (IS_SKL_REVID(dev, 0, SKL_REVID_C0) || + IS_BXT_REVID(dev, 0, BXT_REVID_B0)) + I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); if (IS_BROXTON(dev)) I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); -- cgit v0.10.2 From b12e0ee2080c093a08243726f0f3c57fddf954a5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 Jul 2016 18:28:30 +0100 Subject: drm/i915: Enable RC6 immediately Now that PCU communication is reasonably fast, we do not need to defer RC6 initialisation to a workqueue. References: https://bugs.freedesktop.org/show_bug.cgi?id=97017 Signed-off-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 83afdd0..84e4018 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1630,7 +1630,6 @@ static int i915_drm_resume(struct drm_device *dev) intel_opregion_notify_adapter(dev_priv, PCI_D0); - intel_autoenable_gt_powersave(dev_priv); drm_kms_helper_poll_enable(dev); enable_rpm_wakeref_asserts(dev_priv); @@ -1812,7 +1811,8 @@ int i915_reset(struct drm_i915_private *dev_priv) * previous concerns that it doesn't respond well to some forms * of re-init after reset. 
*/ - intel_autoenable_gt_powersave(dev_priv); + if (INTEL_GEN(dev_priv) > 5) + intel_enable_gt_powersave(dev_priv); return 0; @@ -2440,6 +2440,7 @@ static int intel_runtime_resume(struct device *device) i915_gem_init_swizzling(dev); intel_runtime_pm_enable_interrupts(dev_priv); + intel_enable_gt_powersave(dev_priv); /* * On VLV/CHV display interrupts are part of the display diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c97724d..52be86e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1192,7 +1192,6 @@ struct intel_gen6_power_mgmt { bool client_boost; bool enabled; - struct delayed_work autoenable_work; unsigned boosts; /* manual wa residency calculations */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 40047eb..90b9f46 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4355,8 +4355,6 @@ i915_gem_suspend(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; - intel_suspend_gt_powersave(dev_priv); - mutex_lock(&dev->struct_mutex); /* We have to flush all the executing contexts to main memory so diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 60a3a34..b20b004 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -405,7 +405,6 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine) intel_runtime_pm_get_noresume(dev_priv); dev_priv->gt.awake = true; - intel_enable_gt_powersave(dev_priv); i915_update_gfx_val(dev_priv); if (INTEL_GEN(dev_priv) >= 6) gen6_rps_busy(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 78beb7e..0011609 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15502,6 +15502,7 @@ void intel_modeset_init_hw(struct drm_device *dev) dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; intel_init_clock_gating(dev); + intel_enable_gt_powersave(dev_priv); } /* diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index e74d851..8bb98ec 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1691,12 +1691,9 @@ void intel_pm_setup(struct drm_device *dev); void intel_gpu_ips_init(struct drm_i915_private *dev_priv); void intel_gpu_ips_teardown(void); void intel_init_gt_powersave(struct drm_i915_private *dev_priv); -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); -void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); -void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); +void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 64d628c..45753e1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6526,8 +6526,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->rps.boost_freq = dev_priv->rps.max_freq; mutex_unlock(&dev_priv->rps.hw_lock); - - 
intel_autoenable_gt_powersave(dev_priv); } void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) @@ -6541,31 +6539,10 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) intel_runtime_pm_put(dev_priv); } -/** - * intel_suspend_gt_powersave - suspend PM work and helper threads - * @dev_priv: i915 device - * - * We don't want to disable RC6 or other features here, we just want - * to make sure any work we've queued has finished and won't bother - * us while we're suspended. - */ -void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) -{ - if (INTEL_GEN(dev_priv) < 6) - return; - - if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) - intel_runtime_pm_put(dev_priv); - - /* gen6_rps_idle() will be called later to disable interrupts */ -} - void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) { dev_priv->rps.enabled = true; /* force disabling */ intel_disable_gt_powersave(dev_priv); - - gen6_reset_rps_interrupts(dev_priv); } void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) @@ -6590,13 +6567,12 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->rps.enabled = false; mutex_unlock(&dev_priv->rps.hw_lock); + + gen6_reset_rps_interrupts(dev_priv); } void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { - /* We shouldn't be disabling as we submit, so this should be less - * racy than it appears! - */ if (READ_ONCE(dev_priv->rps.enabled)) return; @@ -6632,75 +6608,9 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); - dev_priv->rps.enabled = true; mutex_unlock(&dev_priv->rps.hw_lock); } -static void __intel_autoenable_gt_powersave(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), rps.autoenable_work.work); - struct intel_engine_cs *rcs; - struct drm_i915_gem_request *req; - - if (READ_ONCE(dev_priv->rps.enabled)) - goto out; - - rcs = &dev_priv->engine[RCS]; - if (rcs->last_context) - goto out; - - if (!rcs->init_context) - goto out; - - mutex_lock(&dev_priv->drm.struct_mutex); - - req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); - if (IS_ERR(req)) - goto unlock; - - if (!i915.enable_execlists && i915_switch_context(req) == 0) - rcs->init_context(req); - - /* Mark the device busy, calling intel_enable_gt_powersave() */ - i915_add_request_no_flush(req); - -unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); -out: - intel_runtime_pm_put(dev_priv); -} - -void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) -{ - if (READ_ONCE(dev_priv->rps.enabled)) - return; - - if (IS_IRONLAKE_M(dev_priv)) { - ironlake_enable_drps(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); - intel_init_emon(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - } else if (INTEL_INFO(dev_priv)->gen >= 6) { - /* - * PCU communication is slow and this doesn't need to be - * done at any specific time, so do this out of our fast path - * to make resume and init faster. - * - * We depend on the HW RC6 power context save/restore - * mechanism when entering D3 through runtime PM suspend. So - * disable RPM until RPS/RC6 is properly setup. We can only - * get here via the driver load/system resume/runtime resume - * paths, so the _noresume version is enough (and in case of - * runtime resume it's necessary). 
- */ - if (queue_delayed_work(dev_priv->wq, - &dev_priv->rps.autoenable_work, - round_jiffies_up_relative(HZ))) - intel_runtime_pm_get_noresume(dev_priv); - } -} - static void ibx_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -7806,8 +7716,6 @@ void intel_pm_setup(struct drm_device *dev) mutex_init(&dev_priv->rps.hw_lock); spin_lock_init(&dev_priv->rps.client_lock); - INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, - __intel_autoenable_gt_powersave); INIT_LIST_HEAD(&dev_priv->rps.clients); dev_priv->pm.suspended = false; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 43f8339..2d4bca4 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -435,7 +435,7 @@ void intel_uncore_sanitize(struct drm_i915_private *dev_priv) i915.enable_rc6 = sanitize_rc6_option(dev_priv, i915.enable_rc6); /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_sanitize_gt_powersave(dev_priv); + intel_disable_gt_powersave(dev_priv); } static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, -- cgit v0.10.2 From 13f17b22a720c4d98c3df1ebafcf9e3d3ac1c463 Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Thu, 21 Jul 2016 18:39:38 +0100 Subject: drm/i915: use i915_gem_object_put_unlocked() after releasing mutex The exit path in intel_overlay_put_image_ioctl() first unlocks the struct_mutex, then drops its reference to 'new_bo' by calling i915_gem_object_put(). As it isn't holding the mutex at this point, this should be i915_gem_object_put_unlocked(). This was previously correct but got splatted in the recent s/drm_gem_object_unreference/i915_gem_object_put/ where the _unlocked suffix was lost in this one case. v2: don't bother fixing whitespace glitch [Chris Wilson] Chris can do it next time he touches gem_evict.c ;) Fixes: f8c417cd drm/i915: Rename drm_gem_object_unreference in preparation ... Signed-off-by: Dave Gordon Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469122778-14416-1-git-send-email-david.s.gordon@intel.com Signed-off-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 8654a32..c10ce36 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1219,7 +1219,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, out_unlock: mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); - i915_gem_object_put(new_bo); + i915_gem_object_put_unlocked(new_bo); out_free: kfree(params); -- cgit v0.10.2 From 843759a5f81d0b65df12f3bb6627fc7c3e450a98 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 14 Jul 2016 16:15:04 +0200 Subject: drm/i915: Remove misleading CSR firmware loading docs I forgot to remove these when reworking the firmware loading sequence last year. The new sequence is that we load firmware, and if it's not there we entirely (and permanently) fail dmc setup. 
Reported-by: Dave Gordon Reviewed-by: Dave Gordon Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468505704-17391-1-git-send-email-daniel.vetter@ffwll.ch diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index c3b33a1..1ea0e1f 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -32,13 +32,6 @@ * onwards to drive newly added DMC (Display microcontroller) in display * engine to save and restore the state of display engine when it enter into * low-power state and comes back to normal. - * - * Firmware loading status will be one of the below states: FW_UNINITIALIZED, - * FW_LOADED, FW_FAILED. - * - * Once the firmware is written into the registers status will be moved from - * FW_UNINITIALIZED to FW_LOADED and for any erroneous condition status will - * be moved to FW_FAILED. */ #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" -- cgit v0.10.2 From 54b4f68f184c4b8409c113148758baf37d51351b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 Jul 2016 21:16:19 +0100 Subject: Revert "drm/i915: Enable RC6 immediately" This reverts commit b12e0ee2080c ("drm/i915: Enable RC6 immediately"), as it was never meant to be sent anywhere other than the bug report for experimentation. Signed-off-by: Chris Wilson Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1469132179-4052-1-git-send-email-chris@chris-wilson.co.uk Acked-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 84e4018..83afdd0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1630,6 +1630,7 @@ static int i915_drm_resume(struct drm_device *dev) intel_opregion_notify_adapter(dev_priv, PCI_D0); + intel_autoenable_gt_powersave(dev_priv); drm_kms_helper_poll_enable(dev); enable_rpm_wakeref_asserts(dev_priv); @@ -1811,8 +1812,7 @@ int i915_reset(struct drm_i915_private *dev_priv) * previous concerns that it doesn't respond well to some forms * of re-init after reset. 
*/ - if (INTEL_GEN(dev_priv) > 5) - intel_enable_gt_powersave(dev_priv); + intel_autoenable_gt_powersave(dev_priv); return 0; @@ -2440,7 +2440,6 @@ static int intel_runtime_resume(struct device *device) i915_gem_init_swizzling(dev); intel_runtime_pm_enable_interrupts(dev_priv); - intel_enable_gt_powersave(dev_priv); /* * On VLV/CHV display interrupts are part of the display diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 52be86e..c97724d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1192,6 +1192,7 @@ struct intel_gen6_power_mgmt { bool client_boost; bool enabled; + struct delayed_work autoenable_work; unsigned boosts; /* manual wa residency calculations */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 90b9f46..40047eb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4355,6 +4355,8 @@ i915_gem_suspend(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; + intel_suspend_gt_powersave(dev_priv); + mutex_lock(&dev->struct_mutex); /* We have to flush all the executing contexts to main memory so diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b20b004..60a3a34 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -405,6 +405,7 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine) intel_runtime_pm_get_noresume(dev_priv); dev_priv->gt.awake = true; + intel_enable_gt_powersave(dev_priv); i915_update_gfx_val(dev_priv); if (INTEL_GEN(dev_priv) >= 6) gen6_rps_busy(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0011609..78beb7e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15502,7 +15502,6 @@ void intel_modeset_init_hw(struct drm_device *dev) dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; intel_init_clock_gating(dev); - intel_enable_gt_powersave(dev_priv); } /* diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8bb98ec..e74d851 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1691,9 +1691,12 @@ void intel_pm_setup(struct drm_device *dev); void intel_gpu_ips_init(struct drm_i915_private *dev_priv); void intel_gpu_ips_teardown(void); void intel_init_gt_powersave(struct drm_i915_private *dev_priv); +void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); +void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); +void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 45753e1..64d628c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6526,6 +6526,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->rps.boost_freq = dev_priv->rps.max_freq; mutex_unlock(&dev_priv->rps.hw_lock); + + 
intel_autoenable_gt_powersave(dev_priv); } void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) @@ -6539,10 +6541,31 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) intel_runtime_pm_put(dev_priv); } +/** + * intel_suspend_gt_powersave - suspend PM work and helper threads + * @dev_priv: i915 device + * + * We don't want to disable RC6 or other features here, we just want + * to make sure any work we've queued has finished and won't bother + * us while we're suspended. + */ +void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) < 6) + return; + + if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) + intel_runtime_pm_put(dev_priv); + + /* gen6_rps_idle() will be called later to disable interrupts */ +} + void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) { dev_priv->rps.enabled = true; /* force disabling */ intel_disable_gt_powersave(dev_priv); + + gen6_reset_rps_interrupts(dev_priv); } void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) @@ -6567,12 +6590,13 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->rps.enabled = false; mutex_unlock(&dev_priv->rps.hw_lock); - - gen6_reset_rps_interrupts(dev_priv); } void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { + /* We shouldn't be disabling as we submit, so this should be less + * racy than it appears! + */ if (READ_ONCE(dev_priv->rps.enabled)) return; @@ -6608,9 +6632,75 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); + dev_priv->rps.enabled = true; mutex_unlock(&dev_priv->rps.hw_lock); } +static void __intel_autoenable_gt_powersave(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), rps.autoenable_work.work); + struct intel_engine_cs *rcs; + struct drm_i915_gem_request *req; + + if (READ_ONCE(dev_priv->rps.enabled)) + goto out; + + rcs = &dev_priv->engine[RCS]; + if (rcs->last_context) + goto out; + + if (!rcs->init_context) + goto out; + + mutex_lock(&dev_priv->drm.struct_mutex); + + req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); + if (IS_ERR(req)) + goto unlock; + + if (!i915.enable_execlists && i915_switch_context(req) == 0) + rcs->init_context(req); + + /* Mark the device busy, calling intel_enable_gt_powersave() */ + i915_add_request_no_flush(req); + +unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); +out: + intel_runtime_pm_put(dev_priv); +} + +void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) +{ + if (READ_ONCE(dev_priv->rps.enabled)) + return; + + if (IS_IRONLAKE_M(dev_priv)) { + ironlake_enable_drps(dev_priv); + mutex_lock(&dev_priv->drm.struct_mutex); + intel_init_emon(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + } else if (INTEL_INFO(dev_priv)->gen >= 6) { + /* + * PCU communication is slow and this doesn't need to be + * done at any specific time, so do this out of our fast path + * to make resume and init faster. + * + * We depend on the HW RC6 power context save/restore + * mechanism when entering D3 through runtime PM suspend. So + * disable RPM until RPS/RC6 is properly setup. We can only + * get here via the driver load/system resume/runtime resume + * paths, so the _noresume version is enough (and in case of + * runtime resume it's necessary). 
+ */ + if (queue_delayed_work(dev_priv->wq, + &dev_priv->rps.autoenable_work, + round_jiffies_up_relative(HZ))) + intel_runtime_pm_get_noresume(dev_priv); + } +} + static void ibx_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -7716,6 +7806,8 @@ void intel_pm_setup(struct drm_device *dev) mutex_init(&dev_priv->rps.hw_lock); spin_lock_init(&dev_priv->rps.client_lock); + INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, + __intel_autoenable_gt_powersave); INIT_LIST_HEAD(&dev_priv->rps.clients); dev_priv->pm.suspended = false; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 2d4bca4..43f8339 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -435,7 +435,7 @@ void intel_uncore_sanitize(struct drm_i915_private *dev_priv) i915.enable_rc6 = sanitize_rc6_option(dev_priv, i915.enable_rc6); /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_disable_gt_powersave(dev_priv); + intel_sanitize_gt_powersave(dev_priv); } static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, -- cgit v0.10.2 From 2529d57050af51232fddbbe135fcc2a216182c5d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 24 Jul 2016 10:10:20 +0100 Subject: drm/i915: Drop racy markup of missed-irqs from idle-worker During the idle-worker we disable the hangcheck and so kick any waiters that should have been completed (since the GPU is now idle). Unlike the hangcheck, we do not take any care to avoid the race between the irq handler and ourselves, and so it is possible for us to declare a missed interrupt even as the bottom-half is being scheduled to run. Let's ignore this race to stop a potential false-positive error. References: https://bugs.freedesktop.org/show_bug.cgi?id=96974 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1469351421-13493-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 40047eb..c843663 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2705,11 +2705,14 @@ i915_gem_idle_work_handler(struct work_struct *work) dev_priv->gt.awake = false; rearm_hangcheck = false; + /* As we have disabled hangcheck, we need to unstick any waiters still + * hanging around. However, as we may be racing against the interrupt + * handler or the waiters themselves, we skip enabling the fake-irq. + */ stuck_engines = intel_kick_waiters(dev_priv); - if (unlikely(stuck_engines)) { - DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n"); - dev_priv->gpu_error.missed_irq_rings |= stuck_engines; - } + if (unlikely(stuck_engines)) + DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n", + stuck_engines); if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); -- cgit v0.10.2 From f527a38ee764f57a86ceedfc105f425e56392db4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 24 Jul 2016 10:10:21 +0100 Subject: drm/i915: Update the breadcrumb interrupt counter before enabling In order to close a race with a long running hangcheck comparing a stale interrupt counter with a just started waiter, we need to first bump the counter as we start the fresh wait. 
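For illustration only -- a minimal sketch of the ordering this patch establishes, using hypothetical names (in the driver the counter is breadcrumbs.irq_wakeups and the actual change is the single increment added to irq_enable() in the diff below):

    /* Hypothetical types and helpers, not the i915 code. */
    struct fake_engine {
            unsigned int irq_count;   /* bumped when a waiter arms the irq and when the irq fires */
    };

    extern void hw_enable_irq(struct fake_engine *e);   /* stand-in for the real irq enable */

    static void waiter_arm_irq(struct fake_engine *e)
    {
            e->irq_count++;      /* publish activity first ... */
            hw_enable_irq(e);    /* ... then allow the interrupt to fire */
    }

    static bool hangcheck_saw_no_activity(const struct fake_engine *e,
                                          unsigned int snapshot)
    {
            /* Because the counter is bumped before the irq is enabled, a
             * hangcheck comparing against a stale snapshot can no longer
             * accuse a freshly started waiter of missing an interrupt.
             */
            return READ_ONCE(e->irq_count) == snapshot;
    }
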
References: https://bugs.freedesktop.org/show_bug.cgi?id=96974 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1469351421-13493-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index f0b56e3..d893ccd 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -51,6 +51,13 @@ static void irq_enable(struct intel_engine_cs *engine) */ engine->breadcrumbs.irq_posted = true; + /* Make sure the current hangcheck doesn't falsely accuse a just + * started irq handler from missing an interrupt (because the + * interrupt count still matches the stale value from when + * the irq handler was disabled, many hangchecks ago). + */ + engine->breadcrumbs.irq_wakeups++; + spin_lock_irq(&engine->i915->irq_lock); engine->irq_enable(engine); spin_unlock_irq(&engine->i915->irq_lock); -- cgit v0.10.2 From d2b9448f96b566d19f4759afc0364c9d23634390 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 25 Jul 2016 08:00:19 +0200 Subject: drm/i915: Update DRIVER_DATE to 20160725 Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c97724d..9f655e2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -70,7 +70,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20160711" +#define DRIVER_DATE "20160725" #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ -- cgit v0.10.2 From 4a50d20e105a4a8e46a641e4779e9431e0aabc84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Jul 2016 12:01:50 +0100 Subject: drm/i915: Reduce breadcrumb lock coverage for intel_engine_enable_signaling() Since intel_engine_enable_signaling() is now only called via fence_enable_sw_signaling(), we can rely on it to provide serialisation and run-once for us and so make ourselves slightly simpler. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-2-git-send-email-chris@chris-wilson.co.uk Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469530913-17180-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index d893ccd..9086744 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -480,19 +480,15 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct rb_node *parent, **p; bool first, wakeup; - if (unlikely(READ_ONCE(request->signaling.wait.tsk))) - return; - - spin_lock(&b->lock); - if (unlikely(request->signaling.wait.tsk)) { - wakeup = false; - goto unlock; - } + /* locked by fence_enable_sw_signaling() */ + assert_spin_locked(&request->lock); request->signaling.wait.tsk = b->signaler; request->signaling.wait.seqno = request->fence.seqno; i915_gem_request_get(request); + spin_lock(&b->lock); + /* First add ourselves into the list of waiters, but register our * bottom-half as the signaller thread. 
As per usual, only the oldest * waiter (not just signaller) is tasked as the bottom-half waking @@ -525,7 +521,6 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) if (first) smp_store_mb(b->first_signal, request); -unlock: spin_unlock(&b->lock); if (wakeup) -- cgit v0.10.2 From 2a1d775201081c400d7e60ceb8e5ac887d11b1f7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Jul 2016 12:01:51 +0100 Subject: drm/i915: Prefer list_first_entry_or_null list_first_entry_or_null() can generate better code than using if (!list_empty()) {ptr = list_first_entry()) ..., so put it to use. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-3-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469530913-17180-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 30da543..38e7d99 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2736,13 +2736,11 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, if (node->color != color) *start += 4096; - if (!list_empty(&node->node_list)) { - node = list_entry(node->node_list.next, - struct drm_mm_node, - node_list); - if (node->allocated && node->color != color) - *end -= 4096; - } + node = list_first_entry_or_null(&node->node_list, + struct drm_mm_node, + node_list); + if (node && node->allocated && node->color != color) + *end -= 4096; } static int i915_gem_setup_global_gtt(struct drm_device *dev, diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 60a3a34..49396b8 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -317,12 +317,10 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, return ret; /* Move the oldest request to the slab-cache (if not in use!) */ - if (!list_empty(&engine->request_list)) { - req = list_first_entry(&engine->request_list, + req = list_first_entry_or_null(&engine->request_list, typeof(*req), list); - if (i915_gem_request_completed(req)) - i915_gem_request_retire(req); - } + if (req && i915_gem_request_completed(req)) + i915_gem_request_retire(req); req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); if (!req) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index afaa259..5d4772c 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -163,17 +163,18 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, */ for (phase = phases; phase->list; phase++) { struct list_head still_in_list; + struct drm_i915_gem_object *obj; if ((flags & phase->bit) == 0) continue; INIT_LIST_HEAD(&still_in_list); - while (count < target && !list_empty(phase->list)) { - struct drm_i915_gem_object *obj; + while (count < target && + (obj = list_first_entry_or_null(phase->list, + typeof(*obj), + global_list))) { struct i915_vma *vma, *v; - obj = list_first_entry(phase->list, - typeof(*obj), global_list); list_move_tail(&obj->global_list, &still_in_list); if (flags & I915_SHRINK_PURGEABLE && -- cgit v0.10.2 From 15f7bbc7351da8a5568c75ac09f142da78077a1d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Jul 2016 12:01:52 +0100 Subject: drm/i915: Only clear the client pointer when tearing down the file Upon release of the file (i.e. 
the user calls close(fd)), we decouple all objects from the client list so that we don't chase the dangling file_priv. As we always inspect file_priv first, we only need to nullify that pointer and can safely ignore the list_head. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-4-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469530913-17180-3-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c843663..3730aec 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4745,21 +4745,15 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) void i915_gem_release(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_gem_request *request; /* Clean up our request list when the client is going away, so that * later retire_requests won't dereference our soon-to-be-gone * file_priv. */ spin_lock(&file_priv->mm.lock); - while (!list_empty(&file_priv->mm.request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&file_priv->mm.request_list, - struct drm_i915_gem_request, - client_list); - list_del(&request->client_list); + list_for_each_entry(request, &file_priv->mm.request_list, client_list) request->file_priv = NULL; - } spin_unlock(&file_priv->mm.lock); if (!list_empty(&file_priv->rps.link)) { -- cgit v0.10.2 From f67cbce0f18299b70da776a5d699125b06523700 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Jul 2016 12:01:53 +0100 Subject: drm/i915: Only drop the batch-pool's object reference The obj->batch_pool_link is only inspected when traversing the batch pool list and when on the batch pool list the object is referenced. Thus when freeing the batch pool list, we only need to unreference the object and do not have to worry about the obj->batch_pool_link. 
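For illustration only -- a generic example of the teardown pattern this relies on, with hypothetical types rather than the i915 batch pool: when every element on a list holds exactly one reference and its link is only read while the list is walked, teardown can drop the references and reinitialise the list head once instead of doing a list_del() per node:

    /* Hypothetical example, not the i915 code. */
    #include <linux/kref.h>
    #include <linux/list.h>
    #include <linux/slab.h>

    struct cached_obj {
            struct kref ref;
            struct list_head link;   /* only read while the object sits on the cache list */
    };

    static void cached_obj_release(struct kref *ref)
    {
            kfree(container_of(ref, struct cached_obj, ref));
    }

    static void cache_fini(struct list_head *cache)
    {
            struct cached_obj *obj, *next;

            /* _safe iteration: the put below may free obj. */
            list_for_each_entry_safe(obj, next, cache, link)
                    kref_put(&obj->ref, cached_obj_release);

            /* One reinit instead of a list_del() per node; nothing looks
             * at obj->link once its reference has been dropped.
             */
            INIT_LIST_HEAD(cache);
    }
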
Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-5-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469530913-17180-4-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 3507b27..825981b5 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -68,15 +68,14 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool) WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { - while (!list_empty(&pool->cache_list[n])) { - struct drm_i915_gem_object *obj = - list_first_entry(&pool->cache_list[n], - struct drm_i915_gem_object, - batch_pool_link); + struct drm_i915_gem_object *obj, *next; - list_del(&obj->batch_pool_link); + list_for_each_entry_safe(obj, next, + &pool->cache_list[n], + batch_pool_link) i915_gem_object_put(obj); - } + + INIT_LIST_HEAD(&pool->cache_list[n]); } } -- cgit v0.10.2 From 848496e5902833600f7992f4faa82dc1546051ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 13 Jul 2016 16:32:03 +0300 Subject: drm/i915: Wait up to 3ms for the pcu to ack the cdclk change request on SKL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bspec tells us to keep bashing the PCU for up to 3ms when trying to inform it about an upcoming change in the cdclk frequency. Currently we only keep at it for 15*10usec (+ whatever delays gets added by the sandybridge_pcode_read() itself). Let's change the limit to 3ms. I decided to keep 10 usec delay per iteration for now, even though the spec doesn't really tell us to do that. Cc: stable@vger.kernel.org Fixes: 5d96d8afcfbb ("drm/i915/skl: Deinit/init the display at suspend/resume") Cc: David Weinehall Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468416723-23440-1-git-send-email-ville.syrjala@linux.intel.com Tested-by: David Weinehall Reviewed-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 78beb7e..c4c1c85 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5691,15 +5691,7 @@ static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv) static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv) { - unsigned int i; - - for (i = 0; i < 15; i++) { - if (skl_cdclk_pcu_ready(dev_priv)) - return true; - udelay(10); - } - - return false; + return _wait_for(skl_cdclk_pcu_ready(dev_priv), 3000, 10) == 0; } static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) -- cgit v0.10.2 From 33a051a5fc72c78a6770cb4f49b8932ae3587de9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jul 2016 09:07:26 +0100 Subject: drm/i915/cmdparser: Remove stray intel_engine_cs *ring When we refer to intel_engine_cs, we want to use engine so as not to confuse ourselves about ringbuffers. v2: Rename all the functions as well, as well as a few more stray comments. 
v3: Split the really long error message strings Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-6-git-send-email-chris@chris-wilson.co.uk Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469606850-28659-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index b0fd6a7..1db829c 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -62,23 +62,23 @@ * The parser always rejects such commands. * * The majority of the problematic commands fall in the MI_* range, with only a - * few specific commands on each ring (e.g. PIPE_CONTROL and MI_FLUSH_DW). + * few specific commands on each engine (e.g. PIPE_CONTROL and MI_FLUSH_DW). * * Implementation: - * Each ring maintains tables of commands and registers which the parser uses in - * scanning batch buffers submitted to that ring. + * Each engine maintains tables of commands and registers which the parser + * uses in scanning batch buffers submitted to that engine. * * Since the set of commands that the parser must check for is significantly * smaller than the number of commands supported, the parser tables contain only * those commands required by the parser. This generally works because command * opcode ranges have standard command length encodings. So for commands that * the parser does not need to check, it can easily skip them. This is - * implemented via a per-ring length decoding vfunc. + * implemented via a per-engine length decoding vfunc. * * Unfortunately, there are a number of commands that do not follow the standard * length encoding for their opcode range, primarily amongst the MI_* commands. * To handle this, the parser provides a way to define explicit "skip" entries - * in the per-ring command tables. + * in the per-engine command tables. * * Other command table entries map fairly directly to high level categories * mentioned above: rejected, master-only, register whitelist. 
The parser @@ -603,7 +603,7 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static bool validate_cmds_sorted(struct intel_engine_cs *engine, +static bool validate_cmds_sorted(const struct intel_engine_cs *engine, const struct drm_i915_cmd_table *cmd_tables, int cmd_table_count) { @@ -624,8 +624,10 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine, u32 curr = desc->cmd.value & desc->cmd.mask; if (curr < previous) { - DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n", - engine->id, i, j, curr, previous); + DRM_ERROR("CMD: %s [%d] command table not sorted: " + "table=%d entry=%d cmd=0x%08X prev=0x%08X\n", + engine->name, engine->id, + i, j, curr, previous); ret = false; } @@ -636,7 +638,7 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine, return ret; } -static bool check_sorted(int ring_id, +static bool check_sorted(const struct intel_engine_cs *engine, const struct drm_i915_reg_descriptor *reg_table, int reg_count) { @@ -648,8 +650,10 @@ static bool check_sorted(int ring_id, u32 curr = i915_mmio_reg_offset(reg_table[i].addr); if (curr < previous) { - DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n", - ring_id, i, curr, previous); + DRM_ERROR("CMD: %s [%d] register table not sorted: " + "entry=%d reg=0x%08X prev=0x%08X\n", + engine->name, engine->id, + i, curr, previous); ret = false; } @@ -666,7 +670,7 @@ static bool validate_regs_sorted(struct intel_engine_cs *engine) for (i = 0; i < engine->reg_table_count; i++) { table = &engine->reg_tables[i]; - if (!check_sorted(engine->id, table->regs, table->num_regs)) + if (!check_sorted(engine, table->regs, table->num_regs)) return false; } @@ -736,7 +740,7 @@ static void fini_hash_table(struct intel_engine_cs *engine) } /** - * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer + * intel_engine_init_cmd_parser() - set cmd parser related fields for an engine * @engine: the engine to initialize * * Optionally initializes fields related to batch buffer command parsing in the @@ -745,7 +749,7 @@ static void fini_hash_table(struct intel_engine_cs *engine) * * Return: non-zero if initialization fails */ -int i915_cmd_parser_init_ring(struct intel_engine_cs *engine) +int intel_engine_init_cmd_parser(struct intel_engine_cs *engine) { const struct drm_i915_cmd_table *cmd_tables; int cmd_table_count; @@ -806,8 +810,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *engine) engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; default: - DRM_ERROR("CMD: cmd_parser_init with unknown ring: %d\n", - engine->id); + MISSING_CASE(engine->id); BUG(); } @@ -829,13 +832,13 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *engine) } /** - * i915_cmd_parser_fini_ring() - clean up cmd parser related fields + * intel_engine_cleanup_cmd_parser() - clean up cmd parser related fields * @engine: the engine to clean up * * Releases any resources related to command parsing that may have been - * initialized for the specified ring. + * initialized for the specified engine. */ -void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine) +void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { if (!engine->needs_cmd_parser) return; @@ -866,9 +869,9 @@ find_cmd_in_table(struct intel_engine_cs *engine, * Returns a pointer to a descriptor for the command specified by cmd_header. * * The caller must supply space for a default descriptor via the default_desc - * parameter. 
If no descriptor for the specified command exists in the ring's + * parameter. If no descriptor for the specified command exists in the engine's * command parser tables, this function fills in default_desc based on the - * ring's default length encoding and returns default_desc. + * engine's default length encoding and returns default_desc. */ static const struct drm_i915_cmd_descriptor* find_cmd(struct intel_engine_cs *engine, @@ -1023,15 +1026,16 @@ unpin_src: } /** - * i915_needs_cmd_parser() - should a given ring use software command parsing? + * intel_engine_needs_cmd_parser() - should a given engine use software + * command parsing? * @engine: the engine in question * * Only certain platforms require software batch buffer command parsing, and * only when enabled via module parameter. * - * Return: true if the ring requires software command parsing + * Return: true if the engine requires software command parsing */ -bool i915_needs_cmd_parser(struct intel_engine_cs *engine) +bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine) { if (!engine->needs_cmd_parser) return false; @@ -1078,8 +1082,8 @@ static bool check_cmd(const struct intel_engine_cs *engine, reg_addr); if (!reg) { - DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", - reg_addr, *cmd, engine->id); + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n", + reg_addr, *cmd, engine->exec_id); return false; } @@ -1159,11 +1163,11 @@ static bool check_cmd(const struct intel_engine_cs *engine, desc->bits[i].mask; if (dword != desc->bits[i].expected) { - DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n", + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n", *cmd, desc->bits[i].mask, desc->bits[i].expected, - dword, engine->id); + dword, engine->exec_id); return false; } } @@ -1189,12 +1193,12 @@ static bool check_cmd(const struct intel_engine_cs *engine, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ -int i915_parse_cmds(struct intel_engine_cs *engine, - struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, - u32 batch_start_offset, - u32 batch_len, - bool is_master) +int intel_engine_cmd_parser(struct intel_engine_cs *engine, + struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, + u32 batch_start_offset, + u32 batch_len, + bool is_master) { u32 *cmd, *batch_base, *batch_end; struct drm_i915_cmd_descriptor default_desc = { 0 }; @@ -1295,7 +1299,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) /* If the command parser is not enabled, report 0 - unsupported */ for_each_engine(engine, dev_priv) { - if (i915_needs_cmd_parser(engine)) { + if (intel_engine_needs_cmd_parser(engine)) { active = true; break; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9f655e2..ea9b953 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2500,8 +2500,9 @@ struct drm_i915_cmd_descriptor { /* * A table of commands requiring special handling by the command parser. * - * Each ring has an array of tables. Each table consists of an array of command - * descriptors, which must be sorted with command opcodes in ascending order. + * Each engine has an array of tables. 
Each table consists of an array of + * command descriptors, which must be sorted with command opcodes in + * ascending order. */ struct drm_i915_cmd_table { const struct drm_i915_cmd_descriptor *table; @@ -3529,15 +3530,15 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); -int i915_cmd_parser_init_ring(struct intel_engine_cs *engine); -void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine); -bool i915_needs_cmd_parser(struct intel_engine_cs *engine); -int i915_parse_cmds(struct intel_engine_cs *engine, - struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, - u32 batch_start_offset, - u32 batch_len, - bool is_master); +int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); +void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); +bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine); +int intel_engine_cmd_parser(struct intel_engine_cs *engine, + struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, + u32 batch_start_offset, + u32 batch_len, + bool is_master); /* i915_suspend.c */ extern int i915_save_state(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f8d8ae3..cd3f873 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1216,12 +1216,12 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, if (IS_ERR(shadow_batch_obj)) return shadow_batch_obj; - ret = i915_parse_cmds(engine, - batch_obj, - shadow_batch_obj, - batch_start_offset, - batch_len, - is_master); + ret = intel_engine_cmd_parser(engine, + batch_obj, + shadow_batch_obj, + batch_start_offset, + batch_len, + is_master); if (ret) goto err; @@ -1563,7 +1563,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } params->args_batch_start_offset = args->batch_start_offset; - if (i915_needs_cmd_parser(engine) && args->batch_len) { + if (intel_engine_needs_cmd_parser(engine) && args->batch_len) { struct drm_i915_gem_object *parsed_batch_obj; parsed_batch_obj = i915_gem_execbuffer_parse(engine, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f4a35ec..e28873c 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -207,5 +207,5 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) return ret; - return i915_cmd_parser_init_ring(engine); + return intel_engine_init_cmd_parser(engine); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index daf1279..dd3f490 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1925,7 +1925,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) if (engine->cleanup) engine->cleanup(engine); - i915_cmd_parser_fini_ring(engine); + intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); intel_engine_fini_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0b5d1de..15acaf6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2267,7 +2267,7 @@ void intel_cleanup_engine(struct intel_engine_cs *engine) cleanup_phys_status_page(engine); } - i915_cmd_parser_fini_ring(engine); + 
intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); intel_engine_fini_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0f80194..9a0a026 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -340,7 +340,7 @@ struct intel_engine_cs { /* * Table of commands the command parser needs to know about - * for this ring. + * for this engine. */ DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); @@ -354,11 +354,11 @@ struct intel_engine_cs { * Returns the bitmask for the length field of the specified command. * Return 0 for an unrecognized/invalid command. * - * If the command parser finds an entry for a command in the ring's + * If the command parser finds an entry for a command in the engine's * cmd_tables, it gets the command's length based on the table entry. - * If not, it calls this function to determine the per-ring length field - * encoding for the command (i.e. certain opcode ranges use certain bits - * to encode the command length in the header). + * If not, it calls this function to determine the per-engine length + * field encoding for the command (i.e. different opcode ranges use + * certain bits to encode the command length in the header). */ u32 (*get_cmd_length_mask)(u32 cmd_header); }; -- cgit v0.10.2 From c80ff16e112bde543483cb779554ffde493988a5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jul 2016 09:07:27 +0100 Subject: drm/i915: Use engine to refer to the user's BSD intel_engine_cs This patch transitions the execbuf engine selection away from using the ring nomenclature - though we still refer to the user's incoming selector as their user_ring_id. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-7-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469606850-28659-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ea9b953..97f8279 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -402,7 +402,7 @@ struct drm_i915_file_private { unsigned boosts; } rps; - unsigned int bsd_ring; + unsigned int bsd_engine; }; /* Used by dp and fdi links */ @@ -1331,7 +1331,7 @@ struct i915_gem_mm { bool interruptible; /* the indicator for dispatch video commands on two BSD rings */ - unsigned int bsd_ring_dispatch_index; + unsigned int bsd_engine_dispatch_index; /** Bit 6 swizzling required for X tiling */ uint32_t bit_6_swizzle_x; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3730aec..e155e8d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4782,7 +4782,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file) spin_lock_init(&file_priv->mm.lock); INIT_LIST_HEAD(&file_priv->mm.request_list); - file_priv->bsd_ring = -1; + file_priv->bsd_engine = -1; ret = i915_gem_context_open(dev, file); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index cd3f873..aa35867f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1351,23 +1351,24 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, /** * Find one BSD ring to dispatch the corresponding BSD command. - * The ring index is returned. 
+ * The engine index is returned. */ static unsigned int -gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file) +gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, + struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; /* Check whether the file_priv has already selected one ring. */ - if ((int)file_priv->bsd_ring < 0) { + if ((int)file_priv->bsd_engine < 0) { /* If not, use the ping-pong mechanism to select one. */ mutex_lock(&dev_priv->drm.struct_mutex); - file_priv->bsd_ring = dev_priv->mm.bsd_ring_dispatch_index; - dev_priv->mm.bsd_ring_dispatch_index ^= 1; + file_priv->bsd_engine = dev_priv->mm.bsd_engine_dispatch_index; + dev_priv->mm.bsd_engine_dispatch_index ^= 1; mutex_unlock(&dev_priv->drm.struct_mutex); } - return file_priv->bsd_ring; + return file_priv->bsd_engine; } #define I915_USER_RINGS (4) @@ -1404,7 +1405,7 @@ eb_select_engine(struct drm_i915_private *dev_priv, unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file); + bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file); } else if (bsd_idx >= I915_EXEC_BSD_RING1 && bsd_idx <= I915_EXEC_BSD_RING2) { bsd_idx >>= I915_EXEC_BSD_SHIFT; -- cgit v0.10.2 From 6361f4ba468264e8203111c5d138c7123e72a5c2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jul 2016 09:07:28 +0100 Subject: drm/i915: Avoid using intel_engine_cs *ring for GPU error capture Inside the error capture itself, we refer to not only the hardware engine, its ringbuffer but also the capture state. Finding clear names for each whilst avoiding mixing ring/intel_engine_cs is tricky. As a compromise we keep using ering for the error capture. v2: Use 'ee' locals for struct drm_i915_error_engine Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-8-git-send-email-chris@chris-wilson.co.uk Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469606850-28659-3-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 97f8279..65ada5d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -512,8 +512,8 @@ struct drm_i915_error_state { struct intel_display_error_state *display; struct drm_i915_error_object *semaphore_obj; - struct drm_i915_error_ring { - bool valid; + struct drm_i915_error_engine { + int engine_id; /* Software tracked state */ bool waiting; int num_waiters; @@ -578,7 +578,7 @@ struct drm_i915_error_state { pid_t pid; char comm[TASK_COMM_LEN]; - } ring[I915_NUM_ENGINES]; + } engine[I915_NUM_ENGINES]; struct drm_i915_error_buffer { u32 size; @@ -593,7 +593,7 @@ struct drm_i915_error_state { u32 dirty:1; u32 purgeable:1; u32 userptr:1; - s32 ring:4; + s32 engine:4; u32 cache_level:3; } **active_bo, **pinned_bo; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4d39c72..bc4a3eb 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -30,9 +30,9 @@ #include #include "i915_drv.h" -static const char *ring_str(int ring) +static const char *engine_str(int engine) { - switch (ring) { + switch (engine) { case RCS: return "render"; case VCS: return "bsd"; case BCS: return "blt"; @@ -207,8 +207,8 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_puts(m, dirty_flag(err->dirty)); err_puts(m, 
purgeable_flag(err->purgeable)); err_puts(m, err->userptr ? " userptr" : ""); - err_puts(m, err->ring != -1 ? " " : ""); - err_puts(m, ring_str(err->ring)); + err_puts(m, err->engine != -1 ? " " : ""); + err_puts(m, engine_str(err->engine)); err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); if (err->name) @@ -239,70 +239,65 @@ static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a) return "unknown"; } -static void i915_ring_error_state(struct drm_i915_error_state_buf *m, - struct drm_device *dev, - struct drm_i915_error_state *error, - int ring_idx) +static void error_print_engine(struct drm_i915_error_state_buf *m, + struct drm_i915_error_engine *ee) { - struct drm_i915_error_ring *ring = &error->ring[ring_idx]; - - if (!ring->valid) - return; - - err_printf(m, "%s command stream:\n", ring_str(ring_idx)); - err_printf(m, " START: 0x%08x\n", ring->start); - err_printf(m, " HEAD: 0x%08x\n", ring->head); - err_printf(m, " TAIL: 0x%08x\n", ring->tail); - err_printf(m, " CTL: 0x%08x\n", ring->ctl); - err_printf(m, " HWS: 0x%08x\n", ring->hws); - err_printf(m, " ACTHD: 0x%08x %08x\n", (u32)(ring->acthd>>32), (u32)ring->acthd); - err_printf(m, " IPEIR: 0x%08x\n", ring->ipeir); - err_printf(m, " IPEHR: 0x%08x\n", ring->ipehr); - err_printf(m, " INSTDONE: 0x%08x\n", ring->instdone); - if (INTEL_INFO(dev)->gen >= 4) { - err_printf(m, " BBADDR: 0x%08x %08x\n", (u32)(ring->bbaddr>>32), (u32)ring->bbaddr); - err_printf(m, " BB_STATE: 0x%08x\n", ring->bbstate); - err_printf(m, " INSTPS: 0x%08x\n", ring->instps); + err_printf(m, "%s command stream:\n", engine_str(ee->engine_id)); + err_printf(m, " START: 0x%08x\n", ee->start); + err_printf(m, " HEAD: 0x%08x\n", ee->head); + err_printf(m, " TAIL: 0x%08x\n", ee->tail); + err_printf(m, " CTL: 0x%08x\n", ee->ctl); + err_printf(m, " HWS: 0x%08x\n", ee->hws); + err_printf(m, " ACTHD: 0x%08x %08x\n", + (u32)(ee->acthd>>32), (u32)ee->acthd); + err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir); + err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr); + err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone); + if (INTEL_GEN(m->i915) >= 4) { + err_printf(m, " BBADDR: 0x%08x %08x\n", + (u32)(ee->bbaddr>>32), (u32)ee->bbaddr); + err_printf(m, " BB_STATE: 0x%08x\n", ee->bbstate); + err_printf(m, " INSTPS: 0x%08x\n", ee->instps); } - err_printf(m, " INSTPM: 0x%08x\n", ring->instpm); - err_printf(m, " FADDR: 0x%08x %08x\n", upper_32_bits(ring->faddr), - lower_32_bits(ring->faddr)); - if (INTEL_INFO(dev)->gen >= 6) { - err_printf(m, " RC PSMI: 0x%08x\n", ring->rc_psmi); - err_printf(m, " FAULT_REG: 0x%08x\n", ring->fault_reg); + err_printf(m, " INSTPM: 0x%08x\n", ee->instpm); + err_printf(m, " FADDR: 0x%08x %08x\n", upper_32_bits(ee->faddr), + lower_32_bits(ee->faddr)); + if (INTEL_GEN(m->i915) >= 6) { + err_printf(m, " RC PSMI: 0x%08x\n", ee->rc_psmi); + err_printf(m, " FAULT_REG: 0x%08x\n", ee->fault_reg); err_printf(m, " SYNC_0: 0x%08x [last synced 0x%08x]\n", - ring->semaphore_mboxes[0], - ring->semaphore_seqno[0]); + ee->semaphore_mboxes[0], + ee->semaphore_seqno[0]); err_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n", - ring->semaphore_mboxes[1], - ring->semaphore_seqno[1]); - if (HAS_VEBOX(dev)) { + ee->semaphore_mboxes[1], + ee->semaphore_seqno[1]); + if (HAS_VEBOX(m->i915)) { err_printf(m, " SYNC_2: 0x%08x [last synced 0x%08x]\n", - ring->semaphore_mboxes[2], - ring->semaphore_seqno[2]); + ee->semaphore_mboxes[2], + ee->semaphore_seqno[2]); } } - if (USES_PPGTT(dev)) { - err_printf(m, " GFX_MODE: 0x%08x\n", ring->vm_info.gfx_mode); + if 
(USES_PPGTT(m->i915)) { + err_printf(m, " GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode); - if (INTEL_INFO(dev)->gen >= 8) { + if (INTEL_GEN(m->i915) >= 8) { int i; for (i = 0; i < 4; i++) err_printf(m, " PDP%d: 0x%016llx\n", - i, ring->vm_info.pdp[i]); + i, ee->vm_info.pdp[i]); } else { err_printf(m, " PP_DIR_BASE: 0x%08x\n", - ring->vm_info.pp_dir_base); + ee->vm_info.pp_dir_base); } } - err_printf(m, " seqno: 0x%08x\n", ring->seqno); - err_printf(m, " last_seqno: 0x%08x\n", ring->last_seqno); - err_printf(m, " waiting: %s\n", yesno(ring->waiting)); - err_printf(m, " ring->head: 0x%08x\n", ring->cpu_ring_head); - err_printf(m, " ring->tail: 0x%08x\n", ring->cpu_ring_tail); + err_printf(m, " seqno: 0x%08x\n", ee->seqno); + err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); + err_printf(m, " waiting: %s\n", yesno(ee->waiting)); + err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); + err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); err_printf(m, " hangcheck: %s [%d]\n", - hangcheck_action_to_str(ring->hangcheck_action), - ring->hangcheck_score); + hangcheck_action_to_str(ee->hangcheck_action), + ee->hangcheck_score); } void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) @@ -348,17 +343,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->time.tv_usec); err_printf(m, "Kernel: " UTS_RELEASE "\n"); max_hangcheck_score = 0; - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - if (error->ring[i].hangcheck_score > max_hangcheck_score) - max_hangcheck_score = error->ring[i].hangcheck_score; + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + if (error->engine[i].hangcheck_score > max_hangcheck_score) + max_hangcheck_score = error->engine[i].hangcheck_score; } - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - if (error->ring[i].hangcheck_score == max_hangcheck_score && - error->ring[i].pid != -1) { + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + if (error->engine[i].hangcheck_score == max_hangcheck_score && + error->engine[i].pid != -1) { err_printf(m, "Active process (on ring %s): %s [%d]\n", - ring_str(i), - error->ring[i].comm, - error->ring[i].pid); + engine_str(i), + error->engine[i].comm, + error->engine[i].pid); } } err_printf(m, "Reset count: %u\n", error->reset_count); @@ -414,8 +409,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if (IS_GEN7(dev)) err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); - for (i = 0; i < ARRAY_SIZE(error->ring); i++) - i915_ring_error_state(m, dev, error, i); + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + if (error->engine[i].engine_id != -1) + error_print_engine(m, &error->engine[i]); + } for (i = 0; i < error->vm_count; i++) { err_printf(m, "vm[%d]\n", i); @@ -429,21 +426,23 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->pinned_bo_count[i]); } - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - obj = error->ring[i].batchbuffer; + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + struct drm_i915_error_engine *ee = &error->engine[i]; + + obj = ee->batchbuffer; if (obj) { err_puts(m, dev_priv->engine[i].name); - if (error->ring[i].pid != -1) + if (ee->pid != -1) err_printf(m, " (submitted by %s [%d])", - error->ring[i].comm, - error->ring[i].pid); + ee->comm, + ee->pid); err_printf(m, " --- gtt_offset = 0x%08x %08x\n", upper_32_bits(obj->gtt_offset), lower_32_bits(obj->gtt_offset)); print_error_obj(m, obj); } - obj = error->ring[i].wa_batchbuffer; + obj = ee->wa_batchbuffer; if (obj) { err_printf(m, "%s (w/a) --- gtt_offset = 
0x%08x\n", dev_priv->engine[i].name, @@ -451,38 +450,38 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, obj); } - if (error->ring[i].num_requests) { + if (ee->num_requests) { err_printf(m, "%s --- %d requests\n", dev_priv->engine[i].name, - error->ring[i].num_requests); - for (j = 0; j < error->ring[i].num_requests; j++) { + ee->num_requests); + for (j = 0; j < ee->num_requests; j++) { err_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n", - error->ring[i].requests[j].seqno, - error->ring[i].requests[j].jiffies, - error->ring[i].requests[j].tail); + ee->requests[j].seqno, + ee->requests[j].jiffies, + ee->requests[j].tail); } } - if (error->ring[i].num_waiters) { + if (ee->num_waiters) { err_printf(m, "%s --- %d waiters\n", dev_priv->engine[i].name, - error->ring[i].num_waiters); - for (j = 0; j < error->ring[i].num_waiters; j++) { + ee->num_waiters); + for (j = 0; j < ee->num_waiters; j++) { err_printf(m, " seqno 0x%08x for %s [%d]\n", - error->ring[i].waiters[j].seqno, - error->ring[i].waiters[j].comm, - error->ring[i].waiters[j].pid); + ee->waiters[j].seqno, + ee->waiters[j].comm, + ee->waiters[j].pid); } } - if ((obj = error->ring[i].ringbuffer)) { + if ((obj = ee->ringbuffer)) { err_printf(m, "%s --- ringbuffer = 0x%08x\n", dev_priv->engine[i].name, lower_32_bits(obj->gtt_offset)); print_error_obj(m, obj); } - if ((obj = error->ring[i].hws_page)) { + if ((obj = ee->hws_page)) { u64 hws_offset = obj->gtt_offset; u32 *hws_page = &obj->pages[0][0]; @@ -504,7 +503,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } } - obj = error->ring[i].wa_ctx; + obj = ee->wa_ctx; if (obj) { u64 wa_ctx_offset = obj->gtt_offset; u32 *wa_ctx_page = &obj->pages[0][0]; @@ -526,7 +525,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } } - if ((obj = error->ring[i].ctx)) { + if ((obj = ee->ctx)) { err_printf(m, "%s --- HW Context = 0x%08x\n", dev_priv->engine[i].name, lower_32_bits(obj->gtt_offset)); @@ -611,15 +610,18 @@ static void i915_error_state_free(struct kref *error_ref) typeof(*error), ref); int i; - for (i = 0; i < ARRAY_SIZE(error->ring); i++) { - i915_error_object_free(error->ring[i].batchbuffer); - i915_error_object_free(error->ring[i].wa_batchbuffer); - i915_error_object_free(error->ring[i].ringbuffer); - i915_error_object_free(error->ring[i].hws_page); - i915_error_object_free(error->ring[i].ctx); - i915_error_object_free(error->ring[i].wa_ctx); - kfree(error->ring[i].requests); - kfree(error->ring[i].waiters); + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + struct drm_i915_error_engine *ee = &error->engine[i]; + + i915_error_object_free(ee->batchbuffer); + i915_error_object_free(ee->wa_batchbuffer); + i915_error_object_free(ee->ringbuffer); + i915_error_object_free(ee->hws_page); + i915_error_object_free(ee->ctx); + i915_error_object_free(ee->wa_ctx); + + kfree(ee->requests); + kfree(ee->waiters); } i915_error_object_free(error->semaphore_obj); @@ -762,8 +764,8 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; err->userptr = obj->userptr.mm != NULL; - err->ring = obj->last_write_req ? - i915_gem_request_get_engine(obj->last_write_req)->id : -1; + err->engine = obj->last_write_req ? 
+ i915_gem_request_get_engine(obj->last_write_req)->id : -1; err->cache_level = obj->cache_level; } @@ -815,7 +817,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, */ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error, - int *ring_id) + int *engine_id) { uint32_t error_code = 0; int i; @@ -826,11 +828,11 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, * strictly a client bug. Use instdone to differentiate those some. */ for (i = 0; i < I915_NUM_ENGINES; i++) { - if (error->ring[i].hangcheck_action == HANGCHECK_HUNG) { - if (ring_id) - *ring_id = i; + if (error->engine[i].hangcheck_action == HANGCHECK_HUNG) { + if (engine_id) + *engine_id = i; - return error->ring[i].ipehr ^ error->ring[i].instdone; + return error->engine[i].ipehr ^ error->engine[i].instdone; } } @@ -855,21 +857,16 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv, } -static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, +static void gen8_record_semaphore_state(struct drm_i915_error_state *error, struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) + struct drm_i915_error_engine *ee) { + struct drm_i915_private *dev_priv = engine->i915; struct intel_engine_cs *to; enum intel_engine_id id; - if (!i915.semaphores) - return; - if (!error->semaphore_obj) - error->semaphore_obj = - i915_error_ggtt_object_create(dev_priv, - dev_priv->semaphore_obj); + return; for_each_engine_id(to, dev_priv, id) { int idx; @@ -879,42 +876,43 @@ static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv, if (engine == to) continue; - signal_offset = (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) - / 4; + signal_offset = + (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4; tmp = error->semaphore_obj->pages[0]; idx = intel_ring_sync_index(engine, to); - ering->semaphore_mboxes[idx] = tmp[signal_offset]; - ering->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx]; + ee->semaphore_mboxes[idx] = tmp[signal_offset]; + ee->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx]; } } -static void gen6_record_semaphore_state(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) +static void gen6_record_semaphore_state(struct intel_engine_cs *engine, + struct drm_i915_error_engine *ee) { - ering->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base)); - ering->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base)); - ering->semaphore_seqno[0] = engine->semaphore.sync_seqno[0]; - ering->semaphore_seqno[1] = engine->semaphore.sync_seqno[1]; + struct drm_i915_private *dev_priv = engine->i915; + + ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base)); + ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base)); + ee->semaphore_seqno[0] = engine->semaphore.sync_seqno[0]; + ee->semaphore_seqno[1] = engine->semaphore.sync_seqno[1]; if (HAS_VEBOX(dev_priv)) { - ering->semaphore_mboxes[2] = + ee->semaphore_mboxes[2] = I915_READ(RING_SYNC_2(engine->mmio_base)); - ering->semaphore_seqno[2] = engine->semaphore.sync_seqno[2]; + ee->semaphore_seqno[2] = engine->semaphore.sync_seqno[2]; } } -static void engine_record_waiters(struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) +static void error_record_engine_waiters(struct intel_engine_cs *engine, + struct drm_i915_error_engine *ee) { struct intel_breadcrumbs *b = 
&engine->breadcrumbs; struct drm_i915_error_waiter *waiter; struct rb_node *rb; int count; - ering->num_waiters = 0; - ering->waiters = NULL; + ee->num_waiters = 0; + ee->waiters = NULL; spin_lock(&b->lock); count = 0; @@ -930,7 +928,7 @@ static void engine_record_waiters(struct intel_engine_cs *engine, if (!waiter) return; - ering->waiters = waiter; + ee->waiters = waiter; spin_lock(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -941,55 +939,55 @@ static void engine_record_waiters(struct intel_engine_cs *engine, waiter->seqno = w->seqno; waiter++; - if (++ering->num_waiters == count) + if (++ee->num_waiters == count) break; } spin_unlock(&b->lock); } -static void i915_record_ring_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, - struct intel_engine_cs *engine, - struct drm_i915_error_ring *ering) +static void error_record_engine_registers(struct drm_i915_error_state *error, + struct intel_engine_cs *engine, + struct drm_i915_error_engine *ee) { + struct drm_i915_private *dev_priv = engine->i915; + if (INTEL_GEN(dev_priv) >= 6) { - ering->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); - ering->fault_reg = I915_READ(RING_FAULT_REG(engine)); + ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); + ee->fault_reg = I915_READ(RING_FAULT_REG(engine)); if (INTEL_GEN(dev_priv) >= 8) - gen8_record_semaphore_state(dev_priv, error, engine, - ering); + gen8_record_semaphore_state(error, engine, ee); else - gen6_record_semaphore_state(dev_priv, engine, ering); + gen6_record_semaphore_state(engine, ee); } if (INTEL_GEN(dev_priv) >= 4) { - ering->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base)); - ering->ipeir = I915_READ(RING_IPEIR(engine->mmio_base)); - ering->ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); - ering->instdone = I915_READ(RING_INSTDONE(engine->mmio_base)); - ering->instps = I915_READ(RING_INSTPS(engine->mmio_base)); - ering->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base)); + ee->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base)); + ee->ipeir = I915_READ(RING_IPEIR(engine->mmio_base)); + ee->ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); + ee->instdone = I915_READ(RING_INSTDONE(engine->mmio_base)); + ee->instps = I915_READ(RING_INSTPS(engine->mmio_base)); + ee->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base)); if (INTEL_GEN(dev_priv) >= 8) { - ering->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32; - ering->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32; + ee->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32; + ee->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32; } - ering->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base)); + ee->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base)); } else { - ering->faddr = I915_READ(DMA_FADD_I8XX); - ering->ipeir = I915_READ(IPEIR); - ering->ipehr = I915_READ(IPEHR); - ering->instdone = I915_READ(GEN2_INSTDONE); + ee->faddr = I915_READ(DMA_FADD_I8XX); + ee->ipeir = I915_READ(IPEIR); + ee->ipehr = I915_READ(IPEHR); + ee->instdone = I915_READ(GEN2_INSTDONE); } - ering->waiting = intel_engine_has_waiter(engine); - ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); - ering->acthd = intel_ring_get_active_head(engine); - ering->seqno = intel_engine_get_seqno(engine); - ering->last_seqno = engine->last_submitted_seqno; - ering->start = I915_READ_START(engine); - ering->head = I915_READ_HEAD(engine); - ering->tail = I915_READ_TAIL(engine); - ering->ctl = I915_READ_CTL(engine); + 
ee->waiting = intel_engine_has_waiter(engine); + ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); + ee->acthd = intel_ring_get_active_head(engine); + ee->seqno = intel_engine_get_seqno(engine); + ee->last_seqno = engine->last_submitted_seqno; + ee->start = I915_READ_START(engine); + ee->head = I915_READ_HEAD(engine); + ee->tail = I915_READ_TAIL(engine); + ee->ctl = I915_READ_CTL(engine); if (I915_NEED_GFX_HWS(dev_priv)) { i915_reg_t mmio; @@ -1017,29 +1015,29 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv, mmio = RING_HWS_PGA(engine->mmio_base); } - ering->hws = I915_READ(mmio); + ee->hws = I915_READ(mmio); } - ering->hangcheck_score = engine->hangcheck.score; - ering->hangcheck_action = engine->hangcheck.action; + ee->hangcheck_score = engine->hangcheck.score; + ee->hangcheck_action = engine->hangcheck.action; if (USES_PPGTT(dev_priv)) { int i; - ering->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine)); + ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine)); if (IS_GEN6(dev_priv)) - ering->vm_info.pp_dir_base = + ee->vm_info.pp_dir_base = I915_READ(RING_PP_DIR_BASE_READ(engine)); else if (IS_GEN7(dev_priv)) - ering->vm_info.pp_dir_base = + ee->vm_info.pp_dir_base = I915_READ(RING_PP_DIR_BASE(engine)); else if (INTEL_GEN(dev_priv) >= 8) for (i = 0; i < 4; i++) { - ering->vm_info.pdp[i] = + ee->vm_info.pdp[i] = I915_READ(GEN8_RING_PDP_UDW(engine, i)); - ering->vm_info.pdp[i] <<= 32; - ering->vm_info.pdp[i] |= + ee->vm_info.pdp[i] <<= 32; + ee->vm_info.pdp[i] |= I915_READ(GEN8_RING_PDP_LDW(engine, i)); } } @@ -1048,7 +1046,7 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv, static void i915_gem_record_active_context(struct intel_engine_cs *engine, struct drm_i915_error_state *error, - struct drm_i915_error_ring *ering) + struct drm_i915_error_engine *ee) { struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_gem_object *obj; @@ -1062,7 +1060,7 @@ static void i915_gem_record_active_context(struct intel_engine_cs *engine, continue; if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) { - ering->ctx = i915_error_ggtt_object_create(dev_priv, obj); + ee->ctx = i915_error_ggtt_object_create(dev_priv, obj); break; } } @@ -1075,18 +1073,26 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_gem_request *request; int i, count; + if (dev_priv->semaphore_obj) { + error->semaphore_obj = + i915_error_ggtt_object_create(dev_priv, + dev_priv->semaphore_obj); + } + for (i = 0; i < I915_NUM_ENGINES; i++) { struct intel_engine_cs *engine = &dev_priv->engine[i]; + struct drm_i915_error_engine *ee = &error->engine[i]; - error->ring[i].pid = -1; + ee->pid = -1; + ee->engine_id = -1; if (!intel_engine_initialized(engine)) continue; - error->ring[i].valid = true; + ee->engine_id = i; - i915_record_ring_state(dev_priv, error, engine, &error->ring[i]); - engine_record_waiters(engine, &error->ring[i]); + error_record_engine_registers(error, engine, ee); + error_record_engine_waiters(engine, ee); request = i915_gem_find_active_request(engine); if (request) { @@ -1100,13 +1106,13 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, * as the simplest method to avoid being overwritten * by userspace. 
*/ - error->ring[i].batchbuffer = + ee->batchbuffer = i915_error_object_create(dev_priv, request->batch_obj, vm); if (HAS_BROKEN_CS_TLB(dev_priv)) - error->ring[i].wa_batchbuffer = + ee->wa_batchbuffer = i915_error_ggtt_object_create(dev_priv, engine->scratch.obj); @@ -1116,8 +1122,8 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, rcu_read_lock(); task = pid_task(request->pid, PIDTYPE_PID); if (task) { - strcpy(error->ring[i].comm, task->comm); - error->ring[i].pid = task->pid; + strcpy(ee->comm, task->comm); + ee->pid = task->pid; } rcu_read_unlock(); } @@ -1126,35 +1132,34 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE; rb = request->ringbuf; - error->ring[i].cpu_ring_head = rb->head; - error->ring[i].cpu_ring_tail = rb->tail; - error->ring[i].ringbuffer = + ee->cpu_ring_head = rb->head; + ee->cpu_ring_tail = rb->tail; + ee->ringbuffer = i915_error_ggtt_object_create(dev_priv, rb->obj); } - error->ring[i].hws_page = + ee->hws_page = i915_error_ggtt_object_create(dev_priv, engine->status_page.obj); if (engine->wa_ctx.obj) { - error->ring[i].wa_ctx = + ee->wa_ctx = i915_error_ggtt_object_create(dev_priv, engine->wa_ctx.obj); } - i915_gem_record_active_context(engine, error, &error->ring[i]); + i915_gem_record_active_context(engine, error, ee); count = 0; list_for_each_entry(request, &engine->request_list, list) count++; - error->ring[i].num_requests = count; - error->ring[i].requests = - kcalloc(count, sizeof(*error->ring[i].requests), - GFP_ATOMIC); - if (error->ring[i].requests == NULL) { - error->ring[i].num_requests = 0; + ee->num_requests = count; + ee->requests = + kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC); + if (!ee->requests) { + ee->num_requests = 0; continue; } @@ -1162,7 +1167,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, list_for_each_entry(request, &engine->request_list, list) { struct drm_i915_error_request *erq; - if (count >= error->ring[i].num_requests) { + if (count >= ee->num_requests) { /* * If the ring request list was changed in * between the point where the error request @@ -1181,7 +1186,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, break; } - erq = &error->ring[i].requests[count++]; + erq = &ee->requests[count++]; erq->seqno = request->fence.seqno; erq->jiffies = request->emitted_jiffies; erq->tail = request->postfix; @@ -1352,20 +1357,20 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, const char *error_msg) { u32 ecode; - int ring_id = -1, len; + int engine_id = -1, len; - ecode = i915_error_generate_code(dev_priv, error, &ring_id); + ecode = i915_error_generate_code(dev_priv, error, &engine_id); len = scnprintf(error->error_msg, sizeof(error->error_msg), "GPU HANG: ecode %d:%d:0x%08x", - INTEL_GEN(dev_priv), ring_id, ecode); + INTEL_GEN(dev_priv), engine_id, ecode); - if (ring_id != -1 && error->ring[ring_id].pid != -1) + if (engine_id != -1 && error->engine[engine_id].pid != -1) len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->ring[ring_id].comm, - error->ring[ring_id].pid); + error->engine[engine_id].comm, + error->engine[engine_id].pid); scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", reason: %s, action: %s", -- cgit v0.10.2 From 7e21d6484ded4aae2871f71e9bda0ed88ce6b43b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jul 2016 09:07:29 +0100 Subject: drm/i915: Remove stray intel_engine_cs ring 
identifiers from i915_gem.c A few places we use ring when referring to the struct intel_engine_cs. An anachronism we are pruning out. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-9-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469606850-28659-4-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e155e8d..7bfce1d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,7 +46,7 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o static void i915_gem_object_retire__write(struct drm_i915_gem_object *obj); static void -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); +i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int engine); static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) @@ -1385,10 +1385,10 @@ static void i915_gem_object_retire_request(struct drm_i915_gem_object *obj, struct drm_i915_gem_request *req) { - int ring = req->engine->id; + int idx = req->engine->id; - if (obj->last_read_req[ring] == req) - i915_gem_object_retire__read(obj, ring); + if (obj->last_read_req[idx] == req) + i915_gem_object_retire__read(obj, idx); else if (obj->last_write_req == req) i915_gem_object_retire__write(obj); @@ -2381,20 +2381,20 @@ i915_gem_object_retire__write(struct drm_i915_gem_object *obj) } static void -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) +i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int idx) { struct i915_vma *vma; - GEM_BUG_ON(obj->last_read_req[ring] == NULL); - GEM_BUG_ON(!(obj->active & (1 << ring))); + GEM_BUG_ON(obj->last_read_req[idx] == NULL); + GEM_BUG_ON(!(obj->active & (1 << idx))); - list_del_init(&obj->engine_list[ring]); - i915_gem_request_assign(&obj->last_read_req[ring], NULL); + list_del_init(&obj->engine_list[idx]); + i915_gem_request_assign(&obj->last_read_req[idx], NULL); - if (obj->last_write_req && obj->last_write_req->engine->id == ring) + if (obj->last_write_req && obj->last_write_req->engine->id == idx) i915_gem_object_retire__write(obj); - obj->active &= ~(1 << ring); + obj->active &= ~(1 << idx); if (obj->active) return; @@ -4599,7 +4599,7 @@ int i915_gem_init(struct drm_device *dev) ret = i915_gem_init_hw(dev); if (ret == -EIO) { - /* Allow ring initialisation to fail by marking the GPU as + /* Allow engine initialisation to fail by marking the GPU as * wedged. But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. */ -- cgit v0.10.2 From 9930ca1ae7e9fbf67e3a69239e9a39e80f4d9f60 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jul 2016 09:07:30 +0100 Subject: drm/i915: Update a couple of hangcheck comments to talk about engines We still have lots of comments that refer to the old ring when we mean struct intel_engine_cs and its hardware correspondence. This patch fixes an instance inside hangcheck to talk about engines. 
Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-10-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469606850-28659-5-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7104dc1..f5bf4f9 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3140,13 +3140,13 @@ static void i915_hangcheck_elapsed(struct work_struct *work) } } else { /* We always increment the hangcheck score - * if the ring is busy and still processing + * if the engine is busy and still processing * the same request, so that no single request * can run indefinitely (such as a chain of * batches). The only time we do not increment * the hangcheck score on this ring, if this - * ring is in a legitimate wait for another - * ring. In that case the waiting ring is a + * engine is in a legitimate wait for another + * engine. In that case the waiting engine is a * victim and we want to be sure we catch the * right culprit. Then every time we do kick * the ring, add a small increment to the -- cgit v0.10.2 From 12c100bfa5d9103b6c4d43636fee09c31e75605a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 23 May 2016 17:42:48 +0300 Subject: drm/i915: Never fully mask the the EI up rps interrupt on SNB/IVB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SNB (and IVB too I suppose) starts to misbehave if the GPU gets stuck in an infinite batch buffer loop. The GPU apparently hogs something critical and CPUs start to lose interrupts and whatnot. We can keep the system limping along by unmasking some interrupts in GEN6_PMINTRMSK. The EI up interrupt has been previously chosen for that task, so let's never mask it. v2: s/gen6_rps_pm_mask/gen6_sanitize_rps_pm_mask/ (Chris) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93122 Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1464014568-4529-1-git-send-email-ville.syrjala@linux.intel.com Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 64d628c..e50505c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4892,7 +4892,8 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) else gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); dev_priv->rps.last_adj = 0; - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); + I915_WRITE(GEN6_PMINTRMSK, + gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } mutex_unlock(&dev_priv->rps.hw_lock); -- cgit v0.10.2 From 5b249600c1483976b0a7501ba25a8ba8b86672e1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jul 2016 19:32:29 +0100 Subject: drm/i915: Fix up some stray to_i915(dev) after a recent merge The merge conflict resolution caused some dev->dev_private to return from the dead. Kill them with to_i915(). 
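For reference, the cleanup being applied here is mechanical: every leftover dev->dev_private lookup becomes a call to the to_i915() helper. A minimal sketch of that pattern, assuming the helper is the usual container_of() wrapper around the drm_device that is now embedded in drm_i915_private (illustrative only, not part of the patch below):

	/* Assumed shape of the helper: recover the i915 private structure
	 * from its embedded drm_device member.
	 */
	static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
	{
		return container_of(dev, struct drm_i915_private, drm);
	}

	/* Before: open-coded reliance on the opaque dev_private pointer. */
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* After: derive it from the embedded drm_device instead. */
	struct drm_i915_private *dev_priv = to_i915(dev);
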
Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469644349-24571-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d47281b..f1ffde7 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -323,7 +323,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, { struct drm_minor *minor = dev_to_drm_minor(kdev); struct drm_device *dev = minor->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); u32 val; ssize_t ret; -- cgit v0.10.2 From 6ce213575cf37c50667850b4372272b4f1660b51 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 Jul 2016 00:45:35 +0100 Subject: drm/i915: Add missing ring_mask to Pineview It appears that we never told Pineview it has a RENDER_RING. This was all fine until we started using the ring_mask for determining all the available rings to initialise for legacy ringbuffer submission in commit 88d2ba2e95c8 ("drm/i915: Unify engine init loop"). Though really it is a latent bug since the ring_mask inception in commit 73ae478cdf6a ("drm/i915: Replace has_bsd/blt/vebox with a mask"). To prevent similar mishaps in future, add a WARN_ON() if we find ourselves with a device without any rings. Fixes: 73ae478cdf6a ("drm/i915: Replace has_bsd/blt/vebox with a mask") Fixes: 88d2ba2e95c8 ("drm/i915: Unify engine init loop") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniel Vetter Cc: Ben Widawsky Link: http://patchwork.freedesktop.org/patch/msgid/1469749535-2382-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Cc: drm-intel-fixes@lists.freedesktop.org diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 949c016..2587b1b 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -173,6 +173,7 @@ static const struct intel_device_info intel_pineview_info = { .gen = 3, .is_g33 = 1, .is_pineview = 1, .is_mobile = 1, .num_pipes = 2, .need_gfx_hws = 1, .has_hotplug = 1, .has_overlay = 1, + .ring_mask = RENDER_RING, GEN_DEFAULT_PIPEOFFSETS, CURSOR_OFFSETS, }; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e28873c..b90dd2f 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -114,6 +114,7 @@ int intel_engines_init(struct drm_device *dev) unsigned int i; int ret; + WARN_ON(INTEL_INFO(dev_priv)->ring_mask == 0); WARN_ON(INTEL_INFO(dev_priv)->ring_mask & GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES)); -- cgit v0.10.2 From 238010ed6cc8d3b5c6b146ab813838c6a6992a7b Mon Sep 17 00:00:00 2001 From: David Weinehall Date: Mon, 1 Aug 2016 17:33:27 +0300 Subject: drm/i915/debugfs: Take runtime_pm ref for sseu When reading the SSEU statistics, we need to call intel_runtime_pm_get() first, otherwise we might end up triggering "Device suspended during HW access". 
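The rule this patch enforces is that MMIO access must be bracketed by a runtime-PM wakeref so the device cannot be autosuspended mid-read. A minimal sketch of the pattern, with read_sseu_registers() standing in as a hypothetical placeholder for the real per-platform status helpers:

	static void read_sseu_safely(struct drm_i915_private *dev_priv,
				     struct sseu_dev_status *stat)
	{
		intel_runtime_pm_get(dev_priv);		/* wake the device, hold a wakeref */

		read_sseu_registers(dev_priv, stat);	/* hypothetical HW register reads */

		intel_runtime_pm_put(dev_priv);		/* release the wakeref */
	}

The actual change below wraps the cherryview/broadwell/gen9 status readers in exactly this get/put pair.
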
Signed-off-by: David Weinehall Link: http://patchwork.freedesktop.org/patch/msgid/1470062007-26996-1-git-send-email-david.weinehall@linux.intel.com Reviewed-by: Joonas Lahtinen Signed-off-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9aa62c5..531ca02 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -5238,7 +5238,8 @@ static void broadwell_sseu_device_status(struct drm_device *dev, static int i915_sseu_status(struct seq_file *m, void *unused) { struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = to_i915(node->minor->dev); + struct drm_device *dev = &dev_priv->drm; struct sseu_dev_status stat; if (INTEL_INFO(dev)->gen < 8) @@ -5268,6 +5269,9 @@ static int i915_sseu_status(struct seq_file *m, void *unused) seq_puts(m, "SSEU Device Status\n"); memset(&stat, 0, sizeof(stat)); + + intel_runtime_pm_get(dev_priv); + if (IS_CHERRYVIEW(dev)) { cherryview_sseu_device_status(dev, &stat); } else if (IS_BROADWELL(dev)) { @@ -5275,6 +5279,9 @@ static int i915_sseu_status(struct seq_file *m, void *unused) } else if (INTEL_INFO(dev)->gen >= 9) { gen9_sseu_device_status(dev, &stat); } + + intel_runtime_pm_put(dev_priv); + seq_printf(m, " Enabled Slice Total: %u\n", stat.slice_total); seq_printf(m, " Enabled Subslice Total: %u\n", -- cgit v0.10.2 From 2a13ae79524ed8bec32a623e2763c649fa3e0846 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 11:15:27 +0100 Subject: drm/i915: Protect older gen against intel_gt_init_powersave() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the middle of intel_gt_init_powersave() we have an if-chain that ends with a universal else clause to read gen6+ registers. Older platforms like Pineview that end up here do not like those registers and may even OOPS whilst reading them! Fixes: 3ea9a80132 ("drm/i915: Perform static RPS frequency setup ...") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Mika Kuoppala Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470132927-1821-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e50505c..b5513d3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6493,7 +6493,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) cherryview_init_gt_powersave(dev_priv); else if (IS_VALLEYVIEW(dev_priv)) valleyview_init_gt_powersave(dev_priv); - else + else if (INTEL_GEN(dev_priv) >= 6) gen6_init_rps_frequencies(dev_priv); /* Derive initial user preferences/limits from the hardware limits */ -- cgit v0.10.2 From 4da456168f41499369bef5ebb33d5966cd9cbb8c Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 5 Jul 2016 10:28:34 +0100 Subject: drm/i915: remove redundant fbc warnings The fbc enabled/active sanity checks are already done in __intel_fbc_disable so no need to do them again. 
Signed-off-by: Matthew Auld Reviewed-by: Paulo Zanoni Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1467710914-15146-1-git-send-email-matthew.auld@intel.com diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 781e2f5..8147eb9 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1165,11 +1165,8 @@ void intel_fbc_disable(struct intel_crtc *crtc) return; mutex_lock(&fbc->lock); - if (fbc->crtc == crtc) { - WARN_ON(!fbc->enabled); - WARN_ON(fbc->active); + if (fbc->crtc == crtc) __intel_fbc_disable(dev_priv); - } mutex_unlock(&fbc->lock); cancel_work_sync(&fbc->work.work); -- cgit v0.10.2 From a7d8dbc07c8f0faaace983b1e4c6e9495dd0aa75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:59:28 +0300 Subject: drm/i915: Fix iboost setting for DDI with 4 lanes on SKL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bspec says: "For DDIA with x4 capability (DDI_BUF_CTL DDIA Lane Capability Control = DDIA x4), the I_boost value has to be programmed in both tx_blnclegsctl_0 and tx_blnclegsctl_4." Currently we only program tx_blnclegsctl_0. Let's do the other one as well. Cc: stable@vger.kernel.org Fixes: f8896f5d58e6 ("drm/i915/skl: Buffer translation improvements") Cc: David Weinehall Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-2-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9397dde..506ea87 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1536,6 +1536,7 @@ enum skl_disp_power_wells { #define BALANCE_LEG_MASK(port) (7<<(8+3*(port))) /* Balance leg disable bits */ #define BALANCE_LEG_DISABLE_SHIFT 23 +#define BALANCE_LEG_DISABLE(port) (1 << (23 + (port))) /* * Fence registers diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index dd1d6fe..75354cd 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1379,14 +1379,30 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc) TRANS_CLK_SEL_DISABLED); } -static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) +static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, + enum port port, uint8_t iboost) { + u32 tmp; + + tmp = I915_READ(DISPIO_CR_TX_BMU_CR0); + tmp &= ~(BALANCE_LEG_MASK(port) | BALANCE_LEG_DISABLE(port)); + if (iboost) + tmp |= iboost << BALANCE_LEG_SHIFT(port); + else + tmp |= BALANCE_LEG_DISABLE(port); + I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp); +} + +static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) +{ + struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + enum port port = intel_dig_port->port; + int type = encoder->type; const struct ddi_buf_trans *ddi_translations; uint8_t iboost; uint8_t dp_iboost, hdmi_iboost; int n_entries; - u32 reg; /* VBT may override standard boost values */ dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; @@ -1428,16 +1444,10 @@ static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv, return; } - reg = I915_READ(DISPIO_CR_TX_BMU_CR0); - reg &= ~BALANCE_LEG_MASK(port); - reg &= ~(1 << (BALANCE_LEG_DISABLE_SHIFT + port)); - - if (iboost) - reg |= iboost 
<< BALANCE_LEG_SHIFT(port); - else - reg |= 1 << (BALANCE_LEG_DISABLE_SHIFT + port); + _skl_ddi_set_iboost(dev_priv, port, iboost); - I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg); + if (port == PORT_A && intel_dig_port->max_lanes == 4) + _skl_ddi_set_iboost(dev_priv, PORT_E, iboost); } static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, @@ -1568,7 +1578,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) level = translate_signal_level(signal_levels); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - skl_ddi_set_iboost(dev_priv, level, port, encoder->type); + skl_ddi_set_iboost(encoder, level); else if (IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); -- cgit v0.10.2 From c110ae6cffddb81cb69a8c1a57ce4b7ff2a42f99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:59:29 +0300 Subject: drm/i915: Name the "iboost bit" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Give a proper name for the SKL DDI_BUF_TRANS iboost bit. Cc: David Weinehall Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-3-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 506ea87..7c79a79 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7487,6 +7487,7 @@ enum { #define _DDI_BUF_TRANS_A 0x64E00 #define _DDI_BUF_TRANS_B 0x64E60 #define DDI_BUF_TRANS_LO(port, i) _MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8) +#define DDI_BUF_BALANCE_LEG_ENABLE (1 << 31) #define DDI_BUF_TRANS_HI(port, i) _MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8 + 4) /* Sideband Interface (SBI) is programmed indirectly, via diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 75354cd..78e1fda 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -434,7 +434,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) /* If we're boosting the current, set bit 31 of trans1 */ if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level || dev_priv->vbt.ddi_port_info[port].dp_boost_level) - iboost_bit = 1<<31; + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && port != PORT_A && port != PORT_E && -- cgit v0.10.2 From 8d8bb85eb7d859aa9bbe36e588690a1d22af7608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:59:30 +0300 Subject: drm/i915: Program iboost settings for HDMI/DVI on SKL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we fail to program the iboost stuff for HDMI/DVI. Let's remedy that. 
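Condensed from the hunk that follows, the HDMI branch of intel_ddi_pre_enable() gains the same iboost treatment the DP path already receives via ddi_signal_levels(); roughly:

	/* Sketch of the added HDMI branch: look up the VBT-derived level and,
	 * on SKL/KBL, program the corresponding iboost value before enabling
	 * the port.
	 */
	if (type == INTEL_OUTPUT_HDMI) {
		int level = intel_ddi_hdmi_level(dev_priv, port);

		if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
			skl_ddi_set_iboost(intel_encoder, level);
	}
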
Cc: stable@vger.kernel.org Fixes: f8896f5d58e6 ("drm/i915/skl: Buffer translation improvements") Cc: David Weinehall Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-4-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 78e1fda..09fe580 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -388,6 +388,40 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) } } +static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) +{ + int n_hdmi_entries; + int hdmi_level; + int hdmi_default_entry; + + hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + + if (IS_BROXTON(dev_priv)) + return hdmi_level; + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + hdmi_default_entry = 8; + } else if (IS_BROADWELL(dev_priv)) { + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_default_entry = 7; + } else if (IS_HASWELL(dev_priv)) { + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + hdmi_default_entry = 6; + } else { + WARN(1, "ddi translation table missing\n"); + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_default_entry = 7; + } + + /* Choose a good default if VBT is badly populated */ + if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || + hdmi_level >= n_hdmi_entries) + hdmi_level = hdmi_default_entry; + + return hdmi_level; +} + /* * Starting with Haswell, DDI port buffers must be programmed with correct * values in advance. The buffer values are different for FDI and DP modes, @@ -399,7 +433,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry, + int i, n_hdmi_entries, n_dp_entries, n_edp_entries, size; int hdmi_level; enum port port; @@ -410,7 +444,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) const struct ddi_buf_trans *ddi_translations; port = intel_ddi_get_encoder_port(encoder); - hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + hdmi_level = intel_ddi_hdmi_level(dev_priv, port); if (IS_BROXTON(dev_priv)) { if (encoder->type != INTEL_OUTPUT_HDMI) @@ -430,7 +464,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) skl_get_buf_trans_edp(dev_priv, &n_edp_entries); ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); - hdmi_default_entry = 8; /* If we're boosting the current, set bit 31 of trans1 */ if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level || dev_priv->vbt.ddi_port_info[port].dp_boost_level) @@ -456,7 +489,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; } else if (IS_HASWELL(dev_priv)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; @@ -464,7 +496,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) ddi_translations_hdmi = hsw_ddi_translations_hdmi; n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_default_entry = 6; } else { WARN(1, "ddi translation table missing\n"); ddi_translations_edp = bdw_ddi_translations_dp; @@ 
-474,7 +505,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; } switch (encoder->type) { @@ -505,11 +535,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) if (encoder->type != INTEL_OUTPUT_HDMI) return; - /* Choose a good default if VBT is badly populated */ - if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || - hdmi_level >= n_hdmi_entries) - hdmi_level = hdmi_default_entry; - /* Entry 9 is for HDMI: */ I915_WRITE(DDI_BUF_TRANS_LO(port, i), ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit); @@ -1647,6 +1672,10 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) intel_dp_stop_link_train(intel_dp); } else if (type == INTEL_OUTPUT_HDMI) { struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + int level = intel_ddi_hdmi_level(dev_priv, port); + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + skl_ddi_set_iboost(intel_encoder, level); intel_hdmi->set_infoframes(encoder, crtc->config->has_hdmi_sink, -- cgit v0.10.2 From 9f3324377b02cc8ba8d05c89d6d4369e932b7d34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:59:31 +0300 Subject: drm/i915: Move bxt_ddi_vswing_sequence() call into intel_ddi_pre_enable() for HDMI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the SKL iboost programming is done from intel_ddi_pre_enable() for HDMI, let's move the BXT bxt_ddi_vswing_sequence() call there as well. This makes things look more similar to the DP/eDP case which is handled in ddi_signal_levels(). Cc: Imre Deak Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-5-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 09fe580..52589bb 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -301,9 +301,6 @@ static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = { { 154, 0x9A, 1, 128, true }, /* 9: 1200 0 */ }; -static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type); - static void ddi_get_encoder_port(struct intel_encoder *intel_encoder, struct intel_digital_port **dig_port, enum port *port) @@ -446,15 +443,8 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) port = intel_ddi_get_encoder_port(encoder); hdmi_level = intel_ddi_hdmi_level(dev_priv, port); - if (IS_BROXTON(dev_priv)) { - if (encoder->type != INTEL_OUTPUT_HDMI) - return; - - /* Vswing programming for HDMI */ - bxt_ddi_vswing_sequence(dev_priv, hdmi_level, port, - INTEL_OUTPUT_HDMI); + if (IS_BROXTON(dev_priv)) return; - } if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { ddi_translations_fdi = NULL; @@ -1676,6 +1666,9 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) skl_ddi_set_iboost(intel_encoder, level); + else if (IS_BROXTON(dev_priv)) + bxt_ddi_vswing_sequence(dev_priv, level, port, + INTEL_OUTPUT_HDMI); intel_hdmi->set_infoframes(encoder, crtc->config->has_hdmi_sink, -- cgit v0.10.2 From ed9c77d2612316fba202cf1d69fc903969e474c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:59:32 +0300 Subject: drm/i915: Explicitly use 
ddi buf trans entry 9 for hdmi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the DDI port is in HDMI/DVI mode, it automagically uses the buffer translations values from entry 9. Let's make that explicit in the code. Signed-off-by: Ville Syrjälä Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-6-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 52589bb..35b9799 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -526,9 +526,9 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) return; /* Entry 9 is for HDMI: */ - I915_WRITE(DDI_BUF_TRANS_LO(port, i), + I915_WRITE(DDI_BUF_TRANS_LO(port, 9), ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit); - I915_WRITE(DDI_BUF_TRANS_HI(port, i), + I915_WRITE(DDI_BUF_TRANS_HI(port, 9), ddi_translations_hdmi[hdmi_level].trans2); } -- cgit v0.10.2 From 32bdc400963cee9e55889c2973b9553ecee1bcc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:59:33 +0300 Subject: drm/i915: Split DP/eDP/FDI and HDMI/DVI DDI buffer programming apart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DDI buffer prorgramming works quite differently depending on the mode of the DDI port (DP/eDP/FDI vs. HDMI/DVI). Let's split the function that does the programming into two matching variants as well. Signed-off-by: Ville Syrjälä Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-7-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 35b9799..5cae63a 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -421,28 +421,20 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por /* * Starting with Haswell, DDI port buffers must be programmed with correct - * values in advance. The buffer values are different for FDI and DP modes, - * but the HDMI/DVI fields are shared among those. So we program the DDI - * in either FDI or DP modes only, as HDMI connections will work with both - * of those + * values in advance. This function programs the correct values for + * DP/eDP/FDI use cases. 
*/ -void intel_prepare_ddi_buffer(struct intel_encoder *encoder) +void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int i, n_hdmi_entries, n_dp_entries, n_edp_entries, - size; - int hdmi_level; - enum port port; + int i, n_dp_entries, n_edp_entries, size; + enum port port = intel_ddi_get_encoder_port(encoder); const struct ddi_buf_trans *ddi_translations_fdi; const struct ddi_buf_trans *ddi_translations_dp; const struct ddi_buf_trans *ddi_translations_edp; - const struct ddi_buf_trans *ddi_translations_hdmi; const struct ddi_buf_trans *ddi_translations; - port = intel_ddi_get_encoder_port(encoder); - hdmi_level = intel_ddi_hdmi_level(dev_priv, port); - if (IS_BROXTON(dev_priv)) return; @@ -452,8 +444,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) skl_get_buf_trans_dp(dev_priv, &n_dp_entries); ddi_translations_edp = skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - ddi_translations_hdmi = - skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + /* If we're boosting the current, set bit 31 of trans1 */ if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level || dev_priv->vbt.ddi_port_info[port].dp_boost_level) @@ -466,7 +457,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) } else if (IS_BROADWELL(dev_priv)) { ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; - if (dev_priv->vbt.edp.low_vswing) { ddi_translations_edp = bdw_ddi_translations_edp; n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); @@ -474,27 +464,19 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) ddi_translations_edp = bdw_ddi_translations_dp; n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); } - - ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); } else if (IS_HASWELL(dev_priv)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; ddi_translations_edp = hsw_ddi_translations_dp; - ddi_translations_hdmi = hsw_ddi_translations_hdmi; n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); } else { WARN(1, "ddi translation table missing\n"); ddi_translations_edp = bdw_ddi_translations_dp; ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; - ddi_translations_hdmi = bdw_ddi_translations_hdmi; n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); } switch (encoder->type) { @@ -503,7 +485,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) size = n_edp_entries; break; case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_HDMI: ddi_translations = ddi_translations_dp; size = n_dp_entries; break; @@ -521,10 +502,44 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) I915_WRITE(DDI_BUF_TRANS_HI(port, i), ddi_translations[i].trans2); } +} + +/* + * Starting with Haswell, DDI port buffers must be programmed with correct + * values in advance. This function programs the correct values for + * HDMI/DVI use cases. 
+ */ +static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 iboost_bit = 0; + int n_hdmi_entries, hdmi_level; + enum port port = intel_ddi_get_encoder_port(encoder); + const struct ddi_buf_trans *ddi_translations_hdmi; - if (encoder->type != INTEL_OUTPUT_HDMI) + if (IS_BROXTON(dev_priv)) return; + hdmi_level = intel_ddi_hdmi_level(dev_priv, port); + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + /* If we're boosting the current, set bit 31 of trans1 */ + if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level || + dev_priv->vbt.ddi_port_info[port].dp_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; + } else if (IS_BROADWELL(dev_priv)) { + ddi_translations_hdmi = bdw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + } else if (IS_HASWELL(dev_priv)) { + ddi_translations_hdmi = hsw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + } else { + WARN(1, "ddi translation table missing\n"); + ddi_translations_hdmi = bdw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + } + /* Entry 9 is for HDMI: */ I915_WRITE(DDI_BUF_TRANS_LO(port, 9), ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit); @@ -565,7 +580,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) for_each_encoder_on_crtc(dev, crtc, encoder) { WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG); - intel_prepare_ddi_buffer(encoder); + intel_prepare_dp_ddi_buffers(encoder); } /* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the @@ -1640,8 +1655,6 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); } - intel_prepare_ddi_buffer(intel_encoder); - if (type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_edp_panel_on(intel_dp); @@ -1652,6 +1665,8 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + intel_prepare_dp_ddi_buffers(intel_encoder); + intel_dp_set_link_params(intel_dp, crtc->config); intel_ddi_init_dp_buf_reg(intel_encoder); @@ -1664,6 +1679,8 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); int level = intel_ddi_hdmi_level(dev_priv, port); + intel_prepare_hdmi_ddi_buffers(intel_encoder); + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) skl_ddi_set_iboost(intel_encoder, level); else if (IS_BROXTON(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 68a005d..629337d 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -170,10 +170,10 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder) intel_mst->connector = found; if (intel_dp->active_mst_links == 0) { - intel_prepare_ddi_buffer(&intel_dig_port->base); - intel_ddi_clk_select(&intel_dig_port->base, intel_crtc->config); + intel_prepare_dp_ddi_buffers(&intel_dig_port->base); + intel_dp_set_link_params(intel_dp, intel_crtc->config); intel_ddi_init_dp_buf_reg(&intel_dig_port->base); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index e74d851..50cdc89 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ 
b/drivers/gpu/drm/i915/intel_drv.h @@ -1107,7 +1107,7 @@ void intel_crt_reset(struct drm_encoder *encoder); /* intel_ddi.c */ void intel_ddi_clk_select(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config); -void intel_prepare_ddi_buffer(struct intel_encoder *encoder); +void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); void hsw_fdi_link_train(struct drm_crtc *crtc); void intel_ddi_init(struct drm_device *dev, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); -- cgit v0.10.2 From 1edaaa2f25c04ba9b40caf4d80fe3d302d9e76cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:59:34 +0300 Subject: drm/i915: Get the iboost setting based on the port type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Program the 'iboost_bit' based on what the VBT says it should be for the specific port type, rather than assume it's always the same for DP and HDMI. Signed-off-by: Ville Syrjälä Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-8-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 5cae63a..5720202 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -446,8 +446,7 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) skl_get_buf_trans_edp(dev_priv, &n_edp_entries); /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level || - dev_priv->vbt.ddi_port_info[port].dp_boost_level) + if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && @@ -524,9 +523,9 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level || - dev_priv->vbt.ddi_port_info[port].dp_boost_level) + if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level) iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; } else if (IS_BROADWELL(dev_priv)) { ddi_translations_hdmi = bdw_ddi_translations_hdmi; -- cgit v0.10.2 From 5a5d24dca44f7c946cbe0d638db81ad58f12e63e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:59:35 +0300 Subject: drm/i915: Simplify intel_ddi_get_encoder_port() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We no longer have any need to look up the intel_digital_port based on the passed in intel_encoder, but we still want to look up the port. Let's just move that logic into intel_ddi_get_encoder_port() and drop the dig_port stuff. 
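
To make the shape of the simplified lookup concrete, here is a minimal self-contained sketch of the same pattern: map an encoder type straight to a port in a single switch, with a loud fallback for unexpected types. Every enum and name below is invented for illustration and is not the driver's definition; in the real function the DP/eDP/HDMI/MST cases derive the port from the associated digital port rather than returning a constant.

#include <stdio.h>

/* Invented stand-ins for the driver's output types and DDI ports. */
enum example_output_type {
	EXAMPLE_OUTPUT_DP_MST,
	EXAMPLE_OUTPUT_DP,
	EXAMPLE_OUTPUT_EDP,
	EXAMPLE_OUTPUT_HDMI,
	EXAMPLE_OUTPUT_ANALOG,
	EXAMPLE_OUTPUT_OTHER,
};

enum example_port {
	EXAMPLE_PORT_A,
	EXAMPLE_PORT_B,
	EXAMPLE_PORT_E,
};

/* One switch, one return value, loud fallback for unexpected types.  In the
 * driver the digital-port cases read the port from the underlying dig_port;
 * a fixed port stands in for that lookup here. */
static enum example_port example_get_encoder_port(enum example_output_type type)
{
	switch (type) {
	case EXAMPLE_OUTPUT_DP_MST:
	case EXAMPLE_OUTPUT_DP:
	case EXAMPLE_OUTPUT_EDP:
	case EXAMPLE_OUTPUT_HDMI:
		return EXAMPLE_PORT_B;
	case EXAMPLE_OUTPUT_ANALOG:
		return EXAMPLE_PORT_E;
	default:
		fprintf(stderr, "missing case %d\n", (int)type);
		return EXAMPLE_PORT_A;
	}
}

int main(void)
{
	printf("analog -> port %d\n",
	       (int)example_get_encoder_port(EXAMPLE_OUTPUT_ANALOG));
	return 0;
}
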
Signed-off-by: Ville Syrjälä Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-9-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 5720202..c581751 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -301,44 +301,24 @@ static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = { { 154, 0x9A, 1, 128, true }, /* 9: 1200 0 */ }; -static void ddi_get_encoder_port(struct intel_encoder *intel_encoder, - struct intel_digital_port **dig_port, - enum port *port) +enum port intel_ddi_get_encoder_port(struct intel_encoder *encoder) { - struct drm_encoder *encoder = &intel_encoder->base; - - switch (intel_encoder->type) { + switch (encoder->type) { case INTEL_OUTPUT_DP_MST: - *dig_port = enc_to_mst(encoder)->primary; - *port = (*dig_port)->port; - break; - default: - WARN(1, "Invalid DDI encoder type %d\n", intel_encoder->type); - /* fallthrough and treat as unknown */ + return enc_to_mst(&encoder->base)->primary->port; case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: case INTEL_OUTPUT_HDMI: case INTEL_OUTPUT_UNKNOWN: - *dig_port = enc_to_dig_port(encoder); - *port = (*dig_port)->port; - break; + return enc_to_dig_port(&encoder->base)->port; case INTEL_OUTPUT_ANALOG: - *dig_port = NULL; - *port = PORT_E; - break; + return PORT_E; + default: + MISSING_CASE(encoder->type); + return PORT_A; } } -enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder) -{ - struct intel_digital_port *dig_port; - enum port port; - - ddi_get_encoder_port(intel_encoder, &dig_port, &port); - - return port; -} - static const struct ddi_buf_trans * skl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) { -- cgit v0.10.2 From a930acd91f46ad4ae15d14a8c018bb9356ef3be6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Jul 2016 15:59:36 +0300 Subject: drm/i915: Extract bdw_get_buf_trans_edp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the BDW and SKL code a bit more similar by extracting the low vswing handling for BDW into a helper, as we already have it like that for SKL+. 
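
The extracted helper follows a common "return the table, report its length through an out-parameter" pattern. The following minimal self-contained sketch illustrates that pattern only; the struct, tables and values are invented for the example and are not the driver's translation tables.

#include <stdio.h>

struct example_buf_trans {
	int trans1;
	int trans2;
};

/* Invented stand-in tables; the real bdw_ddi_translations_* arrays live in
 * intel_ddi.c and contain hardware-specific values. */
static const struct example_buf_trans example_translations_dp[] = {
	{ 0x1, 0x2 }, { 0x3, 0x4 },
};
static const struct example_buf_trans example_translations_edp[] = {
	{ 0x5, 0x6 }, { 0x7, 0x8 }, { 0x9, 0xa },
};

#define EXAMPLE_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Same contract as the extracted helper: pick the table based on a
 * configuration flag and return its entry count via *n_entries. */
static const struct example_buf_trans *
example_get_buf_trans_edp(int low_vswing, int *n_entries)
{
	if (low_vswing) {
		*n_entries = EXAMPLE_ARRAY_SIZE(example_translations_edp);
		return example_translations_edp;
	} else {
		*n_entries = EXAMPLE_ARRAY_SIZE(example_translations_dp);
		return example_translations_dp;
	}
}

int main(void)
{
	int n_entries;
	const struct example_buf_trans *t =
		example_get_buf_trans_edp(1, &n_entries);

	printf("%d entries, first trans1=0x%x\n", n_entries, t[0].trans1);
	return 0;
}
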
Cc: Mika Kahola Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-10-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index c581751..fc2ef2d 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -320,6 +320,18 @@ enum port intel_ddi_get_encoder_port(struct intel_encoder *encoder) } static const struct ddi_buf_trans * +bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) +{ + if (dev_priv->vbt.edp.low_vswing) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_edp); + return bdw_ddi_translations_edp; + } else { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + return bdw_ddi_translations_dp; + } +} + +static const struct ddi_buf_trans * skl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) { if (IS_SKL_ULX(dev_priv) || IS_KBL_ULX(dev_priv)) { @@ -436,13 +448,7 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) } else if (IS_BROADWELL(dev_priv)) { ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; - if (dev_priv->vbt.edp.low_vswing) { - ddi_translations_edp = bdw_ddi_translations_edp; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); - } else { - ddi_translations_edp = bdw_ddi_translations_dp; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - } + ddi_translations_edp = bdw_get_buf_trans_edp(dev_priv, &n_edp_entries); n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); } else if (IS_HASWELL(dev_priv)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; -- cgit v0.10.2 From 2d1b50564734619487745fe14dc2999b1f4a15d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 29 Jul 2016 17:57:01 +0300 Subject: drm/i915: Always use cpp==4 for FW_BLC_SELF on 915GM/945GM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bspec says: "FW_BLC_SELF ... Programming Note [DevALV] and [DevCST]: When calculating watermark values for 15/16bpp, assume 32bpp for purposes of calculation using the high priority bandwidth analysis spreadsheet." Let's do that. Perhaps this might even help with the problem that resulted in commit 2ab1bc9df01d ("drm/i915: Disable self-refresh for untiled fbs on i915gm") Cc: Daniel Vetter Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1469804222-12650-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b5513d3..e2eee77 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1604,6 +1604,9 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) unsigned long line_time_us; int entries; + if (IS_I915GM(dev) || IS_I945GM(dev)) + cpp = 4; + line_time_us = max(htotal * 1000 / clock, 1); /* Use ns/us then divide to preserve precision */ -- cgit v0.10.2 From acb913593addda42fdfd9f40d2529151b61b5b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 29 Jul 2016 17:57:02 +0300 Subject: drm/i915: Program FW_BLC_SELF on 915G as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Bspec FW_BLC_SELF exists on 915G also. Let's program it. The only open question is whether there is a memory self-refresh enable bit somewhere as well. 
For 945G/GM it's in FW_BLC_SELF, for 915GM it's in INSTPM. For 915G I can't find one in the docs. Let's drop a FIXME about this, in case someone with the hardware is ever bored enough to look for it. Cc: Daniel Vetter Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1469804222-12650-2-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e2eee77..df02483 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -340,6 +340,11 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) I915_WRITE(FW_BLC_SELF, val); POSTING_READ(FW_BLC_SELF); } else if (IS_I915GM(dev)) { + /* + * FIXME can't find a bit like this for 915G, and + * and yet it does have the related watermark in + * FW_BLC_SELF. What's going on? + */ val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) : _MASKED_BIT_DISABLE(INSTPM_SELF_EN); I915_WRITE(INSTPM, val); @@ -1621,7 +1626,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) if (IS_I945G(dev) || IS_I945GM(dev)) I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_FIFO_MASK | (srwm & 0xff)); - else if (IS_I915GM(dev)) + else I915_WRITE(FW_BLC_SELF, srwm & 0x3f); } -- cgit v0.10.2 From dd788090822300a66ff469ae9e50f6d28d124eb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 28 Jul 2016 17:55:04 +0300 Subject: drm/i915: Warn about aux msg buffer vs. size mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we have a buffer, we should also have a size, and vice versa. Let's check it both ways instead of just one. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1469717704-13020-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 21b04c3..001f74f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1041,10 +1041,10 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) if (WARN_ON(txsize > 20)) return -E2BIG; + WARN_ON(!msg->buffer != !msg->size); + if (msg->buffer) memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size); - else - WARN_ON(msg->size); ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize); if (ret > 0) { -- cgit v0.10.2 From 8497825753253a0c72729d8f8cced6c7ed341589 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sun, 31 Jul 2016 00:54:51 -0700 Subject: drm/i915: cleanup_plane_fb: also drop reference to current state wait_req There are two paths into intel_cleanup_plane_fb, the normal completion path and the failure path. In the failure case, intel_cleanup_plane_fb is called before drm_atomic_helper_swap_state, so any wait_req reference made in intel_prepare_plane_fb will be in old_intel_state->wait_req. In the normal completion path, drm_atomic_helper_swap_state has already been called, so the plane state holding the just-used wait_req will not be in old_intel_state->wait_req, rather it will be in the state associated with the plane itself. Clearing this reference ensures that the wait_req will be freed as soon as the related mode setting operation is complete, rather than waiting for some future mode setting operation to eventually dereference it. The existing dereference of old_intel_state->wait_req is still required as that will hold the wait_req when the mode setting operation fails. 
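
The lifetime rule above can be modelled with a tiny self-contained sketch (all names below are invented for illustration and are not the driver's). Whichever of the two state slots holds the request reference, the old state on the failure path or the plane's current state on the normal path, cleanup clears both, so the request is released as soon as the operation is done rather than when some later mode set happens to overwrite it.

#include <stdio.h>
#include <stdlib.h>

struct example_request {
	int refcount;
};

/* Drop the reference held by one state slot, if any, and clear the slot. */
static void example_request_put(struct example_request **slot)
{
	if (*slot && --(*slot)->refcount == 0)
		free(*slot);
	*slot = NULL;
}

struct example_plane_state {
	struct example_request *wait_req;
};

/* Cleanup clears the request in both the old state (failure path, before
 * the state swap) and the current state (normal path, after the swap),
 * so the request cannot outlive the operation that used it. */
static void example_cleanup_plane_fb(struct example_plane_state *old_state,
				     struct example_plane_state *cur_state)
{
	example_request_put(&cur_state->wait_req);
	example_request_put(&old_state->wait_req);
}

int main(void)
{
	struct example_plane_state old_state = { NULL };
	struct example_plane_state cur_state = { NULL };
	struct example_request *req = calloc(1, sizeof(*req));

	if (!req)
		return 1;
	req->refcount = 1;
	cur_state.wait_req = req;	/* normal path: swap already happened */

	example_cleanup_plane_fb(&old_state, &cur_state);
	printf("old=%p cur=%p\n", (void *)old_state.wait_req,
	       (void *)cur_state.wait_req);
	return 0;
}
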
cc: Daniel Vetter cc: David Airlie cc: intel-gfx@lists.freedesktop.org cc: dri-devel@lists.freedesktop.org Signed-off-by: Keith Packard Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c4c1c85..a8e8cc8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14070,6 +14070,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, { struct drm_device *dev = plane->dev; struct intel_plane_state *old_intel_state; + struct intel_plane_state *intel_state = to_intel_plane_state(plane->state); struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb); struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb); @@ -14082,6 +14083,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, !INTEL_INFO(dev)->cursor_needs_physical)) intel_unpin_fb_obj(old_state->fb, old_state->rotation); + i915_gem_request_assign(&intel_state->wait_req, NULL); i915_gem_request_assign(&old_intel_state->wait_req, NULL); } -- cgit v0.10.2 From f2dd7578c4f4ea985da937df2b377ce646e8819b Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Mon, 27 Jun 2016 20:10:01 +0530 Subject: drm/i915/gen9: Update i915_drpc_info debugfs for coarse pg & forcewake info Updated the i915_drpc_info debugfs with coarse power gating & forcewake info for Gen9. v2: Change all IS_GEN9() by gen >= 9 (Damien) v3: Rebase Cc: Damien Lespiau Signed-off-by: Akash Goel Reviewed-by: Sagar Arun Kamble Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1467038401-8283-1-git-send-email-akash.goel@intel.com diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 531ca02..7c42ec4 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1602,6 +1602,7 @@ static int gen6_drpc_info(struct seq_file *m) struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0; + u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; unsigned forcewake_count; int count = 0, ret; @@ -1629,6 +1630,10 @@ static int gen6_drpc_info(struct seq_file *m) rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); + if (INTEL_INFO(dev)->gen >= 9) { + gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE); + gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); + } mutex_unlock(&dev->struct_mutex); mutex_lock(&dev_priv->rps.hw_lock); sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); @@ -1647,6 +1652,12 @@ static int gen6_drpc_info(struct seq_file *m) yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); seq_printf(m, "RC6 Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (INTEL_INFO(dev)->gen >= 9) { + seq_printf(m, "Render Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE)); + seq_printf(m, "Media Well Gating Enabled: %s\n", + yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } seq_printf(m, "Deep RC6 Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE)); seq_printf(m, "Deepest RC6 Enabled: %s\n", @@ -1675,6 +1686,14 @@ static int gen6_drpc_info(struct seq_file *m) seq_printf(m, "Core Power Down: %s\n", yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK)); + if (INTEL_INFO(dev)->gen >= 9) { + seq_printf(m, "Render Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + seq_printf(m, "Media Power Well: %s\n", + (gen9_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? 
"Up" : "Down"); + } /* Not exactly sure what this is */ seq_printf(m, "RC6 \"Locked to RPn\" residency since boot: %u\n", @@ -1692,7 +1711,7 @@ static int gen6_drpc_info(struct seq_file *m) GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); seq_printf(m, "RC6++ voltage: %dmV\n", GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); - return 0; + return i915_forcewake_domains(m, NULL); } static int i915_drpc_info(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7c79a79..2f93d4a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6960,6 +6960,9 @@ enum { #define ECOBUS _MMIO(0xa180) #define FORCEWAKE_MT_ENABLE (1<<5) #define VLV_SPAREG2H _MMIO(0xA194) +#define GEN9_PWRGT_DOMAIN_STATUS _MMIO(0xA2A0) +#define GEN9_PWRGT_MEDIA_STATUS_MASK (1 << 0) +#define GEN9_PWRGT_RENDER_STATUS_MASK (1 << 1) #define GTFIFODBG _MMIO(0x120000) #define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20) -- cgit v0.10.2 From b5321f309ba4921e9f0e32de96c49aa826c08a37 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:18 +0100 Subject: drm/i915: Unify intel_logical_ring_emit and intel_ring_emit Both perform the same actions with more or less indirection, so just unify the code. v2: Add back a few intel_engine_cs locals Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-11-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index bd13d08..a0e24eb 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -552,6 +552,7 @@ static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { struct drm_i915_private *dev_priv = req->i915; + struct intel_ringbuffer *ring = req->ringbuf; struct intel_engine_cs *engine = req->engine; u32 flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = @@ -589,64 +590,64 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (INTEL_GEN(dev_priv) >= 7) { - intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_DISABLE); + intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); if (num_rings) { struct intel_engine_cs *signaller; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); for_each_engine(signaller, dev_priv) { if (signaller == engine) continue; - intel_ring_emit_reg(engine, + intel_ring_emit_reg(ring, RING_PSMI_CTL(signaller->mmio_base)); - intel_ring_emit(engine, + intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); } } } - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_SET_CONTEXT); - intel_ring_emit(engine, + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_SET_CONTEXT); + intel_ring_emit(ring, i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) | flags); /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv */ - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); if (INTEL_GEN(dev_priv) >= 7) { if (num_rings) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ - intel_ring_emit(engine, + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings)); for_each_engine(signaller, dev_priv) { if (signaller == engine) 
continue; last_reg = RING_PSMI_CTL(signaller->mmio_base); - intel_ring_emit_reg(engine, last_reg); - intel_ring_emit(engine, + intel_ring_emit_reg(ring, last_reg); + intel_ring_emit(ring, _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); } /* Insert a delay before the next switch! */ - intel_ring_emit(engine, + intel_ring_emit(ring, MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(engine, last_reg); - intel_ring_emit(engine, engine->scratch.gtt_offset); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit_reg(ring, last_reg); + intel_ring_emit(ring, engine->scratch.gtt_offset); + intel_ring_emit(ring, MI_NOOP); } - intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_ENABLE); + intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); } - intel_ring_advance(engine); + intel_ring_advance(ring); return ret; } @@ -654,7 +655,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) static int remap_l3(struct drm_i915_gem_request *req, int slice) { u32 *remap_info = req->i915->l3_parity.remap_info[slice]; - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; int i, ret; if (!remap_info) @@ -669,13 +670,13 @@ static int remap_l3(struct drm_i915_gem_request *req, int slice) * here because no other code should access these registers other than * at initialization time. */ - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { - intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); - intel_ring_emit(engine, remap_info[i]); + intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i)); + intel_ring_emit(ring, remap_info[i]); } - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index aa35867f..2f9f0da 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1171,14 +1171,12 @@ i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) } static int -i915_reset_gen7_sol_offsets(struct drm_device *dev, - struct drm_i915_gem_request *req) +i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_ringbuffer *ring = req->ringbuf; int ret, i; - if (!IS_GEN7(dev) || engine != &dev_priv->engine[RCS]) { + if (!IS_GEN7(req->i915) || req->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } @@ -1188,12 +1186,12 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return ret; for (i = 0; i < 4; i++) { - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, GEN7_SO_WRITE_OFFSET(i)); - intel_ring_emit(engine, 0); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); + intel_ring_emit(ring, 0); } - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } @@ -1256,9 +1254,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas) { - struct drm_device *dev = params->dev; - struct intel_engine_cs *engine = params->engine; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; int instp_mode; u32 
instp_mask; @@ -1272,34 +1268,31 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, if (ret) return ret; - WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<id), - "%s didn't clear reload\n", engine->name); - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; instp_mask = I915_EXEC_CONSTANTS_MASK; switch (instp_mode) { case I915_EXEC_CONSTANTS_REL_GENERAL: case I915_EXEC_CONSTANTS_ABSOLUTE: case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) { + if (instp_mode != 0 && params->engine->id != RCS) { DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); return -EINVAL; } if (instp_mode != dev_priv->relative_constants_mode) { - if (INTEL_INFO(dev)->gen < 4) { + if (INTEL_INFO(dev_priv)->gen < 4) { DRM_DEBUG("no rel constants on pre-gen4\n"); return -EINVAL; } - if (INTEL_INFO(dev)->gen > 5 && + if (INTEL_INFO(dev_priv)->gen > 5 && instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); return -EINVAL; } /* The HW changed the meaning on this bit on gen6 */ - if (INTEL_INFO(dev)->gen >= 6) + if (INTEL_INFO(dev_priv)->gen >= 6) instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; } break; @@ -1308,23 +1301,25 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, return -EINVAL; } - if (engine == &dev_priv->engine[RCS] && + if (params->engine->id == RCS && instp_mode != dev_priv->relative_constants_mode) { + struct intel_ringbuffer *ring = params->request->ringbuf; + ret = intel_ring_begin(params->request, 4); if (ret) return ret; - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, INSTPM); - intel_ring_emit(engine, instp_mask << 16 | instp_mode); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, INSTPM); + intel_ring_emit(ring, instp_mask << 16 | instp_mode); + intel_ring_advance(ring); dev_priv->relative_constants_mode = instp_mode; } if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - ret = i915_reset_gen7_sol_offsets(dev, params->request); + ret = i915_reset_gen7_sol_offsets(params->request); if (ret) return ret; } @@ -1336,9 +1331,9 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, if (exec_len == 0) exec_len = params->batch_obj->base.size; - ret = engine->dispatch_execbuffer(params->request, - exec_start, exec_len, - params->dispatch_flags); + ret = params->engine->dispatch_execbuffer(params->request, + exec_start, exec_len, + params->dispatch_flags); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 38e7d99..b38a531 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -669,6 +669,7 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { + struct intel_ringbuffer *ring = req->ringbuf; struct intel_engine_cs *engine = req->engine; int ret; @@ -678,13 +679,13 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, GEN8_RING_PDP_UDW(engine, entry)); - intel_ring_emit(engine, upper_32_bits(addr)); - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, GEN8_RING_PDP_LDW(engine, entry)); - intel_ring_emit(engine, lower_32_bits(addr)); - intel_ring_advance(engine); + intel_ring_emit(ring, 
MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); + intel_ring_emit(ring, upper_32_bits(addr)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); + intel_ring_emit(ring, lower_32_bits(addr)); + intel_ring_advance(ring); return 0; } @@ -1660,6 +1661,7 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { + struct intel_ringbuffer *ring = req->ringbuf; struct intel_engine_cs *engine = req->engine; int ret; @@ -1672,13 +1674,13 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(engine, PP_DIR_DCLV_2G); - intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); - intel_ring_emit(engine, get_pd_offset(ppgtt)); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); + intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); + intel_ring_emit(ring, PP_DIR_DCLV_2G); + intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); + intel_ring_emit(ring, get_pd_offset(ppgtt)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -1686,6 +1688,7 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { + struct intel_ringbuffer *ring = req->ringbuf; struct intel_engine_cs *engine = req->engine; int ret; @@ -1698,17 +1701,18 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(engine, PP_DIR_DCLV_2G); - intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); - intel_ring_emit(engine, get_pd_offset(ppgtt)); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); + intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); + intel_ring_emit(ring, PP_DIR_DCLV_2G); + intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); + intel_ring_emit(ring, get_pd_offset(ppgtt)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); /* XXX: RCS is the only one to auto invalidate the TLBs? 
*/ if (engine->id != RCS) { - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->flush(req, + I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a8e8cc8..1d32653 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11115,7 +11115,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@ -11131,13 +11131,13 @@ static int intel_gen2_queue_flip(struct drm_device *dev, flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, 0); /* aux display base address, unused */ + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, 0); /* aux display base address, unused */ return 0; } @@ -11149,7 +11149,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@ -11162,13 +11162,13 @@ static int intel_gen3_queue_flip(struct drm_device *dev, flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, MI_NOOP); return 0; } @@ -11180,7 +11180,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@ -11194,10 +11194,10 @@ static int intel_gen4_queue_flip(struct drm_device *dev, * Display Registers (which do not change across a page-flip) * so we need only reprogram the base address. 
*/ - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0]); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset | + intel_ring_emit(ring, fb->pitches[0]); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | obj->tiling_mode); /* XXX Enabling the panel-fitter across page-flip is so far @@ -11206,7 +11206,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(engine, pf | pipesrc); + intel_ring_emit(ring, pf | pipesrc); return 0; } @@ -11218,7 +11218,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@ -11228,10 +11228,10 @@ static int intel_gen6_queue_flip(struct drm_device *dev, if (ret) return ret; - intel_ring_emit(engine, MI_DISPLAY_FLIP | + intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(engine, fb->pitches[0] | obj->tiling_mode); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, fb->pitches[0] | obj->tiling_mode); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); /* Contrary to the suggestions in the documentation, * "Enable Panel Fitter" does not seem to be required when page @@ -11241,7 +11241,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(engine, pf | pipesrc); + intel_ring_emit(ring, pf | pipesrc); return 0; } @@ -11253,7 +11253,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t plane_bit = 0; int len, ret; @@ -11274,7 +11274,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, } len = 4; - if (engine->id == RCS) { + if (req->engine->id == RCS) { len += 6; /* * On Gen 8, SRM is now taking an extra dword to accommodate @@ -11312,30 +11312,30 @@ static int intel_gen7_queue_flip(struct drm_device *dev, * for the RCS also doesn't appear to drop events. Setting the DERRMR * to zero does lead to lockups within MI_DISPLAY_FLIP. 
*/ - if (engine->id == RCS) { - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, DERRMR); - intel_ring_emit(engine, ~(DERRMR_PIPEA_PRI_FLIP_DONE | + if (req->engine->id == RCS) { + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, DERRMR); + intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE | DERRMR_PIPEB_PRI_FLIP_DONE | DERRMR_PIPEC_PRI_FLIP_DONE)); if (IS_GEN8(dev)) - intel_ring_emit(engine, MI_STORE_REGISTER_MEM_GEN8 | + intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT); else - intel_ring_emit(engine, MI_STORE_REGISTER_MEM | + intel_ring_emit(ring, MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(engine, DERRMR); - intel_ring_emit(engine, engine->scratch.gtt_offset + 256); + intel_ring_emit_reg(ring, DERRMR); + intel_ring_emit(ring, req->engine->scratch.gtt_offset + 256); if (IS_GEN8(dev)) { - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); } } - intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | plane_bit); - intel_ring_emit(engine, (fb->pitches[0] | obj->tiling_mode)); - intel_ring_emit(engine, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(engine, (MI_NOOP)); + intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); + intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode)); + intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + intel_ring_emit(ring, (MI_NOOP)); return 0; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index dd3f490..d851b4e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -773,7 +773,7 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) struct intel_ringbuffer *ringbuf = request->ringbuf; struct intel_engine_cs *engine = request->engine; - intel_logical_ring_advance(ringbuf); + intel_ring_advance(ringbuf); request->tail = ringbuf->tail; /* @@ -782,9 +782,9 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) * * Caller must reserve WA_TAIL_DWORDS for us! */ - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ringbuf, MI_NOOP); + intel_ring_emit(ringbuf, MI_NOOP); + intel_ring_advance(ringbuf); /* We keep the previous context alive until we retire the following * request. 
This ensures that any the context object is still pinned @@ -868,11 +868,11 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, if (ret) return ret; - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); - intel_logical_ring_emit_reg(ringbuf, INSTPM); - intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ringbuf, MI_NOOP); + intel_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ringbuf, INSTPM); + intel_ring_emit(ringbuf, instp_mask << 16 | instp_mode); + intel_ring_advance(ringbuf); dev_priv->relative_constants_mode = instp_mode; } @@ -1045,14 +1045,14 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count)); + intel_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count)); for (i = 0; i < w->count; i++) { - intel_logical_ring_emit_reg(ringbuf, w->reg[i].addr); - intel_logical_ring_emit(ringbuf, w->reg[i].value); + intel_ring_emit_reg(ringbuf, w->reg[i].addr); + intel_ring_emit(ringbuf, w->reg[i].value); } - intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_advance(ringbuf); engine->gpu_caches_dirty = true; ret = logical_ring_flush_all_caches(req); @@ -1553,8 +1553,8 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; + struct intel_ringbuffer *ring = req->ringbuf; struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ringbuf = req->ringbuf; const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; int i, ret; @@ -1562,20 +1562,18 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) if (ret) return ret; - intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds)); for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - intel_logical_ring_emit_reg(ringbuf, - GEN8_RING_PDP_UDW(engine, i)); - intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr)); - intel_logical_ring_emit_reg(ringbuf, - GEN8_RING_PDP_LDW(engine, i)); - intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i)); + intel_ring_emit(ring, upper_32_bits(pd_daddr)); + intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i)); + intel_ring_emit(ring, lower_32_bits(pd_daddr)); } - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -1583,7 +1581,7 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) static int gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, unsigned dispatch_flags) { - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ringbuffer *ring = req->ringbuf; bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); int ret; @@ -1610,14 +1608,14 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | - (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? 
- MI_BATCH_RESOURCE_STREAMER : 0)); - intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); - intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | + (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? + MI_BATCH_RESOURCE_STREAMER : 0)); + intel_ring_emit(ring, lower_32_bits(offset)); + intel_ring_emit(ring, upper_32_bits(offset)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -1640,9 +1638,8 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 invalidate_domains, u32 unused) { - struct intel_ringbuffer *ringbuf = request->ringbuf; - struct intel_engine_cs *engine = ringbuf->engine; - struct drm_i915_private *dev_priv = request->i915; + struct intel_ringbuffer *ring = request->ringbuf; + struct intel_engine_cs *engine = ring->engine; uint32_t cmd; int ret; @@ -1661,17 +1658,17 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, if (invalidate_domains & I915_GEM_GPU_DOMAINS) { cmd |= MI_INVALIDATE_TLB; - if (engine == &dev_priv->engine[VCS]) + if (engine->id == VCS) cmd |= MI_INVALIDATE_BSD; } - intel_logical_ring_emit(ringbuf, cmd); - intel_logical_ring_emit(ringbuf, - I915_GEM_HWS_SCRATCH_ADDR | - MI_FLUSH_DW_USE_GTT); - intel_logical_ring_emit(ringbuf, 0); /* upper addr */ - intel_logical_ring_emit(ringbuf, 0); /* value */ - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, + I915_GEM_HWS_SCRATCH_ADDR | + MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ + intel_ring_advance(ring); return 0; } @@ -1680,8 +1677,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ringbuf = request->ringbuf; - struct intel_engine_cs *engine = ringbuf->engine; + struct intel_ringbuffer *ring = request->ringbuf; + struct intel_engine_cs *engine = request->engine; u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; u32 flags = 0; @@ -1732,40 +1729,40 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, return ret; if (vf_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } if (dc_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, PIPE_CONTROL_DC_FLUSH_ENABLE); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, flags); - intel_logical_ring_emit(ringbuf, scratch_addr); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - 
intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); if (dc_flush_wa) { - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, PIPE_CONTROL_CS_STALL); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, 0); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, PIPE_CONTROL_CS_STALL); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); } - intel_logical_ring_advance(ringbuf); + intel_ring_advance(ring); return 0; } @@ -1794,7 +1791,7 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) static int gen8_emit_request(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ringbuffer *ring = request->ringbuf; int ret; ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS); @@ -1804,21 +1801,20 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - intel_logical_ring_emit(ringbuf, - (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); - intel_logical_ring_emit(ringbuf, - intel_hws_seqno_address(request->engine) | - MI_FLUSH_DW_USE_GTT); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, request->fence.seqno); - intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); - intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); + intel_ring_emit(ring, + intel_hws_seqno_address(request->engine) | + MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, request->fence.seqno); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_emit(ring, MI_NOOP); return intel_logical_ring_advance_and_submit(request); } static int gen8_emit_request_render(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ringbuffer *ring = request->ringbuf; int ret; ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS); @@ -1832,19 +1828,18 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request) * need a prior CS_STALL, which is emitted by the flush * following the batch. */ - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); - intel_logical_ring_emit(ringbuf, - (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - intel_logical_ring_emit(ringbuf, - intel_hws_seqno_address(request->engine)); - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request)); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, + (PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE)); + intel_ring_emit(ring, intel_hws_seqno_address(request->engine)); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, i915_gem_request_get_seqno(request)); /* We're thrashing one dword of HWS. 
*/ - intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); - intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_emit(ring, MI_NOOP); return intel_logical_ring_advance_and_submit(request); } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 3828730..d26fb44 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -73,32 +73,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); int intel_engines_init(struct drm_device *dev); int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); -/** - * intel_logical_ring_advance() - advance the ringbuffer tail - * @ringbuf: Ringbuffer to advance. - * - * The tail is only updated in our logical ringbuffer struct. - */ -static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) -{ - __intel_ringbuffer_advance(ringbuf); -} - -/** - * intel_logical_ring_emit() - write a DWORD to the ringbuffer. - * @ringbuf: Ringbuffer to write to. - * @data: DWORD to write. - */ -static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, - u32 data) -{ - __intel_ringbuffer_emit(ringbuf, data); -} -static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf, - i915_reg_t reg) -{ - intel_logical_ring_emit(ringbuf, i915_mmio_reg_offset(reg)); -} /* Logical Ring Contexts */ diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index bd46968..3059c52 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -288,14 +288,11 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, if (ret) return ret; - intel_logical_ring_emit(ringbuf, - MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); + intel_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); for (index = 0; index < table->size; index++) { - intel_logical_ring_emit_reg(ringbuf, - mocs_register(engine, index)); - intel_logical_ring_emit(ringbuf, - table->table[index].control_value); + intel_ring_emit_reg(ringbuf, mocs_register(engine, index)); + intel_ring_emit(ringbuf, table->table[index].control_value); } /* @@ -307,14 +304,12 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, * that value to all the used entries. 
*/ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { - intel_logical_ring_emit_reg(ringbuf, - mocs_register(engine, index)); - intel_logical_ring_emit(ringbuf, - table->table[0].control_value); + intel_ring_emit_reg(ringbuf, mocs_register(engine, index)); + intel_ring_emit(ringbuf, table->table[0].control_value); } - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ringbuf, MI_NOOP); + intel_ring_advance(ringbuf); return 0; } @@ -352,19 +347,18 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, if (ret) return ret; - intel_logical_ring_emit(ringbuf, + intel_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2)); for (i = 0; i < table->size/2; i++) { - intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_logical_ring_emit(ringbuf, - l3cc_combine(table, 2*i, 2*i+1)); + intel_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ringbuf, l3cc_combine(table, 2*i, 2*i+1)); } if (table->size & 0x01) { /* Odd table size - 1 left over */ - intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_logical_ring_emit(ringbuf, l3cc_combine(table, 2*i, 0)); + intel_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ringbuf, l3cc_combine(table, 2*i, 0)); i++; } @@ -374,12 +368,12 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, * they are reserved by the hardware. */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { - intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_logical_ring_emit(ringbuf, l3cc_combine(table, 0, 0)); + intel_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ringbuf, l3cc_combine(table, 0, 0)); } - intel_logical_ring_emit(ringbuf, MI_NOOP); - intel_logical_ring_advance(ringbuf); + intel_ring_emit(ringbuf, MI_NOOP); + intel_ring_advance(ringbuf); return 0; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index c10ce36..ec63b64 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -235,6 +235,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) struct drm_i915_private *dev_priv = overlay->i915; struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; + struct intel_ringbuffer *ring; int ret; WARN_ON(overlay->active); @@ -252,11 +253,12 @@ static int intel_overlay_on(struct intel_overlay *overlay) overlay->active = true; - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_ON); - intel_ring_emit(engine, overlay->flip_addr | OFC_UPDATE); - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + ring = req->ringbuf; + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); + intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return intel_overlay_do_wait_request(overlay, req, NULL); } @@ -268,6 +270,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, struct drm_i915_private *dev_priv = overlay->i915; struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; + struct intel_ringbuffer *ring; u32 flip_addr = overlay->flip_addr; u32 tmp; int ret; @@ -292,9 +295,10 @@ static int intel_overlay_continue(struct intel_overlay *overlay, return ret; } - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(engine, 
flip_addr); - intel_ring_advance(engine); + ring = req->ringbuf; + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); + intel_ring_emit(ring, flip_addr); + intel_ring_advance(ring); WARN_ON(overlay->last_flip_req); i915_gem_request_assign(&overlay->last_flip_req, req); @@ -336,6 +340,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) struct drm_i915_private *dev_priv = overlay->i915; struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; + struct intel_ringbuffer *ring; u32 flip_addr = overlay->flip_addr; int ret; @@ -357,24 +362,25 @@ static int intel_overlay_off(struct intel_overlay *overlay) return ret; } + ring = req->ringbuf; /* wait for overlay to go idle */ - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(engine, flip_addr); - intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); + intel_ring_emit(ring, flip_addr); + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); /* turn overlay off */ if (IS_I830(dev_priv)) { /* Workaround: Don't disable the overlay fully, since otherwise * it dies on the next OVERLAY_ON cmd. */ - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); } else { - intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); - intel_ring_emit(engine, flip_addr); - intel_ring_emit(engine, + intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); + intel_ring_emit(ring, flip_addr); + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); } - intel_ring_advance(engine); + intel_ring_advance(ring); return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail); } @@ -420,6 +426,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ struct drm_i915_gem_request *req; + struct intel_ringbuffer *ring; req = i915_gem_request_alloc(engine, NULL); if (IS_ERR(req)) @@ -431,10 +438,11 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) return ret; } - intel_ring_emit(engine, + ring = req->ringbuf; + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); ret = intel_overlay_do_wait_request(overlay, req, intel_overlay_release_old_vid_tail); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 15acaf6..1267006 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -58,7 +58,7 @@ void intel_ring_update_space(struct intel_ringbuffer *ringbuf) ringbuf->tail, ringbuf->size); } -static void __intel_ring_advance(struct intel_engine_cs *engine) +static void __intel_engine_submit(struct intel_engine_cs *engine) { struct intel_ringbuffer *ringbuf = engine->buffer; ringbuf->tail &= ringbuf->size - 1; @@ -70,7 +70,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; u32 cmd; int ret; @@ -85,9 +85,9 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, cmd); 
- intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -97,7 +97,7 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; u32 cmd; int ret; @@ -129,23 +129,20 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, * are flushed at any MI_FLUSH. */ - cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; - if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) - cmd &= ~MI_NO_WRITE_FLUSH; - if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) + cmd = MI_FLUSH; + if (invalidate_domains) { cmd |= MI_EXE_FLUSH; - - if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && - (IS_G4X(req->i915) || IS_GEN5(req->i915))) - cmd |= MI_INVALIDATE_ISP; + if (IS_G4X(req->i915) || IS_GEN5(req->i915)) + cmd |= MI_INVALIDATE_ISP; + } ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, cmd); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -190,34 +187,35 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, static int intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + struct intel_ringbuffer *ring = req->ringbuf; + u32 scratch_addr = + req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; ret = intel_ring_begin(req, 6); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(engine, PIPE_CONTROL_CS_STALL | + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); + intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ - intel_ring_emit(engine, 0); /* low dword */ - intel_ring_emit(engine, 0); /* high dword */ - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(ring, 0); /* low dword */ + intel_ring_emit(ring, 0); /* high dword */ + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); ret = intel_ring_begin(req, 6); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(engine, PIPE_CONTROL_QW_WRITE); - intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); + intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE); + intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -226,9 +224,10 @@ static int gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; + u32 scratch_addr = + req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; u32 flags = 0; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ @@ -266,11 +265,11 @@ gen6_render_ring_flush(struct 
drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(engine, flags); - intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } @@ -278,19 +277,20 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, static int gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; int ret; ret = intel_ring_begin(req, 4); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(engine, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } @@ -299,9 +299,10 @@ static int gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; + u32 scratch_addr = + req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; u32 flags = 0; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; /* @@ -350,11 +351,11 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(engine, flags); - intel_ring_emit(engine, scratch_addr); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } @@ -363,20 +364,20 @@ static int gen8_emit_pipe_control(struct drm_i915_gem_request *req, u32 flags, u32 scratch_addr) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; int ret; ret = intel_ring_begin(req, 6); if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(engine, flags); - intel_ring_emit(engine, scratch_addr); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, 0); - intel_ring_advance(engine); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); return 0; } @@ -385,8 +386,8 @@ static int gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - u32 flags = 0; u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 flags = 0; int ret; flags |= PIPE_CONTROL_CS_STALL; @@ -679,14 +680,14 @@ err: static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; struct i915_workarounds *w = &req->i915->workarounds; int ret, i; if (w->count == 0) return 0; - engine->gpu_caches_dirty = true; + req->engine->gpu_caches_dirty = true; ret = 
intel_ring_flush_all_caches(req); if (ret) return ret; @@ -695,16 +696,16 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(w->count)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(engine, w->reg[i].addr); - intel_ring_emit(engine, w->reg[i].value); + intel_ring_emit_reg(ring, w->reg[i].addr); + intel_ring_emit(ring, w->reg[i].value); } - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(engine); + intel_ring_advance(ring); - engine->gpu_caches_dirty = true; + req->engine->gpu_caches_dirty = true; ret = intel_ring_flush_all_caches(req); if (ret) return ret; @@ -1337,7 +1338,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { #define MBOX_UPDATE_DWORDS 8 - struct intel_engine_cs *signaller = signaller_req->engine; + struct intel_ringbuffer *signaller = signaller_req->ringbuf; struct drm_i915_private *dev_priv = signaller_req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; @@ -1352,20 +1353,23 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, return ret; for_each_engine_id(waiter, dev_priv, id) { - u64 gtt_offset = signaller->semaphore.signal_ggtt[id]; + u64 gtt_offset = + signaller_req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); + intel_ring_emit(signaller, + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL); intel_ring_emit(signaller, lower_32_bits(gtt_offset)); intel_ring_emit(signaller, upper_32_bits(gtt_offset)); intel_ring_emit(signaller, signaller_req->fence.seqno); intel_ring_emit(signaller, 0); - intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); + intel_ring_emit(signaller, + MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id)); intel_ring_emit(signaller, 0); } @@ -1376,7 +1380,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { #define MBOX_UPDATE_DWORDS 6 - struct intel_engine_cs *signaller = signaller_req->engine; + struct intel_ringbuffer *signaller = signaller_req->ringbuf; struct drm_i915_private *dev_priv = signaller_req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; @@ -1391,18 +1395,21 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, return ret; for_each_engine_id(waiter, dev_priv, id) { - u64 gtt_offset = signaller->semaphore.signal_ggtt[id]; + u64 gtt_offset = + signaller_req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | - MI_FLUSH_DW_OP_STOREDW); - intel_ring_emit(signaller, lower_32_bits(gtt_offset) | - MI_FLUSH_DW_USE_GTT); + intel_ring_emit(signaller, + (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); + intel_ring_emit(signaller, + lower_32_bits(gtt_offset) | + MI_FLUSH_DW_USE_GTT); intel_ring_emit(signaller, upper_32_bits(gtt_offset)); intel_ring_emit(signaller, signaller_req->fence.seqno); - intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); + intel_ring_emit(signaller, + MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id)); intel_ring_emit(signaller, 0); } @@ -1412,7 +1419,7 
@@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, static int gen6_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { - struct intel_engine_cs *signaller = signaller_req->engine; + struct intel_ringbuffer *signaller = signaller_req->ringbuf; struct drm_i915_private *dev_priv = signaller_req->i915; struct intel_engine_cs *useless; enum intel_engine_id id; @@ -1428,7 +1435,8 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req, return ret; for_each_engine_id(useless, dev_priv, id) { - i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[id]; + i915_reg_t mbox_reg = + signaller_req->engine->semaphore.mbox.signal[id]; if (i915_mmio_reg_valid(mbox_reg)) { intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); @@ -1456,6 +1464,7 @@ static int gen6_add_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; int ret; if (engine->semaphore.signal) @@ -1466,12 +1475,11 @@ gen6_add_request(struct drm_i915_gem_request *req) if (ret) return ret; - intel_ring_emit(engine, MI_STORE_DWORD_INDEX); - intel_ring_emit(engine, - I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, req->fence.seqno); - intel_ring_emit(engine, MI_USER_INTERRUPT); - __intel_ring_advance(engine); + intel_ring_emit(ring, MI_STORE_DWORD_INDEX); + intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); + intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, MI_USER_INTERRUPT); + __intel_engine_submit(engine); return 0; } @@ -1480,6 +1488,7 @@ static int gen8_render_add_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; int ret; if (engine->semaphore.signal) @@ -1489,18 +1498,18 @@ gen8_render_add_request(struct drm_i915_gem_request *req) if (ret) return ret; - intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(engine, (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - intel_ring_emit(engine, intel_hws_seqno_address(req->engine)); - intel_ring_emit(engine, 0); - intel_ring_emit(engine, i915_gem_request_get_seqno(req)); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, (PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE)); + intel_ring_emit(ring, intel_hws_seqno_address(engine)); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); /* We're thrashing one dword of HWS. 
*/ - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_USER_INTERRUPT); - intel_ring_emit(engine, MI_NOOP); - __intel_ring_advance(engine); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_USER_INTERRUPT); + intel_ring_emit(ring, MI_NOOP); + __intel_engine_submit(engine); return 0; } @@ -1524,9 +1533,9 @@ gen8_ring_sync(struct drm_i915_gem_request *waiter_req, struct intel_engine_cs *signaller, u32 seqno) { - struct intel_engine_cs *waiter = waiter_req->engine; + struct intel_ringbuffer *waiter = waiter_req->ringbuf; struct drm_i915_private *dev_priv = waiter_req->i915; - u64 offset = GEN8_WAIT_OFFSET(waiter, signaller->id); + u64 offset = GEN8_WAIT_OFFSET(waiter_req->engine, signaller->id); struct i915_hw_ppgtt *ppgtt; int ret; @@ -1558,11 +1567,11 @@ gen6_ring_sync(struct drm_i915_gem_request *waiter_req, struct intel_engine_cs *signaller, u32 seqno) { - struct intel_engine_cs *waiter = waiter_req->engine; + struct intel_ringbuffer *waiter = waiter_req->ringbuf; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id]; + u32 wait_mbox = signaller->semaphore.mbox.wait[waiter_req->engine->id]; int ret; /* Throughout all of the GEM code, seqno passed implies our current @@ -1692,35 +1701,34 @@ bsd_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, MI_FLUSH); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_FLUSH); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } static int i9xx_add_request(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; int ret; ret = intel_ring_begin(req, 4); if (ret) return ret; - intel_ring_emit(engine, MI_STORE_DWORD_INDEX); - intel_ring_emit(engine, - I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(engine, req->fence.seqno); - intel_ring_emit(engine, MI_USER_INTERRUPT); - __intel_ring_advance(engine); + intel_ring_emit(ring, MI_STORE_DWORD_INDEX); + intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); + intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, MI_USER_INTERRUPT); + __intel_engine_submit(req->engine); return 0; } @@ -1787,20 +1795,20 @@ i965_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE_I965)); - intel_ring_emit(engine, offset); - intel_ring_advance(engine); + intel_ring_emit(ring, offset); + intel_ring_advance(ring); return 0; } @@ -1814,8 +1822,8 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_engine_cs *engine = req->engine; - u32 cs_offset = engine->scratch.gtt_offset; + struct intel_ringbuffer *ring = req->ringbuf; + u32 cs_offset = req->engine->scratch.gtt_offset; int ret; ret = intel_ring_begin(req, 6); @@ -1823,13 +1831,13 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, return ret; /* Evict the invalid PTE TLBs */ - intel_ring_emit(engine, COLOR_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(engine, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); - intel_ring_emit(engine, I830_TLB_ENTRIES << 16 | 4); /* load each page */ - intel_ring_emit(engine, cs_offset); - intel_ring_emit(engine, 0xdeadbeef); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA); + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); + intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */ + intel_ring_emit(ring, cs_offset); + intel_ring_emit(ring, 0xdeadbeef); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) @@ -1843,17 +1851,17 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, * stable batch scratch bo area (so that the CS never * stumbles over its tlb invalidation bug) ... */ - intel_ring_emit(engine, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(engine, + intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); - intel_ring_emit(engine, DIV_ROUND_UP(len, 4096) << 16 | 4096); - intel_ring_emit(engine, cs_offset); - intel_ring_emit(engine, 4096); - intel_ring_emit(engine, offset); + intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096); + intel_ring_emit(ring, cs_offset); + intel_ring_emit(ring, 4096); + intel_ring_emit(ring, offset); - intel_ring_emit(engine, MI_FLUSH); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_FLUSH); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); /* ... and execute it. */ offset = cs_offset; @@ -1863,10 +1871,10 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); + intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_NON_SECURE)); + intel_ring_advance(ring); return 0; } @@ -1876,17 +1884,17 @@ i915_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); + intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? 
+ 0 : MI_BATCH_NON_SECURE)); + intel_ring_advance(ring); return 0; } @@ -2418,8 +2426,9 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; - int num_dwords = (engine->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + struct intel_ringbuffer *ring = req->ringbuf; + int num_dwords = + (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); int ret; if (num_dwords == 0) @@ -2431,9 +2440,9 @@ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) return ret; while (num_dwords--) - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } @@ -2524,7 +2533,7 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 invalidate, u32 flush) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; uint32_t cmd; int ret; @@ -2552,17 +2561,16 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, if (invalidate & I915_GEM_GPU_DOMAINS) cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; - intel_ring_emit(engine, cmd); - intel_ring_emit(engine, - I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(engine, 0); /* upper addr */ - intel_ring_emit(engine, 0); /* value */ + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ } else { - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); } - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } @@ -2571,8 +2579,8 @@ gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_engine_cs *engine = req->engine; - bool ppgtt = USES_PPGTT(engine->dev) && + struct intel_ringbuffer *ring = req->ringbuf; + bool ppgtt = USES_PPGTT(req->i915) && !(dispatch_flags & I915_DISPATCH_SECURE); int ret; @@ -2581,13 +2589,13 @@ gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(engine, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(engine, lower_32_bits(offset)); - intel_ring_emit(engine, upper_32_bits(offset)); - intel_ring_emit(engine, MI_NOOP); - intel_ring_advance(engine); + intel_ring_emit(ring, lower_32_bits(offset)); + intel_ring_emit(ring, upper_32_bits(offset)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -2597,22 +2605,22 @@ hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | (dispatch_flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(engine, offset); - intel_ring_advance(engine); + intel_ring_emit(ring, offset); + intel_ring_advance(ring); return 0; } @@ -2622,20 +2630,20 @@ gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; int ret; ret = intel_ring_begin(req, 2); if (ret) return ret; - intel_ring_emit(engine, + intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965)); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(engine, offset); - intel_ring_advance(engine); + intel_ring_emit(ring, offset); + intel_ring_advance(ring); return 0; } @@ -2645,7 +2653,7 @@ gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 invalidate, u32 flush) { - struct intel_engine_cs *engine = req->engine; + struct intel_ringbuffer *ring = req->ringbuf; uint32_t cmd; int ret; @@ -2672,17 +2680,17 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, */ if (invalidate & I915_GEM_DOMAIN_RENDER) cmd |= MI_INVALIDATE_TLB; - intel_ring_emit(engine, cmd); - intel_ring_emit(engine, + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(engine, 0); /* upper addr */ - intel_ring_emit(engine, 0); /* value */ + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ } else { - intel_ring_emit(engine, 0); - intel_ring_emit(engine, MI_NOOP); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); } - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9a0a026..4f4b8ea 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -454,32 +454,21 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request); int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -static inline void __intel_ringbuffer_emit(struct intel_ringbuffer *rb, - u32 data) +static inline void intel_ring_emit(struct intel_ringbuffer *ring, u32 data) { - *(uint32_t *)(rb->vaddr + rb->tail) = data; - rb->tail += 4; + *(uint32_t *)(ring->vaddr + ring->tail) = data; + ring->tail += 4; } -static inline void __intel_ringbuffer_advance(struct intel_ringbuffer *rb) -{ - rb->tail &= rb->size - 1; -} - -static inline void intel_ring_emit(struct intel_engine_cs *engine, u32 data) -{ - __intel_ringbuffer_emit(engine->buffer, data); -} - -static inline void intel_ring_emit_reg(struct intel_engine_cs *engine, +static inline void intel_ring_emit_reg(struct intel_ringbuffer *ring, i915_reg_t reg) { - intel_ring_emit(engine, i915_mmio_reg_offset(reg)); + intel_ring_emit(ring, i915_mmio_reg_offset(reg)); } -static inline void intel_ring_advance(struct intel_engine_cs *engine) +static inline void intel_ring_advance(struct intel_ringbuffer *ring) { - __intel_ringbuffer_advance(engine->buffer); + ring->tail &= ring->size - 1; } int __intel_ring_space(int head, int tail, int size); -- cgit v0.10.2 From 1dae2dfb0bfd9c148f2b26277ef961033c4c1184 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:19 +0100 
Subject: drm/i915: Rename request->ringbuf to request->ring Now that we have disambiguated ring and engine, we can use the clearer and more consistent name for the intel_ringbuffer pointer in the request. @@ struct drm_i915_gem_request *r; @@ - r->ringbuf + r->ring Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-12-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a0e24eb..f7f4a8c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -552,7 +552,7 @@ static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { struct drm_i915_private *dev_priv = req->i915; - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct intel_engine_cs *engine = req->engine; u32 flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = @@ -655,7 +655,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) static int remap_l3(struct drm_i915_gem_request *req, int slice) { u32 *remap_info = req->i915->l3_parity.remap_info[slice]; - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int i, ret; if (!remap_info) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2f9f0da..42389de 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1173,7 +1173,7 @@ i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret, i; if (!IS_GEN7(req->i915) || req->engine->id != RCS) { @@ -1303,7 +1303,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, if (params->engine->id == RCS && instp_mode != dev_priv->relative_constants_mode) { - struct intel_ringbuffer *ring = params->request->ringbuf; + struct intel_ringbuffer *ring = params->request->ring; ret = intel_ring_begin(params->request, 4); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b38a531..46cae2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -669,7 +669,7 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; @@ -1661,7 +1661,7 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; @@ -1688,7 +1688,7 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index
49396b8..d2133c4 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -170,7 +170,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) * Note this requires that we are always called in request * completion order. */ - request->ringbuf->last_retired_head = request->postfix; + request->ring->last_retired_head = request->postfix; i915_gem_request_remove_from_client(request); @@ -423,7 +423,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) { struct intel_engine_cs *engine; - struct intel_ringbuffer *ringbuf; + struct intel_ringbuffer *ring; u32 request_start; u32 reserved_tail; int ret; @@ -432,14 +432,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, return; engine = request->engine; - ringbuf = request->ringbuf; + ring = request->ring; /* * To ensure that this call will not fail, space for its emissions * should already have been reserved in the ring buffer. Let the ring * know that it is time to use that space up. */ - request_start = intel_ring_get_tail(ringbuf); + request_start = intel_ring_get_tail(ring); reserved_tail = request->reserved_space; request->reserved_space = 0; @@ -486,21 +486,21 @@ void __i915_add_request(struct drm_i915_gem_request *request, * GPU processing the request, we never over-estimate the * position of the head. */ - request->postfix = intel_ring_get_tail(ringbuf); + request->postfix = intel_ring_get_tail(ring); if (i915.enable_execlists) { ret = engine->emit_request(request); } else { ret = engine->add_request(request); - request->tail = intel_ring_get_tail(ringbuf); + request->tail = intel_ring_get_tail(ring); } /* Not allowed to fail! */ WARN(ret, "emit|add_request failed: %d!\n", ret); /* Sanity check that the reserved size was large enough. */ - ret = intel_ring_get_tail(ringbuf) - request_start; + ret = intel_ring_get_tail(ring) - request_start; if (ret < 0) - ret += ringbuf->size; + ret += ring->size; WARN_ONCE(ret > reserved_tail, "Not enough space reserved (%d bytes) " "for adding the request (%d bytes)\n", diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index e06e81f..68868d8 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -61,7 +61,7 @@ struct drm_i915_gem_request { */ struct i915_gem_context *ctx; struct intel_engine_cs *engine; - struct intel_ringbuffer *ringbuf; + struct intel_ringbuffer *ring; struct intel_signal_node signaling; /** GEM sequence number associated with the previous request, diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index bc4a3eb..d490a43 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1097,7 +1097,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, request = i915_gem_find_active_request(engine); if (request) { struct i915_address_space *vm; - struct intel_ringbuffer *rb; + struct intel_ringbuffer *ring; vm = request->ctx->ppgtt ? 
&request->ctx->ppgtt->base : &ggtt->base; @@ -1114,7 +1114,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, if (HAS_BROKEN_CS_TLB(dev_priv)) ee->wa_batchbuffer = i915_error_ggtt_object_create(dev_priv, - engine->scratch.obj); + engine->scratch.obj); if (request->pid) { struct task_struct *task; @@ -1131,23 +1131,20 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, error->simulated |= request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE; - rb = request->ringbuf; - ee->cpu_ring_head = rb->head; - ee->cpu_ring_tail = rb->tail; + ring = request->ring; + ee->cpu_ring_head = ring->head; + ee->cpu_ring_tail = ring->tail; ee->ringbuffer = i915_error_ggtt_object_create(dev_priv, - rb->obj); + ring->obj); } ee->hws_page = i915_error_ggtt_object_create(dev_priv, engine->status_page.obj); - if (engine->wa_ctx.obj) { - ee->wa_ctx = - i915_error_ggtt_object_create(dev_priv, - engine->wa_ctx.obj); - } + ee->wa_ctx = i915_error_ggtt_object_create(dev_priv, + engine->wa_ctx.obj); i915_gem_record_active_context(engine, error, ee); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1d32653..8c7da2f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11115,7 +11115,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@ -11149,7 +11149,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@ -11180,7 +11180,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@ -11218,7 +11218,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@ -11253,7 +11253,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t plane_bit = 0; int len, ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d851b4e..041868c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -714,7 +714,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request return ret; } - request->ringbuf = ce->ringbuf; + request->ring = ce->ringbuf; if (i915.enable_guc_submission) { /* @@ -770,11 +770,11 @@ err_unpin: static int intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ringbuf = request->ringbuf; + struct intel_ringbuffer *ring = request->ring; struct intel_engine_cs 
*engine = request->engine; - intel_ring_advance(ringbuf); - request->tail = ringbuf->tail; + intel_ring_advance(ring); + request->tail = ring->tail; /* * Here we add two extra NOOPs as padding to avoid @@ -782,9 +782,9 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) * * Caller must reserve WA_TAIL_DWORDS for us! */ - intel_ring_emit(ringbuf, MI_NOOP); - intel_ring_emit(ringbuf, MI_NOOP); - intel_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); /* We keep the previous context alive until we retire the following * request. This ensures that any the context object is still pinned @@ -821,7 +821,7 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, struct drm_device *dev = params->dev; struct intel_engine_cs *engine = params->engine; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ringbuffer *ringbuf = params->ctx->engine[engine->id].ringbuf; + struct intel_ringbuffer *ring = params->request->ring; u64 exec_start; int instp_mode; u32 instp_mask; @@ -833,7 +833,7 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, case I915_EXEC_CONSTANTS_REL_GENERAL: case I915_EXEC_CONSTANTS_ABSOLUTE: case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) { + if (instp_mode != 0 && engine->id != RCS) { DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); return -EINVAL; } @@ -862,17 +862,17 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, if (ret) return ret; - if (engine == &dev_priv->engine[RCS] && + if (engine->id == RCS && instp_mode != dev_priv->relative_constants_mode) { ret = intel_ring_begin(params->request, 4); if (ret) return ret; - intel_ring_emit(ringbuf, MI_NOOP); - intel_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ringbuf, INSTPM); - intel_ring_emit(ringbuf, instp_mask << 16 | instp_mode); - intel_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, INSTPM); + intel_ring_emit(ring, instp_mask << 16 | instp_mode); + intel_ring_advance(ring); dev_priv->relative_constants_mode = instp_mode; } @@ -1030,7 +1030,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) { int ret, i; struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; if (w->count == 0) @@ -1045,14 +1045,14 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - intel_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(ringbuf, w->reg[i].addr); - intel_ring_emit(ringbuf, w->reg[i].value); + intel_ring_emit_reg(ring, w->reg[i].addr); + intel_ring_emit(ring, w->reg[i].value); } - intel_ring_emit(ringbuf, MI_NOOP); + intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ringbuf); + intel_ring_advance(ring); engine->gpu_caches_dirty = true; ret = logical_ring_flush_all_caches(req); @@ -1553,7 +1553,7 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct 
intel_engine_cs *engine = req->engine; const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; int i, ret; @@ -1581,7 +1581,7 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) static int gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); int ret; @@ -1638,8 +1638,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 invalidate_domains, u32 unused) { - struct intel_ringbuffer *ring = request->ringbuf; - struct intel_engine_cs *engine = ring->engine; + struct intel_ringbuffer *ring = request->ring; uint32_t cmd; int ret; @@ -1658,7 +1657,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, if (invalidate_domains & I915_GEM_GPU_DOMAINS) { cmd |= MI_INVALIDATE_TLB; - if (engine->id == VCS) + if (request->engine->id == VCS) cmd |= MI_INVALIDATE_BSD; } @@ -1677,7 +1676,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = request->ringbuf; + struct intel_ringbuffer *ring = request->ring; struct intel_engine_cs *engine = request->engine; u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; @@ -1791,7 +1790,7 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) static int gen8_emit_request(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ring = request->ringbuf; + struct intel_ringbuffer *ring = request->ring; int ret; ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS); @@ -1814,7 +1813,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) static int gen8_emit_request_render(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ring = request->ringbuf; + struct intel_ringbuffer *ring = request->ring; int ret; ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS); diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 3059c52..8534ec3 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -276,7 +276,7 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) static int emit_mocs_control_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; enum intel_engine_id engine = req->engine->id; unsigned int index; int ret; @@ -288,11 +288,11 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); for (index = 0; index < table->size; index++) { - intel_ring_emit_reg(ringbuf, mocs_register(engine, index)); - intel_ring_emit(ringbuf, table->table[index].control_value); + intel_ring_emit_reg(ring, mocs_register(engine, index)); + intel_ring_emit(ring, table->table[index].control_value); } /* @@ -304,12 +304,12 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, * that value to all the used entries. 
*/ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { - intel_ring_emit_reg(ringbuf, mocs_register(engine, index)); - intel_ring_emit(ringbuf, table->table[0].control_value); + intel_ring_emit_reg(ring, mocs_register(engine, index)); + intel_ring_emit(ring, table->table[0].control_value); } - intel_ring_emit(ringbuf, MI_NOOP); - intel_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -336,7 +336,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; unsigned int i; int ret; @@ -347,18 +347,18 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, if (ret) return ret; - intel_ring_emit(ringbuf, + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2)); for (i = 0; i < table->size/2; i++) { - intel_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ringbuf, l3cc_combine(table, 2*i, 2*i+1)); + intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ring, l3cc_combine(table, 2*i, 2*i+1)); } if (table->size & 0x01) { /* Odd table size - 1 left over */ - intel_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ringbuf, l3cc_combine(table, 2*i, 0)); + intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ring, l3cc_combine(table, 2*i, 0)); i++; } @@ -368,12 +368,12 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, * they are reserved by the hardware. */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { - intel_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ringbuf, l3cc_combine(table, 0, 0)); + intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); + intel_ring_emit(ring, l3cc_combine(table, 0, 0)); } - intel_ring_emit(ringbuf, MI_NOOP); - intel_ring_advance(ringbuf); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index ec63b64..e750c0e 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -253,7 +253,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) overlay->active = true; - ring = req->ringbuf; + ring = req->ring; intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); @@ -295,7 +295,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, return ret; } - ring = req->ringbuf; + ring = req->ring; intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); intel_ring_emit(ring, flip_addr); intel_ring_advance(ring); @@ -362,7 +362,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) return ret; } - ring = req->ringbuf; + ring = req->ring; /* wait for overlay to go idle */ intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); intel_ring_emit(ring, flip_addr); @@ -438,7 +438,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) return ret; } - ring = req->ringbuf; + ring = req->ring; intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); intel_ring_emit(ring, MI_NOOP); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1267006..9d70d2f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c 
@@ -70,7 +70,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; u32 cmd; int ret; @@ -97,7 +97,7 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; u32 cmd; int ret; @@ -187,7 +187,7 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, static int intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; @@ -224,7 +224,7 @@ static int gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; u32 flags = 0; @@ -277,7 +277,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, static int gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; ret = intel_ring_begin(req, 4); @@ -299,7 +299,7 @@ static int gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; u32 flags = 0; @@ -364,7 +364,7 @@ static int gen8_emit_pipe_control(struct drm_i915_gem_request *req, u32 flags, u32 scratch_addr) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; ret = intel_ring_begin(req, 6); @@ -680,7 +680,7 @@ err: static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; int ret, i; @@ -1338,7 +1338,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { #define MBOX_UPDATE_DWORDS 8 - struct intel_ringbuffer *signaller = signaller_req->ringbuf; + struct intel_ringbuffer *signaller = signaller_req->ring; struct drm_i915_private *dev_priv = signaller_req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; @@ -1380,7 +1380,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { #define MBOX_UPDATE_DWORDS 6 - struct intel_ringbuffer *signaller = signaller_req->ringbuf; + struct intel_ringbuffer *signaller = signaller_req->ring; struct drm_i915_private *dev_priv = signaller_req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; @@ -1419,7 +1419,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, static int gen6_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { - struct intel_ringbuffer *signaller = signaller_req->ringbuf; + struct intel_ringbuffer *signaller = signaller_req->ring; struct drm_i915_private *dev_priv = signaller_req->i915; struct intel_engine_cs *useless; enum intel_engine_id id; @@ -1464,7 +1464,7 @@ static int gen6_add_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; 
- struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; if (engine->semaphore.signal) @@ -1488,7 +1488,7 @@ static int gen8_render_add_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; if (engine->semaphore.signal) @@ -1533,7 +1533,7 @@ gen8_ring_sync(struct drm_i915_gem_request *waiter_req, struct intel_engine_cs *signaller, u32 seqno) { - struct intel_ringbuffer *waiter = waiter_req->ringbuf; + struct intel_ringbuffer *waiter = waiter_req->ring; struct drm_i915_private *dev_priv = waiter_req->i915; u64 offset = GEN8_WAIT_OFFSET(waiter_req->engine, signaller->id); struct i915_hw_ppgtt *ppgtt; @@ -1567,7 +1567,7 @@ gen6_ring_sync(struct drm_i915_gem_request *waiter_req, struct intel_engine_cs *signaller, u32 seqno) { - struct intel_ringbuffer *waiter = waiter_req->ringbuf; + struct intel_ringbuffer *waiter = waiter_req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; @@ -1701,7 +1701,7 @@ bsd_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); @@ -1717,7 +1717,7 @@ bsd_ring_flush(struct drm_i915_gem_request *req, static int i9xx_add_request(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; ret = intel_ring_begin(req, 4); @@ -1795,7 +1795,7 @@ i965_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); @@ -1822,7 +1822,7 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; u32 cs_offset = req->engine->scratch.gtt_offset; int ret; @@ -1884,7 +1884,7 @@ i915_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); @@ -2312,7 +2312,7 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) */ request->reserved_space += LEGACY_REQUEST_SIZE; - request->ringbuf = request->engine->buffer; + request->ring = request->engine->buffer; ret = intel_ring_begin(request, 0); if (ret) @@ -2324,12 +2324,12 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) static int wait_for_space(struct drm_i915_gem_request *req, int bytes) { - struct intel_ringbuffer *ringbuf = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; struct intel_engine_cs *engine = req->engine; struct drm_i915_gem_request *target; - intel_ring_update_space(ringbuf); - if (ringbuf->space >= bytes) + intel_ring_update_space(ring); + if (ring->space >= bytes) return 0; /* @@ -2351,12 +2351,12 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) * from multiple ringbuffers. Here, we must ignore any that * aren't from the ringbuffer we're considering. */ - if (target->ringbuf != ringbuf) + if (target->ring != ring) continue; /* Would completion of this request free enough space? 
*/ - space = __intel_ring_space(target->postfix, ringbuf->tail, - ringbuf->size); + space = __intel_ring_space(target->postfix, ring->tail, + ring->size); if (space >= bytes) break; } @@ -2369,9 +2369,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { - struct intel_ringbuffer *ringbuf = req->ringbuf; - int remain_actual = ringbuf->size - ringbuf->tail; - int remain_usable = ringbuf->effective_size - ringbuf->tail; + struct intel_ringbuffer *ring = req->ring; + int remain_actual = ring->size - ring->tail; + int remain_usable = ring->effective_size - ring->tail; int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; @@ -2398,35 +2398,35 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) wait_bytes = total_bytes; } - if (wait_bytes > ringbuf->space) { + if (wait_bytes > ring->space) { int ret = wait_for_space(req, wait_bytes); if (unlikely(ret)) return ret; - intel_ring_update_space(ringbuf); - if (unlikely(ringbuf->space < wait_bytes)) + intel_ring_update_space(ring); + if (unlikely(ring->space < wait_bytes)) return -EAGAIN; } if (unlikely(need_wrap)) { - GEM_BUG_ON(remain_actual > ringbuf->space); - GEM_BUG_ON(ringbuf->tail + remain_actual > ringbuf->size); + GEM_BUG_ON(remain_actual > ring->space); + GEM_BUG_ON(ring->tail + remain_actual > ring->size); /* Fill the tail with MI_NOOP */ - memset(ringbuf->vaddr + ringbuf->tail, 0, remain_actual); - ringbuf->tail = 0; - ringbuf->space -= remain_actual; + memset(ring->vaddr + ring->tail, 0, remain_actual); + ring->tail = 0; + ring->space -= remain_actual; } - ringbuf->space -= bytes; - GEM_BUG_ON(ringbuf->space < 0); + ring->space -= bytes; + GEM_BUG_ON(ring->space < 0); return 0; } /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int num_dwords = (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); int ret; @@ -2533,7 +2533,7 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 invalidate, u32 flush) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; uint32_t cmd; int ret; @@ -2579,7 +2579,7 @@ gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; bool ppgtt = USES_PPGTT(req->i915) && !(dispatch_flags & I915_DISPATCH_SECURE); int ret; @@ -2605,7 +2605,7 @@ hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); @@ -2630,7 +2630,7 @@ gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); @@ -2653,7 +2653,7 @@ gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 invalidate, u32 flush) { - struct intel_ringbuffer *ring = req->ringbuf; + struct intel_ringbuffer *ring = req->ring; uint32_t cmd; int ret; -- 
cgit v0.10.2 From dca33ecc5f1e5542b3f2fa2911b92f743369d367 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:20 +0100 Subject: drm/i915: Rename intel_context[engine].ringbuf Perform s/ringbuf/ring/ on the context struct for consistency with the ring/engine split. v2: Kill an outdated error_ringbuf label Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-14-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-3-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7c42ec4..b67fb4c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -425,8 +425,8 @@ static int per_file_ctx_stats(int id, void *ptr, void *data) for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { if (ctx->engine[n].state) per_file_stats(0, ctx->engine[n].state, data); - if (ctx->engine[n].ringbuf) - per_file_stats(0, ctx->engine[n].ringbuf->obj, data); + if (ctx->engine[n].ring) + per_file_stats(0, ctx->engine[n].ring->obj, data); } return 0; @@ -2085,8 +2085,8 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_putc(m, ce->initialised ? 'I' : 'i'); if (ce->state) describe_obj(m, ce->state); - if (ce->ringbuf) - describe_ctx_ringbuf(m, ce->ringbuf); + if (ce->ring) + describe_ctx_ringbuf(m, ce->ring); seq_putc(m, '\n'); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 65ada5d..db43ced 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -894,7 +894,7 @@ struct i915_gem_context { struct intel_context { struct drm_i915_gem_object *state; - struct intel_ringbuffer *ringbuf; + struct intel_ringbuffer *ring; struct i915_vma *lrc_vma; uint32_t *lrc_reg_state; u64 lrc_desc; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f7f4a8c..f825b1e 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -173,8 +173,8 @@ void i915_gem_context_free(struct kref *ctx_ref) continue; WARN_ON(ce->pin_count); - if (ce->ringbuf) - intel_ringbuffer_free(ce->ringbuf); + if (ce->ring) + intel_ringbuffer_free(ce->ring); i915_gem_object_put(ce->state); } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 01c1c16..eccd348 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -363,7 +363,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | (engine->guc_id << GUC_ELC_ENGINE_OFFSET); - obj = ce->ringbuf->obj; + obj = ce->ring->obj; gfx_addr = i915_gem_obj_ggtt_offset(obj); lrc->ring_begin = gfx_addr; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 041868c..5dce6fa 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -482,11 +482,8 @@ static void execlists_context_unqueue(struct intel_engine_cs *engine) * resubmit the request. See gen8_emit_request() for where we * prepare the padding after the end of the request. 
*/ - struct intel_ringbuffer *ringbuf; - - ringbuf = req0->ctx->engine[engine->id].ringbuf; req0->tail += 8; - req0->tail &= ringbuf->size - 1; + req0->tail &= req0->ring->size - 1; } execlists_submit_requests(req0, req1); @@ -714,7 +711,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request return ret; } - request->ring = ce->ringbuf; + request->ring = ce->ring; if (i915.enable_guc_submission) { /* @@ -976,14 +973,14 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ce->ringbuf); + ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ce->ring); if (ret) goto unpin_map; ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state); intel_lr_context_descriptor_update(ctx, engine); - lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ringbuf->vma->node.start; + lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ring->vma->node.start; ce->lrc_reg_state = lrc_reg_state; ce->state->dirty = true; @@ -1014,7 +1011,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, if (--ce->pin_count) return; - intel_unpin_ringbuffer_obj(ce->ringbuf); + intel_unpin_ringbuffer_obj(ce->ring); i915_gem_object_unpin_map(ce->state); i915_gem_object_ggtt_unpin(ce->state); @@ -2346,7 +2343,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct drm_i915_gem_object *ctx_obj; struct intel_context *ce = &ctx->engine[engine->id]; uint32_t context_size; - struct intel_ringbuffer *ringbuf; + struct intel_ringbuffer *ring; int ret; WARN_ON(ce->state); @@ -2362,29 +2359,29 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, return PTR_ERR(ctx_obj); } - ringbuf = intel_engine_create_ringbuffer(engine, ctx->ring_size); - if (IS_ERR(ringbuf)) { - ret = PTR_ERR(ringbuf); + ring = intel_engine_create_ringbuffer(engine, ctx->ring_size); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); goto error_deref_obj; } - ret = populate_lr_context(ctx, ctx_obj, engine, ringbuf); + ret = populate_lr_context(ctx, ctx_obj, engine, ring); if (ret) { DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); - goto error_ringbuf; + goto error_ring_free; } - ce->ringbuf = ringbuf; + ce->ring = ring; ce->state = ctx_obj; ce->initialised = engine->init_context == NULL; return 0; -error_ringbuf: - intel_ringbuffer_free(ringbuf); +error_ring_free: + intel_ringbuffer_free(ring); error_deref_obj: i915_gem_object_put(ctx_obj); - ce->ringbuf = NULL; + ce->ring = NULL; ce->state = NULL; return ret; } @@ -2415,7 +2412,7 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv, i915_gem_object_unpin_map(ctx_obj); - ce->ringbuf->head = 0; - ce->ringbuf->tail = 0; + ce->ring->head = 0; + ce->ring->tail = 0; } } -- cgit v0.10.2 From 7e37f889b50e798fc17f74e561b06a8715bee5a2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:21 +0100 Subject: drm/i915: Rename struct intel_ringbuffer to struct intel_ring The state stored in this struct is not only the information about the buffer object, but the ring used to communicate with the hardware. Using buffer here is overly specific and, for me at least, conflates with the notion of buffer objects themselves. 
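As an illustration (a hedged sketch, not part of any diff in this series), a call site after the rename might read roughly as below. The req->ring pointer and the intel_ring_begin()/intel_ring_emit()/intel_ring_advance() helpers are the ones already used throughout these patches; the wrapper function name here is hypothetical.

    /* Hypothetical example: emit two no-ops into a request's ring. */
    static int example_emit_noops(struct drm_i915_gem_request *req)
    {
            struct intel_ring *ring = req->ring; /* formerly req->ringbuf */
            int ret;

            /* Reserve space for two dwords in the ring. */
            ret = intel_ring_begin(req, 2);
            if (ret)
                    return ret;

            intel_ring_emit(ring, MI_NOOP);
            intel_ring_emit(ring, MI_NOOP);

            /* Advance keeps ring->tail wrapped within ring->size. */
            intel_ring_advance(ring);

            return 0;
    }

The sketch only illustrates the naming; the rename itself is mechanical and does not change behaviour. The substitutions performed are: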
s/struct intel_ringbuffer/struct intel_ring/ s/enum intel_ring_hangcheck/enum intel_engine_hangcheck/ s/describe_ctx_ringbuf()/describe_ctx_ring()/ s/intel_ring_get_active_head()/intel_engine_get_active_head()/ s/intel_ring_sync_index()/intel_engine_sync_index()/ s/intel_ring_init_seqno()/intel_engine_init_seqno()/ s/ring_stuck()/engine_stuck()/ s/intel_cleanup_engine()/intel_engine_cleanup()/ s/intel_stop_engine()/intel_engine_stop()/ s/intel_pin_and_map_ringbuffer_obj()/intel_pin_and_map_ring()/ s/intel_unpin_ringbuffer()/intel_unpin_ring()/ s/intel_engine_create_ringbuffer()/intel_engine_create_ring()/ s/intel_ring_flush_all_caches()/intel_engine_flush_all_caches()/ s/intel_ring_invalidate_all_caches()/intel_engine_invalidate_all_caches()/ s/intel_ringbuffer_free()/intel_ring_free()/ Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-15-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-4-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b67fb4c..410dc35 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1419,7 +1419,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); for_each_engine_id(engine, dev_priv, id) { - acthd[id] = intel_ring_get_active_head(engine); + acthd[id] = intel_engine_get_active_head(engine); seqno[id] = intel_engine_get_seqno(engine); } @@ -2036,12 +2036,11 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) return 0; } -static void describe_ctx_ringbuf(struct seq_file *m, - struct intel_ringbuffer *ringbuf) +static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) { seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)", - ringbuf->space, ringbuf->head, ringbuf->tail, - ringbuf->last_retired_head); + ring->space, ring->head, ring->tail, + ring->last_retired_head); } static int i915_context_status(struct seq_file *m, void *unused) @@ -2086,7 +2085,7 @@ static int i915_context_status(struct seq_file *m, void *unused) if (ce->state) describe_obj(m, ce->state); if (ce->ring) - describe_ctx_ringbuf(m, ce->ring); + describe_ctx_ring(m, ce->ring); seq_putc(m, '\n'); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index db43ced..4c43bd3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -518,7 +518,7 @@ struct drm_i915_error_state { bool waiting; int num_waiters; int hangcheck_score; - enum intel_ring_hangcheck_action hangcheck_action; + enum intel_engine_hangcheck_action hangcheck_action; int num_requests; /* our own tracking of ring head and tail */ @@ -894,7 +894,7 @@ struct i915_gem_context { struct intel_context { struct drm_i915_gem_object *state; - struct intel_ringbuffer *ring; + struct intel_ring *ring; struct i915_vma *lrc_vma; uint32_t *lrc_reg_state; u64 lrc_desc; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7bfce1d..59890f5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2486,7 +2486,7 @@ static void i915_gem_reset_engine_status(struct intel_engine_cs *engine) static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) { - struct intel_ringbuffer *buffer; + struct intel_ring *ring; while (!list_empty(&engine->active_list)) { struct 
drm_i915_gem_object *obj; @@ -2502,7 +2502,7 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) * (lockless) lookup doesn't try and wait upon the request as we * reset it. */ - intel_ring_init_seqno(engine, engine->last_submitted_seqno); + intel_engine_init_seqno(engine, engine->last_submitted_seqno); /* * Clear the execlists queue up before freeing the requests, as those @@ -2541,9 +2541,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) * upon reset is less than when we start. Do one more pass over * all the ringbuffers to reset last_retired_head. */ - list_for_each_entry(buffer, &engine->buffers, link) { - buffer->last_retired_head = buffer->tail; - intel_ring_update_space(buffer); + list_for_each_entry(ring, &engine->buffers, link) { + ring->last_retired_head = ring->tail; + intel_ring_update_space(ring); } engine->i915->gt.active_engines &= ~intel_engine_flag(engine); @@ -2870,7 +2870,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, i915_gem_object_retire_request(obj, from_req); } else { - int idx = intel_ring_sync_index(from, to); + int idx = intel_engine_sync_index(from, to); u32 seqno = i915_gem_request_get_seqno(from_req); WARN_ON(!to_req); @@ -4570,8 +4570,8 @@ int i915_gem_init(struct drm_device *dev) if (!i915.enable_execlists) { dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; - dev_priv->gt.cleanup_engine = intel_cleanup_engine; - dev_priv->gt.stop_engine = intel_stop_engine; + dev_priv->gt.cleanup_engine = intel_engine_cleanup; + dev_priv->gt.stop_engine = intel_engine_stop; } else { dev_priv->gt.execbuf_submit = intel_execlists_submission; dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f825b1e..3336a5f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -174,7 +174,7 @@ void i915_gem_context_free(struct kref *ctx_ref) WARN_ON(ce->pin_count); if (ce->ring) - intel_ringbuffer_free(ce->ring); + intel_ring_free(ce->ring); i915_gem_object_put(ce->state); } @@ -552,7 +552,7 @@ static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { struct drm_i915_private *dev_priv = req->i915; - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; u32 flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = @@ -655,7 +655,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) static int remap_l3(struct drm_i915_gem_request *req, int slice) { u32 *remap_info = req->i915->l3_parity.remap_info[slice]; - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int i, ret; if (!remap_info) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 42389de..d0ef675 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1001,7 +1001,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, /* Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. 
*/ - return intel_ring_invalidate_all_caches(req); + return intel_engine_invalidate_all_caches(req); } static bool @@ -1173,7 +1173,7 @@ i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int ret, i; if (!IS_GEN7(req->i915) || req->engine->id != RCS) { @@ -1303,7 +1303,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, if (params->engine->id == RCS && instp_mode != dev_priv->relative_constants_mode) { - struct intel_ringbuffer *ring = params->request->ring; + struct intel_ring *ring = params->request->ring; ret = intel_ring_begin(params->request, 4); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 46cae2a..ebfa040 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -669,7 +669,7 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; @@ -1661,7 +1661,7 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; @@ -1688,7 +1688,7 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; int ret; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index d2133c4..942b5b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -244,7 +244,7 @@ static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) /* Finally reset hw state */ for_each_engine(engine, dev_priv) - intel_ring_init_seqno(engine, seqno); + intel_engine_init_seqno(engine, seqno); return 0; } @@ -423,7 +423,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) { struct intel_engine_cs *engine; - struct intel_ringbuffer *ring; + struct intel_ring *ring; u32 request_start; u32 reserved_tail; int ret; @@ -454,7 +454,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, if (i915.enable_execlists) ret = logical_ring_flush_all_caches(request); else - ret = intel_ring_flush_all_caches(request); + ret = intel_engine_flush_all_caches(request); /* Not allowed to fail! 
*/ WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 68868d8..382ca5a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -61,7 +61,7 @@ struct drm_i915_gem_request { */ struct i915_gem_context *ctx; struct intel_engine_cs *engine; - struct intel_ringbuffer *ring; + struct intel_ring *ring; struct intel_signal_node signaling; /** GEM sequence number associated with the previous request, diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index d490a43..226b28e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -221,7 +221,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, } } -static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a) +static const char *hangcheck_action_to_str(enum intel_engine_hangcheck_action a) { switch (a) { case HANGCHECK_IDLE: @@ -879,7 +879,7 @@ static void gen8_record_semaphore_state(struct drm_i915_error_state *error, signal_offset = (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4; tmp = error->semaphore_obj->pages[0]; - idx = intel_ring_sync_index(engine, to); + idx = intel_engine_sync_index(engine, to); ee->semaphore_mboxes[idx] = tmp[signal_offset]; ee->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx]; @@ -981,7 +981,7 @@ static void error_record_engine_registers(struct drm_i915_error_state *error, ee->waiting = intel_engine_has_waiter(engine); ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); - ee->acthd = intel_ring_get_active_head(engine); + ee->acthd = intel_engine_get_active_head(engine); ee->seqno = intel_engine_get_seqno(engine); ee->last_seqno = engine->last_submitted_seqno; ee->start = I915_READ_START(engine); @@ -1097,7 +1097,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, request = i915_gem_find_active_request(engine); if (request) { struct i915_address_space *vm; - struct intel_ringbuffer *ring; + struct intel_ring *ring; vm = request->ctx->ppgtt ? 
&request->ctx->ppgtt->base : &ggtt->base; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f5bf4f9..e586500 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2993,7 +2993,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine) return stuck; } -static enum intel_ring_hangcheck_action +static enum intel_engine_hangcheck_action head_stuck(struct intel_engine_cs *engine, u64 acthd) { if (acthd != engine->hangcheck.acthd) { @@ -3011,11 +3011,11 @@ head_stuck(struct intel_engine_cs *engine, u64 acthd) return HANGCHECK_HUNG; } -static enum intel_ring_hangcheck_action -ring_stuck(struct intel_engine_cs *engine, u64 acthd) +static enum intel_engine_hangcheck_action +engine_stuck(struct intel_engine_cs *engine, u64 acthd) { struct drm_i915_private *dev_priv = engine->i915; - enum intel_ring_hangcheck_action ha; + enum intel_engine_hangcheck_action ha; u32 tmp; ha = head_stuck(engine, acthd); @@ -3124,7 +3124,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (engine->irq_seqno_barrier) engine->irq_seqno_barrier(engine); - acthd = intel_ring_get_active_head(engine); + acthd = intel_engine_get_active_head(engine); seqno = intel_engine_get_seqno(engine); /* Reset stuck interrupts between batch advances */ @@ -3154,8 +3154,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) * being repeatedly kicked and so responsible * for stalling the machine. */ - engine->hangcheck.action = ring_stuck(engine, - acthd); + engine->hangcheck.action = + engine_stuck(engine, acthd); switch (engine->hangcheck.action) { case HANGCHECK_IDLE: diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8c7da2f..3f05c62 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11115,7 +11115,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@ -11149,7 +11149,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; @@ -11180,7 +11180,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@ -11218,7 +11218,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; @@ -11253,7 +11253,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t plane_bit = 0; int len, ret; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index b90dd2f..a5f2128 100644 
--- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -155,7 +155,7 @@ cleanup: if (i915.enable_execlists) intel_logical_ring_cleanup(&dev_priv->engine[i]); else - intel_cleanup_engine(&dev_priv->engine[i]); + intel_engine_cleanup(&dev_priv->engine[i]); } return ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5dce6fa..0a664f2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -767,7 +767,7 @@ err_unpin: static int intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ring = request->ring; + struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; intel_ring_advance(ring); @@ -818,7 +818,7 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, struct drm_device *dev = params->dev; struct intel_engine_cs *engine = params->engine; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ringbuffer *ring = params->request->ring; + struct intel_ring *ring = params->request->ring; u64 exec_start; int instp_mode; u32 instp_mask; @@ -973,7 +973,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ce->ring); + ret = intel_pin_and_map_ring(dev_priv, ce->ring); if (ret) goto unpin_map; @@ -1011,7 +1011,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, if (--ce->pin_count) return; - intel_unpin_ringbuffer_obj(ce->ring); + intel_unpin_ring(ce->ring); i915_gem_object_unpin_map(ce->state); i915_gem_object_ggtt_unpin(ce->state); @@ -1027,7 +1027,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) { int ret, i; struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; if (w->count == 0) @@ -1550,7 +1550,7 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; int i, ret; @@ -1578,7 +1578,7 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) static int gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); int ret; @@ -1635,8 +1635,8 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 invalidate_domains, u32 unused) { - struct intel_ringbuffer *ring = request->ring; - uint32_t cmd; + struct intel_ring *ring = request->ring; + u32 cmd; int ret; ret = intel_ring_begin(request, 4); @@ -1673,7 +1673,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = request->ring; + struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; @@ -1787,7 +1787,7 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) static int 
gen8_emit_request(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ring = request->ring; + struct intel_ring *ring = request->ring; int ret; ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS); @@ -1810,7 +1810,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) static int gen8_emit_request_render(struct drm_i915_gem_request *request) { - struct intel_ringbuffer *ring = request->ring; + struct intel_ring *ring = request->ring; int ret; ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS); @@ -2162,7 +2162,7 @@ static int populate_lr_context(struct i915_gem_context *ctx, struct drm_i915_gem_object *ctx_obj, struct intel_engine_cs *engine, - struct intel_ringbuffer *ringbuf) + struct intel_ring *ring) { struct drm_i915_private *dev_priv = ctx->i915; struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; @@ -2215,7 +2215,7 @@ populate_lr_context(struct i915_gem_context *ctx, RING_START(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, RING_CTL(engine->mmio_base), - ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); + ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, RING_BBADDR_UDW(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, @@ -2343,7 +2343,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct drm_i915_gem_object *ctx_obj; struct intel_context *ce = &ctx->engine[engine->id]; uint32_t context_size; - struct intel_ringbuffer *ring; + struct intel_ring *ring; int ret; WARN_ON(ce->state); @@ -2359,7 +2359,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, return PTR_ERR(ctx_obj); } - ring = intel_engine_create_ringbuffer(engine, ctx->ring_size); + ring = intel_engine_create_ring(engine, ctx->ring_size); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; @@ -2378,7 +2378,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, return 0; error_ring_free: - intel_ringbuffer_free(ring); + intel_ring_free(ring); error_deref_obj: i915_gem_object_put(ctx_obj); ce->ring = NULL; diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 8534ec3..80bb924 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -276,7 +276,7 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) static int emit_mocs_control_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; enum intel_engine_id engine = req->engine->id; unsigned int index; int ret; @@ -336,7 +336,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; unsigned int i; int ret; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index e750c0e..8f1d4d9 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -235,7 +235,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) struct drm_i915_private *dev_priv = overlay->i915; struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; - struct intel_ringbuffer *ring; + struct intel_ring *ring; int ret; WARN_ON(overlay->active); @@ -270,7 +270,7 @@ static int 
intel_overlay_continue(struct intel_overlay *overlay, struct drm_i915_private *dev_priv = overlay->i915; struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; - struct intel_ringbuffer *ring; + struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; u32 tmp; int ret; @@ -340,7 +340,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) struct drm_i915_private *dev_priv = overlay->i915; struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; - struct intel_ringbuffer *ring; + struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; int ret; @@ -426,7 +426,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ struct drm_i915_gem_request *req; - struct intel_ringbuffer *ring; + struct intel_ring *ring; req = i915_gem_request_alloc(engine, NULL); if (IS_ERR(req)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9d70d2f..acbabbd 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -47,7 +47,7 @@ int __intel_ring_space(int head, int tail, int size) return space - I915_RING_FREE_SPACE; } -void intel_ring_update_space(struct intel_ringbuffer *ringbuf) +void intel_ring_update_space(struct intel_ring *ringbuf) { if (ringbuf->last_retired_head != -1) { ringbuf->head = ringbuf->last_retired_head; @@ -60,9 +60,10 @@ void intel_ring_update_space(struct intel_ringbuffer *ringbuf) static void __intel_engine_submit(struct intel_engine_cs *engine) { - struct intel_ringbuffer *ringbuf = engine->buffer; - ringbuf->tail &= ringbuf->size - 1; - engine->write_tail(engine, ringbuf->tail); + struct intel_ring *ring = engine->buffer; + + ring->tail &= ring->size - 1; + engine->write_tail(engine, ring->tail); } static int @@ -70,7 +71,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; u32 cmd; int ret; @@ -97,7 +98,7 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; u32 cmd; int ret; @@ -187,7 +188,7 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, static int intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; @@ -224,7 +225,7 @@ static int gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; u32 flags = 0; @@ -277,7 +278,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, static int gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 4); @@ -299,7 +300,7 @@ static int gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; u32 scratch_addr = req->engine->scratch.gtt_offset + 
2 * CACHELINE_BYTES; u32 flags = 0; @@ -364,7 +365,7 @@ static int gen8_emit_pipe_control(struct drm_i915_gem_request *req, u32 flags, u32 scratch_addr) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 6); @@ -427,7 +428,7 @@ static void ring_write_tail(struct intel_engine_cs *engine, I915_WRITE_TAIL(engine, value); } -u64 intel_ring_get_active_head(struct intel_engine_cs *engine) +u64 intel_engine_get_active_head(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; u64 acthd; @@ -553,8 +554,8 @@ static bool stop_ring(struct intel_engine_cs *engine) static int init_ring_common(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - struct intel_ringbuffer *ringbuf = engine->buffer; - struct drm_i915_gem_object *obj = ringbuf->obj; + struct intel_ring *ring = engine->buffer; + struct drm_i915_gem_object *obj = ring->obj; int ret = 0; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -604,7 +605,7 @@ static int init_ring_common(struct intel_engine_cs *engine) (void)I915_READ_HEAD(engine); I915_WRITE_CTL(engine, - ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) + ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); /* If the head is still not zero, the ring is dead */ @@ -623,10 +624,10 @@ static int init_ring_common(struct intel_engine_cs *engine) goto out; } - ringbuf->last_retired_head = -1; - ringbuf->head = I915_READ_HEAD(engine); - ringbuf->tail = I915_READ_TAIL(engine) & TAIL_ADDR; - intel_ring_update_space(ringbuf); + ring->last_retired_head = -1; + ring->head = I915_READ_HEAD(engine); + ring->tail = I915_READ_TAIL(engine) & TAIL_ADDR; + intel_ring_update_space(ring); intel_engine_init_hangcheck(engine); @@ -680,7 +681,7 @@ err: static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; int ret, i; @@ -688,7 +689,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) return 0; req->engine->gpu_caches_dirty = true; - ret = intel_ring_flush_all_caches(req); + ret = intel_engine_flush_all_caches(req); if (ret) return ret; @@ -706,7 +707,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) intel_ring_advance(ring); req->engine->gpu_caches_dirty = true; - ret = intel_ring_flush_all_caches(req); + ret = intel_engine_flush_all_caches(req); if (ret) return ret; @@ -1338,7 +1339,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { #define MBOX_UPDATE_DWORDS 8 - struct intel_ringbuffer *signaller = signaller_req->ring; + struct intel_ring *signaller = signaller_req->ring; struct drm_i915_private *dev_priv = signaller_req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; @@ -1380,7 +1381,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { #define MBOX_UPDATE_DWORDS 6 - struct intel_ringbuffer *signaller = signaller_req->ring; + struct intel_ring *signaller = signaller_req->ring; struct drm_i915_private *dev_priv = signaller_req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; @@ -1419,7 +1420,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, static int gen6_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { - struct intel_ringbuffer *signaller = signaller_req->ring; + struct 
intel_ring *signaller = signaller_req->ring; struct drm_i915_private *dev_priv = signaller_req->i915; struct intel_engine_cs *useless; enum intel_engine_id id; @@ -1464,7 +1465,7 @@ static int gen6_add_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int ret; if (engine->semaphore.signal) @@ -1488,7 +1489,7 @@ static int gen8_render_add_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int ret; if (engine->semaphore.signal) @@ -1533,7 +1534,7 @@ gen8_ring_sync(struct drm_i915_gem_request *waiter_req, struct intel_engine_cs *signaller, u32 seqno) { - struct intel_ringbuffer *waiter = waiter_req->ring; + struct intel_ring *waiter = waiter_req->ring; struct drm_i915_private *dev_priv = waiter_req->i915; u64 offset = GEN8_WAIT_OFFSET(waiter_req->engine, signaller->id); struct i915_hw_ppgtt *ppgtt; @@ -1567,7 +1568,7 @@ gen6_ring_sync(struct drm_i915_gem_request *waiter_req, struct intel_engine_cs *signaller, u32 seqno) { - struct intel_ringbuffer *waiter = waiter_req->ring; + struct intel_ring *waiter = waiter_req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; @@ -1701,7 +1702,7 @@ bsd_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); @@ -1717,7 +1718,7 @@ bsd_ring_flush(struct drm_i915_gem_request *req, static int i9xx_add_request(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 4); @@ -1795,7 +1796,7 @@ i965_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); @@ -1822,7 +1823,7 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; u32 cs_offset = req->engine->scratch.gtt_offset; int ret; @@ -1884,7 +1885,7 @@ i915_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); @@ -1992,7 +1993,7 @@ static int init_phys_status_page(struct intel_engine_cs *engine) return 0; } -void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +void intel_unpin_ring(struct intel_ring *ringbuf) { GEM_BUG_ON(!ringbuf->vma); GEM_BUG_ON(!ringbuf->vaddr); @@ -2007,8 +2008,8 @@ void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf) ringbuf->vma = NULL; } -int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, - struct intel_ringbuffer *ringbuf) +int intel_pin_and_map_ring(struct drm_i915_private *dev_priv, + struct intel_ring *ringbuf) { struct drm_i915_gem_object *obj = ringbuf->obj; /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ @@ -2060,14 +2061,14 @@ err_unpin: return ret; } -static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +static void intel_destroy_ringbuffer_obj(struct intel_ring *ringbuf) { i915_gem_object_put(ringbuf->obj); ringbuf->obj = NULL; } static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ringbuffer *ringbuf) + struct intel_ring *ringbuf) { struct drm_i915_gem_object *obj; @@ -2087,10 +2088,10 @@ static int intel_alloc_ringbuffer_obj(struct drm_device *dev, return 0; } -struct intel_ringbuffer * -intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size) { - struct intel_ringbuffer *ring; + struct intel_ring *ring; int ret; ring = kzalloc(sizeof(*ring), GFP_KERNEL); @@ -2128,7 +2129,7 @@ intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size) } void -intel_ringbuffer_free(struct intel_ringbuffer *ring) +intel_ring_free(struct intel_ring *ring) { intel_destroy_ringbuffer_obj(ring); list_del(&ring->link); @@ -2189,7 +2190,7 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - struct intel_ringbuffer *ringbuf; + struct intel_ring *ringbuf; int ret; WARN_ON(engine->buffer); @@ -2214,7 +2215,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (ret) goto error; - ringbuf = intel_engine_create_ringbuffer(engine, 32 * PAGE_SIZE); + ringbuf = intel_engine_create_ring(engine, 32 * PAGE_SIZE); if (IS_ERR(ringbuf)) { ret = PTR_ERR(ringbuf); goto error; @@ -2232,7 +2233,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) goto error; } - ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ringbuf); + ret = intel_pin_and_map_ring(dev_priv, ringbuf); if (ret) { DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", engine->name, ret); @@ -2243,11 +2244,11 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) return 0; error: - intel_cleanup_engine(engine); + intel_engine_cleanup(engine); return ret; } -void intel_cleanup_engine(struct intel_engine_cs *engine) +void intel_engine_cleanup(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv; @@ -2257,11 +2258,11 @@ void intel_cleanup_engine(struct intel_engine_cs *engine) dev_priv = engine->i915; if (engine->buffer) { - intel_stop_engine(engine); + intel_engine_stop(engine); WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0); - intel_unpin_ringbuffer_obj(engine->buffer); - intel_ringbuffer_free(engine->buffer); + intel_unpin_ring(engine->buffer); + intel_ring_free(engine->buffer); engine->buffer = NULL; } @@ -2324,7 +2325,7 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) static int wait_for_space(struct drm_i915_gem_request *req, int bytes) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; struct drm_i915_gem_request *target; @@ -2369,7 +2370,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int remain_actual = ring->size - ring->tail; int remain_usable = ring->effective_size - ring->tail; int bytes = num_dwords * sizeof(u32); @@ -2426,7 +2427,7 @@ int intel_ring_begin(struct 
drm_i915_gem_request *req, int num_dwords) /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int num_dwords = (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); int ret; @@ -2447,7 +2448,7 @@ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) return 0; } -void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno) +void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) { struct drm_i915_private *dev_priv = engine->i915; @@ -2533,7 +2534,7 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 invalidate, u32 flush) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; uint32_t cmd; int ret; @@ -2579,7 +2580,7 @@ gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; bool ppgtt = USES_PPGTT(req->i915) && !(dispatch_flags & I915_DISPATCH_SECURE); int ret; @@ -2605,7 +2606,7 @@ hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); @@ -2630,7 +2631,7 @@ gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned dispatch_flags) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; int ret; ret = intel_ring_begin(req, 2); @@ -2653,7 +2654,7 @@ gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 invalidate, u32 flush) { - struct intel_ringbuffer *ring = req->ring; + struct intel_ring *ring = req->ring; uint32_t cmd; int ret; @@ -2970,7 +2971,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) } int -intel_ring_flush_all_caches(struct drm_i915_gem_request *req) +intel_engine_flush_all_caches(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; int ret; @@ -2989,7 +2990,7 @@ intel_ring_flush_all_caches(struct drm_i915_gem_request *req) } int -intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req) +intel_engine_invalidate_all_caches(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; uint32_t flush_domains; @@ -3009,8 +3010,7 @@ intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req) return 0; } -void -intel_stop_engine(struct intel_engine_cs *engine) +void intel_engine_stop(struct intel_engine_cs *engine) { int ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4f4b8ea..8f94e93 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -62,7 +62,7 @@ struct intel_hw_status_page { (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) -enum intel_ring_hangcheck_action { +enum intel_engine_hangcheck_action { HANGCHECK_IDLE = 0, HANGCHECK_WAIT, HANGCHECK_ACTIVE, @@ -72,17 +72,17 @@ enum intel_ring_hangcheck_action { #define HANGCHECK_SCORE_RING_HUNG 31 -struct intel_ring_hangcheck { +struct intel_engine_hangcheck { u64 acthd; unsigned long user_interrupts; u32 seqno; int score; - enum 
intel_ring_hangcheck_action action; + enum intel_engine_hangcheck_action action; int deadlock; u32 instdone[I915_NUM_INSTDONE_REG]; }; -struct intel_ringbuffer { +struct intel_ring { struct drm_i915_gem_object *obj; void *vaddr; struct i915_vma *vma; @@ -149,7 +149,7 @@ struct intel_engine_cs { u64 fence_context; u32 mmio_base; unsigned int irq_shift; - struct intel_ringbuffer *buffer; + struct intel_ring *buffer; struct list_head buffers; /* Rather than have every client wait upon all user interrupts, @@ -329,7 +329,7 @@ struct intel_engine_cs { struct i915_gem_context *last_context; - struct intel_ring_hangcheck hangcheck; + struct intel_engine_hangcheck hangcheck; struct { struct drm_i915_gem_object *obj; @@ -376,8 +376,8 @@ intel_engine_flag(const struct intel_engine_cs *engine) } static inline u32 -intel_ring_sync_index(struct intel_engine_cs *engine, - struct intel_engine_cs *other) +intel_engine_sync_index(struct intel_engine_cs *engine, + struct intel_engine_cs *other) { int idx; @@ -439,45 +439,44 @@ intel_write_status_page(struct intel_engine_cs *engine, #define I915_GEM_HWS_SCRATCH_INDEX 0x40 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -struct intel_ringbuffer * -intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size); -int intel_pin_and_map_ringbuffer_obj(struct drm_i915_private *dev_priv, - struct intel_ringbuffer *ringbuf); -void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf); -void intel_ringbuffer_free(struct intel_ringbuffer *ring); +struct intel_ring * +intel_engine_create_ring(struct intel_engine_cs *engine, int size); +int intel_pin_and_map_ring(struct drm_i915_private *dev_priv, + struct intel_ring *ring); +void intel_unpin_ring(struct intel_ring *ring); +void intel_ring_free(struct intel_ring *ring); -void intel_stop_engine(struct intel_engine_cs *engine); -void intel_cleanup_engine(struct intel_engine_cs *engine); +void intel_engine_stop(struct intel_engine_cs *engine); +void intel_engine_cleanup(struct intel_engine_cs *engine); int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request); int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -static inline void intel_ring_emit(struct intel_ringbuffer *ring, u32 data) +static inline void intel_ring_emit(struct intel_ring *ring, u32 data) { *(uint32_t *)(ring->vaddr + ring->tail) = data; ring->tail += 4; } -static inline void intel_ring_emit_reg(struct intel_ringbuffer *ring, - i915_reg_t reg) +static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg) { intel_ring_emit(ring, i915_mmio_reg_offset(reg)); } -static inline void intel_ring_advance(struct intel_ringbuffer *ring) +static inline void intel_ring_advance(struct intel_ring *ring) { ring->tail &= ring->size - 1; } int __intel_ring_space(int head, int tail, int size); -void intel_ring_update_space(struct intel_ringbuffer *ringbuf); +void intel_ring_update_space(struct intel_ring *ringbuf); int __must_check intel_engine_idle(struct intel_engine_cs *engine); -void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno); -int intel_ring_flush_all_caches(struct drm_i915_gem_request *req); -int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req); +void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); +int intel_engine_flush_all_caches(struct drm_i915_gem_request *req); +int intel_engine_invalidate_all_caches(struct 
drm_i915_gem_request *req); int intel_init_pipe_control(struct intel_engine_cs *engine, int size); void intel_fini_pipe_control(struct intel_engine_cs *engine); @@ -491,7 +490,7 @@ int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine); int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); -u64 intel_ring_get_active_head(struct intel_engine_cs *engine); +u64 intel_engine_get_active_head(struct intel_engine_cs *engine); static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) { return intel_read_status_page(engine, I915_GEM_HWS_INDEX); @@ -499,7 +498,7 @@ static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) int init_workarounds_ring(struct intel_engine_cs *engine); -static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf) +static inline u32 intel_ring_get_tail(struct intel_ring *ringbuf) { return ringbuf->tail; } -- cgit v0.10.2 From 32c04f16f0db826cb76152e1418f1cdcc6ef7e9f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:22 +0100 Subject: drm/i915: Rename residual ringbuf parameters Now that we have a clear ring/engine split and a struct intel_ring, we no longer need the stopgap ringbuf names. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-16-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-5-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index acbabbd..5dd720e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -47,15 +47,15 @@ int __intel_ring_space(int head, int tail, int size) return space - I915_RING_FREE_SPACE; } -void intel_ring_update_space(struct intel_ring *ringbuf) +void intel_ring_update_space(struct intel_ring *ring) { - if (ringbuf->last_retired_head != -1) { - ringbuf->head = ringbuf->last_retired_head; - ringbuf->last_retired_head = -1; + if (ring->last_retired_head != -1) { + ring->head = ring->last_retired_head; + ring->last_retired_head = -1; } - ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR, - ringbuf->tail, ringbuf->size); + ring->space = __intel_ring_space(ring->head & HEAD_ADDR, + ring->tail, ring->size); } static void __intel_engine_submit(struct intel_engine_cs *engine) @@ -1993,25 +1993,25 @@ static int init_phys_status_page(struct intel_engine_cs *engine) return 0; } -void intel_unpin_ring(struct intel_ring *ringbuf) +void intel_unpin_ring(struct intel_ring *ring) { - GEM_BUG_ON(!ringbuf->vma); - GEM_BUG_ON(!ringbuf->vaddr); + GEM_BUG_ON(!ring->vma); + GEM_BUG_ON(!ring->vaddr); - if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen) - i915_gem_object_unpin_map(ringbuf->obj); + if (HAS_LLC(ring->obj->base.dev) && !ring->obj->stolen) + i915_gem_object_unpin_map(ring->obj); else - i915_vma_unpin_iomap(ringbuf->vma); - ringbuf->vaddr = NULL; + i915_vma_unpin_iomap(ring->vma); + ring->vaddr = NULL; - i915_gem_object_ggtt_unpin(ringbuf->obj); - ringbuf->vma = NULL; + i915_gem_object_ggtt_unpin(ring->obj); + ring->vma = NULL; } int intel_pin_and_map_ring(struct drm_i915_private *dev_priv, - struct intel_ring *ringbuf) + struct intel_ring *ring) { - struct drm_i915_gem_object *obj = ringbuf->obj; + struct drm_i915_gem_object *obj = ring->obj; /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ unsigned flags = PIN_OFFSET_BIAS | 4096; void *addr; @@ -2052,8 +2052,8 @@ int intel_pin_and_map_ring(struct drm_i915_private *dev_priv, } } - ringbuf->vaddr = addr; - ringbuf->vma = i915_gem_obj_to_ggtt(obj); + ring->vaddr = addr; + ring->vma = i915_gem_obj_to_ggtt(obj); return 0; err_unpin: @@ -2061,29 +2061,29 @@ err_unpin: return ret; } -static void intel_destroy_ringbuffer_obj(struct intel_ring *ringbuf) +static void intel_destroy_ringbuffer_obj(struct intel_ring *ring) { - i915_gem_object_put(ringbuf->obj); - ringbuf->obj = NULL; + i915_gem_object_put(ring->obj); + ring->obj = NULL; } static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ring *ringbuf) + struct intel_ring *ring) { struct drm_i915_gem_object *obj; obj = NULL; if (!HAS_LLC(dev)) - obj = i915_gem_object_create_stolen(dev, ringbuf->size); + obj = i915_gem_object_create_stolen(dev, ring->size); if (obj == NULL) - obj = i915_gem_object_create(dev, ringbuf->size); + obj = i915_gem_object_create(dev, ring->size); if (IS_ERR(obj)) return PTR_ERR(obj); /* mark ring buffers as read-only from GPU side by default */ obj->gt_ro = 1; - ringbuf->obj = obj; + ring->obj = obj; return 0; } @@ -2190,7 +2190,7 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - struct intel_ring *ringbuf; + struct intel_ring *ring; int ret; WARN_ON(engine->buffer); @@ -2215,12 +2215,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (ret) goto error; - ringbuf = intel_engine_create_ring(engine, 32 * PAGE_SIZE); - if (IS_ERR(ringbuf)) { - ret = PTR_ERR(ringbuf); + ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); goto error; } - engine->buffer = ringbuf; + engine->buffer = ring; if (I915_NEED_GFX_HWS(dev_priv)) { ret = init_status_page(engine); @@ -2233,11 +2233,11 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) goto error; } - ret = intel_pin_and_map_ring(dev_priv, ringbuf); + ret = intel_pin_and_map_ring(dev_priv, ring); if (ret) { DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", engine->name, ret); - intel_destroy_ringbuffer_obj(ringbuf); + intel_destroy_ringbuffer_obj(ring); goto error; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8f94e93..2dfc418c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -471,7 +471,7 @@ static inline void intel_ring_advance(struct intel_ring *ring) } int __intel_ring_space(int head, int tail, int size); -void intel_ring_update_space(struct intel_ring *ringbuf); +void intel_ring_update_space(struct intel_ring *ring); int __must_check intel_engine_idle(struct intel_engine_cs *engine); void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); @@ -498,9 +498,9 @@ static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) int init_workarounds_ring(struct intel_engine_cs *engine); -static inline u32 intel_ring_get_tail(struct intel_ring *ringbuf) +static inline u32 intel_ring_get_tail(struct intel_ring *ring) { - return ringbuf->tail; + return ring->tail; } /* -- cgit v0.10.2 From aad29fbbb86dbac69e25433b14c8a718fb53115e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:23 +0100 Subject: drm/i915: Rename intel_pin_and_map_ring() For more consistent oop-naming, we would use intel_ring_verb, so pick 
intel_ring_pin() and intel_ring_unpin(). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-17-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-6-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0a664f2..86b8f41 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -973,7 +973,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ret = intel_pin_and_map_ring(dev_priv, ce->ring); + ret = intel_ring_pin(ce->ring); if (ret) goto unpin_map; @@ -1011,7 +1011,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, if (--ce->pin_count) return; - intel_unpin_ring(ce->ring); + intel_ring_unpin(ce->ring); i915_gem_object_unpin_map(ce->state); i915_gem_object_ggtt_unpin(ce->state); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5dd720e..e7a7f67 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1993,24 +1993,9 @@ static int init_phys_status_page(struct intel_engine_cs *engine) return 0; } -void intel_unpin_ring(struct intel_ring *ring) -{ - GEM_BUG_ON(!ring->vma); - GEM_BUG_ON(!ring->vaddr); - - if (HAS_LLC(ring->obj->base.dev) && !ring->obj->stolen) - i915_gem_object_unpin_map(ring->obj); - else - i915_vma_unpin_iomap(ring->vma); - ring->vaddr = NULL; - - i915_gem_object_ggtt_unpin(ring->obj); - ring->vma = NULL; -} - -int intel_pin_and_map_ring(struct drm_i915_private *dev_priv, - struct intel_ring *ring) +int intel_ring_pin(struct intel_ring *ring) { + struct drm_i915_private *dev_priv = ring->engine->i915; struct drm_i915_gem_object *obj = ring->obj; /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ unsigned flags = PIN_OFFSET_BIAS | 4096; @@ -2061,6 +2046,21 @@ err_unpin: return ret; } +void intel_ring_unpin(struct intel_ring *ring) +{ + GEM_BUG_ON(!ring->vma); + GEM_BUG_ON(!ring->vaddr); + + if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen) + i915_gem_object_unpin_map(ring->obj); + else + i915_vma_unpin_iomap(ring->vma); + ring->vaddr = NULL; + + i915_gem_object_ggtt_unpin(ring->obj); + ring->vma = NULL; +} + static void intel_destroy_ringbuffer_obj(struct intel_ring *ring) { i915_gem_object_put(ring->obj); @@ -2233,7 +2233,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) goto error; } - ret = intel_pin_and_map_ring(dev_priv, ring); + ret = intel_ring_pin(ring); if (ret) { DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", engine->name, ret); @@ -2261,7 +2261,7 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) intel_engine_stop(engine); WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0); - intel_unpin_ring(engine->buffer); + intel_ring_unpin(engine->buffer); intel_ring_free(engine->buffer); engine->buffer = NULL; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2dfc418c..ba54ffc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -441,9 +441,8 @@ intel_write_status_page(struct intel_engine_cs *engine, struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); -int intel_pin_and_map_ring(struct drm_i915_private *dev_priv, - struct intel_ring *ring); -void intel_unpin_ring(struct intel_ring *ring); +int intel_ring_pin(struct intel_ring *ring); +void intel_ring_unpin(struct intel_ring *ring); void intel_ring_free(struct intel_ring *ring); void intel_engine_stop(struct intel_engine_cs *engine); -- cgit v0.10.2 From c7fe7d25ed6036ff16b1c112463baff21c3b205d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:24 +0100 Subject: drm/i915: Remove obsolete engine->gpu_caches_dirty Space for flushing the GPU cache prior to completing the request is preallocated and so cannot fail - the GPU caches will always be flushed along with the completed request. This means we no longer have to track whether the GPU cache is dirty between batches like we had to with the outstanding_lazy_seqno. With the removal of the duplication in the per-backend entry points for emitting the obsolete lazy flush, we can then further unify the engine->emit_flush. v2: Expand a bit on the legacy of gpu_caches_dirty Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-18-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-7-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3336a5f..beece8f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -568,7 +568,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) * itlb_before_ctx_switch. 
*/ if (IS_GEN6(dev_priv)) { - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0); + ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d0ef675..35c4c595 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -998,10 +998,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, if (flush_domains & I915_GEM_DOMAIN_GTT) wmb(); - /* Unconditionally invalidate gpu caches and ensure that we do flush - * any residual writes from the previous batch. - */ - return intel_engine_invalidate_all_caches(req); + /* Unconditionally invalidate GPU caches and TLBs. */ + return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0); } static bool @@ -1163,9 +1161,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, static void i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) { - /* Unconditionally force add_request to emit a full flush. */ - params->engine->gpu_caches_dirty = true; - /* Add a breadcrumb for the completion of the batch buffer */ __i915_add_request(params->request, params->batch_obj, true); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ebfa040..39fa9eb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1666,7 +1666,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, + I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); if (ret) return ret; @@ -1693,7 +1694,8 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, + I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); if (ret) return ret; @@ -1711,8 +1713,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, /* XXX: RCS is the only one to auto invalidate the TLBs? */ if (engine->id != RCS) { - ret = engine->flush(req, - I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, + I915_GEM_GPU_DOMAINS, + I915_GEM_GPU_DOMAINS); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 942b5b1..7e32060 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -451,12 +451,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, * what. */ if (flush_caches) { - if (i915.enable_execlists) - ret = logical_ring_flush_all_caches(request); - else - ret = intel_engine_flush_all_caches(request); + ret = engine->emit_flush(request, 0, I915_GEM_GPU_DOMAINS); + /* Not allowed to fail! 
*/ - WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); + WARN(ret, "engine->emit_flush() failed: %d!\n", ret); } trace_i915_gem_request_add(request); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 86b8f41..e8d971e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -642,24 +642,6 @@ static void execlists_context_queue(struct drm_i915_gem_request *request) spin_unlock_bh(&engine->execlist_lock); } -static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - uint32_t flush_domains; - int ret; - - flush_domains = 0; - if (engine->gpu_caches_dirty) - flush_domains = I915_GEM_GPU_DOMAINS; - - ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); - if (ret) - return ret; - - engine->gpu_caches_dirty = false; - return 0; -} - static int execlists_move_to_gpu(struct drm_i915_gem_request *req, struct list_head *vmas) { @@ -690,7 +672,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req, /* Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. */ - return logical_ring_invalidate_all_caches(req); + return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0); } int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) @@ -930,22 +912,6 @@ void intel_logical_ring_stop(struct intel_engine_cs *engine) I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); } -int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - int ret; - - if (!engine->gpu_caches_dirty) - return 0; - - ret = engine->emit_flush(req, 0, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; - - engine->gpu_caches_dirty = false; - return 0; -} - static int intel_lr_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -1026,15 +992,15 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) { int ret, i; - struct intel_engine_cs *engine = req->engine; struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; if (w->count == 0) return 0; - engine->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, + I915_GEM_GPU_DOMAINS, + I915_GEM_GPU_DOMAINS); if (ret) return ret; @@ -1051,8 +1017,9 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) intel_ring_advance(ring); - engine->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(req); + ret = req->engine->emit_flush(req, + I915_GEM_GPU_DOMAINS, + I915_GEM_GPU_DOMAINS); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index d26fb44..33e0193 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -72,8 +72,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); int intel_engines_init(struct drm_device *dev); -int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); - /* Logical Ring Contexts */ /* One extra page is added before LRC for GuC as shared data */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e7a7f67..9e4b496 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -688,8 +688,9 @@ static int 
intel_ring_workarounds_emit(struct drm_i915_gem_request *req) if (w->count == 0) return 0; - req->engine->gpu_caches_dirty = true; - ret = intel_engine_flush_all_caches(req); + ret = req->engine->emit_flush(req, + I915_GEM_GPU_DOMAINS, + I915_GEM_GPU_DOMAINS); if (ret) return ret; @@ -706,8 +707,9 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) intel_ring_advance(ring); - req->engine->gpu_caches_dirty = true; - ret = intel_engine_flush_all_caches(req); + ret = req->engine->emit_flush(req, + I915_GEM_GPU_DOMAINS, + I915_GEM_GPU_DOMAINS); if (ret) return ret; @@ -2860,21 +2862,21 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 8) { engine->init_context = intel_rcs_ctx_init; engine->add_request = gen8_render_add_request; - engine->flush = gen8_render_ring_flush; + engine->emit_flush = gen8_render_ring_flush; if (i915.semaphores) engine->semaphore.signal = gen8_rcs_signal; } else if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; - engine->flush = gen7_render_ring_flush; + engine->emit_flush = gen7_render_ring_flush; if (IS_GEN6(dev_priv)) - engine->flush = gen6_render_ring_flush; + engine->emit_flush = gen6_render_ring_flush; } else if (IS_GEN5(dev_priv)) { - engine->flush = gen4_render_ring_flush; + engine->emit_flush = gen4_render_ring_flush; } else { if (INTEL_GEN(dev_priv) < 4) - engine->flush = gen2_render_ring_flush; + engine->emit_flush = gen2_render_ring_flush; else - engine->flush = gen4_render_ring_flush; + engine->emit_flush = gen4_render_ring_flush; engine->irq_enable_mask = I915_USER_INTERRUPT; } @@ -2911,12 +2913,12 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) /* gen6 bsd needs a special wa for tail updates */ if (IS_GEN6(dev_priv)) engine->write_tail = gen6_bsd_ring_write_tail; - engine->flush = gen6_bsd_ring_flush; + engine->emit_flush = gen6_bsd_ring_flush; if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; } else { engine->mmio_base = BSD_RING_BASE; - engine->flush = bsd_ring_flush; + engine->emit_flush = bsd_ring_flush; if (IS_GEN5(dev_priv)) engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; else @@ -2935,7 +2937,7 @@ int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) intel_ring_default_vfuncs(dev_priv, engine); - engine->flush = gen6_bsd_ring_flush; + engine->emit_flush = gen6_bsd_ring_flush; return intel_init_ring_buffer(engine); } @@ -2946,7 +2948,7 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) intel_ring_default_vfuncs(dev_priv, engine); - engine->flush = gen6_ring_flush; + engine->emit_flush = gen6_ring_flush; if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; @@ -2959,7 +2961,7 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) intel_ring_default_vfuncs(dev_priv, engine); - engine->flush = gen6_ring_flush; + engine->emit_flush = gen6_ring_flush; if (INTEL_GEN(dev_priv) < 8) { engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; @@ -2970,46 +2972,6 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) return intel_init_ring_buffer(engine); } -int -intel_engine_flush_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - int ret; - - if (!engine->gpu_caches_dirty) - return 0; - - ret = engine->flush(req, 0, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; - - trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS); - - engine->gpu_caches_dirty = false; - return 0; -} - -int 
-intel_engine_invalidate_all_caches(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - uint32_t flush_domains; - int ret; - - flush_domains = 0; - if (engine->gpu_caches_dirty) - flush_domains = I915_GEM_GPU_DOMAINS; - - ret = engine->flush(req, I915_GEM_GPU_DOMAINS, flush_domains); - if (ret) - return ret; - - trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); - - engine->gpu_caches_dirty = false; - return 0; -} - void intel_engine_stop(struct intel_engine_cs *engine) { int ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ba54ffc..00723401 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -206,9 +206,6 @@ struct intel_engine_cs { void (*write_tail)(struct intel_engine_cs *engine, u32 value); - int __must_check (*flush)(struct drm_i915_gem_request *req, - u32 invalidate_domains, - u32 flush_domains); int (*add_request)(struct drm_i915_gem_request *req); /* Some chipsets are not quite as coherent as advertised and need * an expensive kick to force a true read of the up-to-date seqno. @@ -325,8 +322,6 @@ struct intel_engine_cs { */ u32 last_submitted_seqno; - bool gpu_caches_dirty; - struct i915_gem_context *last_context; struct intel_engine_hangcheck hangcheck; @@ -474,8 +469,6 @@ void intel_ring_update_space(struct intel_ring *ring); int __must_check intel_engine_idle(struct intel_engine_cs *engine); void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); -int intel_engine_flush_all_caches(struct drm_i915_gem_request *req); -int intel_engine_invalidate_all_caches(struct drm_i915_gem_request *req); int intel_init_pipe_control(struct intel_engine_cs *engine, int size); void intel_fini_pipe_control(struct intel_engine_cs *engine); -- cgit v0.10.2 From 7c9cf4e33a72c36a62471709d85d096eaac86dc6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:25 +0100 Subject: drm/i915: Reduce engine->emit_flush() to a single mode parameter Rather than passing a complete set of GPU cache domains for either invalidation or for flushing, or even both, just pass a single parameter to the engine->emit_flush to determine the required operations. engine->emit_flush(GPU, 0) -> engine->emit_flush(EMIT_INVALIDATE) engine->emit_flush(0, GPU) -> engine->emit_flush(EMIT_FLUSH) engine->emit_flush(GPU, GPU) -> engine->emit_flush(EMIT_FLUSH | EMIT_INVALIDATE) This allows us to extend the behaviour easily in future, for example if we want just a command barrier without the overhead of flushing. Signed-off-by: Chris Wilson Cc: Dave Gordon Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-8-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index beece8f..edde841 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -568,7 +568,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) * itlb_before_ctx_switch. 
*/ if (IS_GEN6(dev_priv)) { - ret = engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0); + ret = engine->emit_flush(req, EMIT_INVALIDATE); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 35c4c595..e49776e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -999,7 +999,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, wmb(); /* Unconditionally invalidate GPU caches and TLBs. */ - return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0); + return req->engine->emit_flush(req, EMIT_INVALIDATE); } static bool diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 39fa9eb..671b1ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1666,8 +1666,7 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->emit_flush(req, - I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); if (ret) return ret; @@ -1694,8 +1693,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->emit_flush(req, - I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); if (ret) return ret; @@ -1713,9 +1711,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, /* XXX: RCS is the only one to auto invalidate the TLBs? */ if (engine->id != RCS) { - ret = engine->emit_flush(req, - I915_GEM_GPU_DOMAINS, - I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 7e32060..67f16fe 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -451,7 +451,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, * what. */ if (flush_caches) { - ret = engine->emit_flush(request, 0, I915_GEM_GPU_DOMAINS); + ret = engine->emit_flush(request, EMIT_FLUSH); /* Not allowed to fail! */ WARN(ret, "engine->emit_flush() failed: %d!\n", ret); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e8d971e..af7d7e0 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -672,7 +672,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req, /* Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. 
*/ - return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0); + return req->engine->emit_flush(req, EMIT_INVALIDATE); } int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) @@ -998,9 +998,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) if (w->count == 0) return 0; - ret = req->engine->emit_flush(req, - I915_GEM_GPU_DOMAINS, - I915_GEM_GPU_DOMAINS); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -1017,9 +1015,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) intel_ring_advance(ring); - ret = req->engine->emit_flush(req, - I915_GEM_GPU_DOMAINS, - I915_GEM_GPU_DOMAINS); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -1598,9 +1594,7 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) I915_WRITE_IMR(engine, ~engine->irq_keep_mask); } -static int gen8_emit_flush(struct drm_i915_gem_request *request, - u32 invalidate_domains, - u32 unused) +static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) { struct intel_ring *ring = request->ring; u32 cmd; @@ -1619,7 +1613,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, */ cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; - if (invalidate_domains & I915_GEM_GPU_DOMAINS) { + if (mode & EMIT_INVALIDATE) { cmd |= MI_INVALIDATE_TLB; if (request->engine->id == VCS) cmd |= MI_INVALIDATE_BSD; @@ -1637,8 +1631,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, } static int gen8_emit_flush_render(struct drm_i915_gem_request *request, - u32 invalidate_domains, - u32 flush_domains) + u32 mode) { struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; @@ -1650,14 +1643,14 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, flags |= PIPE_CONTROL_CS_STALL; - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9e4b496..1f876e7 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -67,19 +67,15 @@ static void __intel_engine_submit(struct intel_engine_cs *engine) } static int -gen2_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, - u32 flush_domains) +gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; u32 cmd; int ret; cmd = MI_FLUSH; - if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0) - cmd |= MI_NO_WRITE_FLUSH; - if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) + if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; ret = intel_ring_begin(req, 2); @@ -94,9 +90,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, } static int -gen4_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, - u32 flush_domains) +gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; u32 cmd; @@ -131,7 +125,7 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, */ cmd = MI_FLUSH; - if 
(invalidate_domains) { + if (mode & EMIT_INVALIDATE) { cmd |= MI_EXE_FLUSH; if (IS_G4X(req->i915) || IS_GEN5(req->i915)) cmd |= MI_INVALIDATE_ISP; @@ -222,8 +216,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) } static int -gen6_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, u32 flush_domains) +gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; u32 scratch_addr = @@ -240,7 +233,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, * number of bits based on the write domains has little performance * impact. */ - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* @@ -249,7 +242,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, */ flags |= PIPE_CONTROL_CS_STALL; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -297,8 +290,7 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) } static int -gen7_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, u32 flush_domains) +gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; u32 scratch_addr = @@ -320,13 +312,13 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, * number of bits based on the write domains has little performance * impact. */ - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -384,8 +376,7 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req, } static int -gen8_render_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate_domains, u32 flush_domains) +gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; u32 flags = 0; @@ -393,13 +384,13 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, flags |= PIPE_CONTROL_CS_STALL; - if (flush_domains) { + if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } - if (invalidate_domains) { + if (mode & EMIT_INVALIDATE) { flags |= PIPE_CONTROL_TLB_INVALIDATE; flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; @@ -688,9 +679,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) if (w->count == 0) return 0; - ret = req->engine->emit_flush(req, - I915_GEM_GPU_DOMAINS, - I915_GEM_GPU_DOMAINS); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -707,9 +696,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) intel_ring_advance(ring); - ret = req->engine->emit_flush(req, - I915_GEM_GPU_DOMAINS, - I915_GEM_GPU_DOMAINS); + ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) return ret; @@ -1700,9 +1687,7 @@ i8xx_irq_disable(struct intel_engine_cs *engine) } static int -bsd_ring_flush(struct drm_i915_gem_request 
*req, - u32 invalidate_domains, - u32 flush_domains) +bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; int ret; @@ -2533,8 +2518,7 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate, u32 flush) +static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; uint32_t cmd; @@ -2561,7 +2545,7 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, * operation is complete. This bit is only valid when the * Post-Sync Operation field is a value of 1h or 3h." */ - if (invalidate & I915_GEM_GPU_DOMAINS) + if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; intel_ring_emit(ring, cmd); @@ -2653,8 +2637,7 @@ gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, /* Blitter support (SandyBridge+) */ -static int gen6_ring_flush(struct drm_i915_gem_request *req, - u32 invalidate, u32 flush) +static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; uint32_t cmd; @@ -2681,7 +2664,7 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, * operation is complete. This bit is only valid when the * Post-Sync Operation field is a value of 1h or 3h." */ - if (invalidate & I915_GEM_DOMAIN_RENDER) + if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB; intel_ring_emit(ring, cmd); intel_ring_emit(ring, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 00723401..76d0495 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -292,8 +292,10 @@ struct intel_engine_cs { u32 ctx_desc_template; int (*emit_request)(struct drm_i915_gem_request *request); int (*emit_flush)(struct drm_i915_gem_request *request, - u32 invalidate_domains, - u32 flush_domains); + u32 mode); +#define EMIT_INVALIDATE BIT(0) +#define EMIT_FLUSH BIT(1) +#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) int (*emit_bb_start)(struct drm_i915_gem_request *req, u64 offset, unsigned dispatch_flags); -- cgit v0.10.2 From 8e6371783738b29f92ab3b8916c652a4a600dd52 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:26 +0100 Subject: drm/i915: Simplify request_alloc by returning the allocated request It is simpler and leads to more readable code through the callstack if the allocation returns the allocated struct through the return value. The importance of this is that it no longer looks like we accidentally allocate requests as a side effect of calling certain functions.
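To make the new calling convention concrete, a minimal before/after sketch of a call site; the caller shown is hypothetical, but the function names and the ERR_PTR error convention are the ones used in the patch below.

	/* Before: the request comes back through an out-parameter, so the
	 * allocation is easy to overlook at the call site (sketch only).
	 */
	struct drm_i915_gem_request *req;
	int err;

	err = __i915_gem_request_alloc(engine, ctx, &req);
	if (err)
		return err;

	/* After: the allocation is the return value, with errors carried as
	 * an ERR_PTR, so a request is never created as a hidden side effect.
	 */
	req = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(req))
		return PTR_ERR(req);
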
Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-19-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-9-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4c43bd3..8f4edc9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3171,8 +3171,7 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request **to_req); + struct drm_i915_gem_request *to); void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request *req); int i915_gem_dumb_create(struct drm_file *file_priv, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 59890f5..b6c4ff6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2845,51 +2845,35 @@ out: static int __i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request *from_req, - struct drm_i915_gem_request **to_req) + struct drm_i915_gem_request *to, + struct drm_i915_gem_request *from) { - struct intel_engine_cs *from; int ret; - from = i915_gem_request_get_engine(from_req); - if (to == from) + if (to->engine == from->engine) return 0; - if (i915_gem_request_completed(from_req)) + if (i915_gem_request_completed(from)) return 0; if (!i915.semaphores) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - ret = __i915_wait_request(from_req, - i915->mm.interruptible, + ret = __i915_wait_request(from, + from->i915->mm.interruptible, NULL, NO_WAITBOOST); if (ret) return ret; - i915_gem_object_retire_request(obj, from_req); + i915_gem_object_retire_request(obj, from); } else { - int idx = intel_engine_sync_index(from, to); - u32 seqno = i915_gem_request_get_seqno(from_req); + int idx = intel_engine_sync_index(from->engine, to->engine); + u32 seqno = i915_gem_request_get_seqno(from); - WARN_ON(!to_req); - - if (seqno <= from->semaphore.sync_seqno[idx]) + if (seqno <= from->engine->semaphore.sync_seqno[idx]) return 0; - if (*to_req == NULL) { - struct drm_i915_gem_request *req; - - req = i915_gem_request_alloc(to, NULL); - if (IS_ERR(req)) - return PTR_ERR(req); - - *to_req = req; - } - - trace_i915_gem_ring_sync_to(*to_req, from, from_req); - ret = to->semaphore.sync_to(*to_req, from, seqno); + trace_i915_gem_ring_sync_to(to, from); + ret = to->engine->semaphore.sync_to(to, from->engine, seqno); if (ret) return ret; @@ -2897,8 +2881,8 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * might have just caused seqno wrap under * the radar. */ - from->semaphore.sync_seqno[idx] = - i915_gem_request_get_seqno(obj->last_read_req[from->id]); + from->engine->semaphore.sync_seqno[idx] = + i915_gem_request_get_seqno(obj->last_read_req[from->engine->id]); } return 0; @@ -2908,17 +2892,12 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * i915_gem_object_sync - sync an object to a ring. * * @obj: object which may be in use on another ring. - * @to: ring we wish to use the object on. May be NULL. - * @to_req: request we wish to use the object for. See below. - * This will be allocated and returned if a request is - * required but not passed in. 
+ * @to: request we are wishing to use * * This code is meant to abstract object synchronization with the GPU. - * Calling with NULL implies synchronizing the object with the CPU - * rather than a particular GPU ring. Conceptually we serialise writes - * between engines inside the GPU. We only allow one engine to write - * into a buffer at any time, but multiple readers. To ensure each has - * a coherent view of memory, we must: + * Conceptually we serialise writes between engines inside the GPU. + * We only allow one engine to write into a buffer at any time, but + * multiple readers. To ensure each has a coherent view of memory, we must: * * - If there is an outstanding write request to the object, the new * request must wait for it to complete (either CPU or in hw, requests @@ -2927,22 +2906,11 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * - If we are a write request (pending_write_domain is set), the new * request must wait for outstanding read requests to complete. * - * For CPU synchronisation (NULL to) no request is required. For syncing with - * rings to_req must be non-NULL. However, a request does not have to be - * pre-allocated. If *to_req is NULL and sync commands will be emitted then a - * request will be allocated automatically and returned through *to_req. Note - * that it is not guaranteed that commands will be emitted (because the system - * might already be idle). Hence there is no need to create a request that - * might never have any work submitted. Note further that if a request is - * returned in *to_req, it is the responsibility of the caller to submit - * that request (after potentially adding more work to it). - * * Returns 0 if successful, else propagates up the lower layer error. */ int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, - struct drm_i915_gem_request **to_req) + struct drm_i915_gem_request *to) { const bool readonly = obj->base.pending_write_domain == 0; struct drm_i915_gem_request *req[I915_NUM_ENGINES]; @@ -2951,9 +2919,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, if (!obj->active) return 0; - if (to == NULL) - return i915_gem_object_wait_rendering(obj, readonly); - n = 0; if (readonly) { if (obj->last_write_req) @@ -2964,7 +2929,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, req[n++] = obj->last_read_req[i]; } for (i = 0; i < n; i++) { - ret = __i915_gem_object_sync(obj, to, req[i], to_req); + ret = __i915_gem_object_sync(obj, to, req[i]); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e49776e..fe06b58 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -981,7 +981,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, struct drm_i915_gem_object *obj = vma->obj; if (obj->active & other_rings) { - ret = i915_gem_object_sync(obj, req->engine, &req); + ret = i915_gem_object_sync(obj, req); if (ret) return ret; } @@ -1427,7 +1427,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_i915_gem_request *req = NULL; struct eb_vmas *eb; struct drm_i915_gem_object *batch_obj; struct drm_i915_gem_exec_object2 shadow_exec_entry; @@ -1615,13 +1614,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); /* Allocate a request for this batch 
buffer nice and early. */ - req = i915_gem_request_alloc(engine, ctx); - if (IS_ERR(req)) { - ret = PTR_ERR(req); + params->request = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(params->request)) { + ret = PTR_ERR(params->request); goto err_batch_unpin; } - ret = i915_gem_request_add_to_client(req, file); + ret = i915_gem_request_add_to_client(params->request, file); if (ret) goto err_request; @@ -1637,7 +1636,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->dispatch_flags = dispatch_flags; params->batch_obj = batch_obj; params->ctx = ctx; - params->request = req; ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); err_request: diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 67f16fe..f4e6c40 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -292,10 +292,21 @@ static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) return 0; } -static inline int -__i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx, - struct drm_i915_gem_request **req_out) +/** + * i915_gem_request_alloc - allocate a request structure + * + * @engine: engine that we wish to issue the request on. + * @ctx: context that the request will be associated with. + * This can be NULL if the request is not directly related to + * any specific user context, in which case this function will + * choose an appropriate context to use. + * + * Returns a pointer to the allocated request if successful, + * or an error code if not. + */ +struct drm_i915_gem_request * +i915_gem_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct drm_i915_private *dev_priv = engine->i915; unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error); @@ -303,18 +314,13 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, u32 seqno; int ret; - if (!req_out) - return -EINVAL; - - *req_out = NULL; - /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex * and restart. */ ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); if (ret) - return ret; + return ERR_PTR(ret); /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->request_list, @@ -324,7 +330,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); if (!req) - return -ENOMEM; + return ERR_PTR(-ENOMEM); ret = i915_gem_get_seqno(dev_priv, &seqno); if (ret) @@ -357,39 +363,13 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) goto err_ctx; - *req_out = req; - return 0; + return req; err_ctx: i915_gem_context_put(ctx); err: kmem_cache_free(dev_priv->requests, req); - return ret; -} - -/** - * i915_gem_request_alloc - allocate a request structure - * - * @engine: engine that we wish to issue the request on. - * @ctx: context that the request will be associated with. - * This can be NULL if the request is not directly related to - * any specific user context, in which case this function will - * choose an appropriate context to use. - * - * Returns a pointer to the allocated request if successful, - * or an error code if not. 
- */ -struct drm_i915_gem_request * -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) -{ - struct drm_i915_gem_request *req; - int err; - - if (!ctx) - ctx = engine->i915->kernel_context; - err = __i915_gem_request_alloc(engine, ctx, &req); - return err ? ERR_PTR(err) : req; + return ERR_PTR(ret); } static void i915_gem_mark_busy(const struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 007112d..9e43c0a 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -449,10 +449,9 @@ TRACE_EVENT(i915_gem_evict_vm, ); TRACE_EVENT(i915_gem_ring_sync_to, - TP_PROTO(struct drm_i915_gem_request *to_req, - struct intel_engine_cs *from, - struct drm_i915_gem_request *req), - TP_ARGS(to_req, from, req), + TP_PROTO(struct drm_i915_gem_request *to, + struct drm_i915_gem_request *from), + TP_ARGS(to, from), TP_STRUCT__entry( __field(u32, dev) @@ -463,9 +462,9 @@ TRACE_EVENT(i915_gem_ring_sync_to, TP_fast_assign( __entry->dev = from->i915->drm.primary->index; - __entry->sync_from = from->id; - __entry->sync_to = to_req->engine->id; - __entry->seqno = req->fence.seqno; + __entry->sync_from = from->engine->id; + __entry->sync_to = to->engine->id; + __entry->seqno = from->fence.seqno; ), TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u", diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3f05c62..993bcfb 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11575,7 +11575,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, struct intel_flip_work *work; struct intel_engine_cs *engine; bool mmio_flip; - struct drm_i915_gem_request *request = NULL; + struct drm_i915_gem_request *request; int ret; /* @@ -11682,22 +11682,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, mmio_flip = use_mmio_flip(engine, obj); - /* When using CS flips, we want to emit semaphores between rings. - * However, when using mmio flips we will create a task to do the - * synchronisation, so all we want here is to pin the framebuffer - * into the display plane and skip any waits. 
- */ - if (!mmio_flip) { - ret = i915_gem_object_sync(obj, engine, &request); - if (!ret && !request) { - request = i915_gem_request_alloc(engine, NULL); - ret = PTR_ERR_OR_ZERO(request); - } - - if (ret) - goto cleanup_pending; - } - ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation); if (ret) goto cleanup_pending; @@ -11715,14 +11699,24 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, schedule_work(&work->mmio_work); } else { - i915_gem_request_assign(&work->flip_queued_req, request); + request = i915_gem_request_alloc(engine, engine->last_context); + if (IS_ERR(request)) { + ret = PTR_ERR(request); + goto cleanup_unpin; + } + + ret = i915_gem_object_sync(obj, request); + if (ret) + goto cleanup_request; + ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request, page_flip_flags); if (ret) - goto cleanup_unpin; + goto cleanup_request; intel_mark_page_flip_active(intel_crtc, work); + work->flip_queued_req = i915_gem_request_get(request); i915_add_request_no_flush(request); } @@ -11737,11 +11731,11 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, return 0; +cleanup_request: + i915_add_request_no_flush(request); cleanup_unpin: intel_unpin_fb_obj(fb, crtc->primary->state->rotation); cleanup_pending: - if (!IS_ERR_OR_NULL(request)) - i915_add_request_no_flush(request); atomic_dec(&intel_crtc->unpin_work_count); mutex_unlock(&dev->struct_mutex); cleanup: diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index af7d7e0..b760e90a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -655,7 +655,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req, struct drm_i915_gem_object *obj = vma->obj; if (obj->active & other_rings) { - ret = i915_gem_object_sync(obj, req->engine, &req); + ret = i915_gem_object_sync(obj, req); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 8f1d4d9..651efe4 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -229,11 +229,18 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay, return 0; } +static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay) +{ + struct drm_i915_private *dev_priv = overlay->i915; + struct intel_engine_cs *engine = &dev_priv->engine[RCS]; + + return i915_gem_request_alloc(engine, dev_priv->kernel_context); +} + /* overlay needs to be disable in OCMD reg */ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; struct intel_ring *ring; int ret; @@ -241,7 +248,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) WARN_ON(overlay->active); WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); - req = i915_gem_request_alloc(engine, NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); @@ -268,7 +275,6 @@ static int intel_overlay_continue(struct intel_overlay *overlay, bool load_polyphase_filter) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; @@ -285,7 +291,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (tmp & (1 << 17)) DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); - req = i915_gem_request_alloc(engine, 
NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); @@ -338,7 +344,6 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay) static int intel_overlay_off(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; struct drm_i915_gem_request *req; struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; @@ -352,7 +357,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) * of the hw. Do it in both cases */ flip_addr |= OFC_UPDATE; - req = i915_gem_request_alloc(engine, NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); @@ -412,7 +417,6 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) static int intel_overlay_release_old_vid(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct intel_engine_cs *engine = &dev_priv->engine[RCS]; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -428,7 +432,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) struct drm_i915_gem_request *req; struct intel_ring *ring; - req = i915_gem_request_alloc(engine, NULL); + req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); -- cgit v0.10.2 From 803688babda2398ac705825e94e1617d8419f737 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:27 +0100 Subject: drm/i915: Unify legacy/execlists emission of MI_BATCHBUFFER_START Both the ->dispatch_execbuffer and ->emit_bb_start callbacks do exactly the same thing, add MI_BATCHBUFFER_START to the request's ringbuffer - we need only one vfunc. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-20-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-10-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index fe06b58..ca941ff 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1326,9 +1326,9 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, if (exec_len == 0) exec_len = params->batch_obj->base.size; - ret = params->engine->dispatch_execbuffer(params->request, - exec_start, exec_len, - params->dispatch_flags); + ret = params->engine->emit_bb_start(params->request, + exec_start, exec_len, + params->dispatch_flags); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index b2be467..2ba759f 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -234,18 +234,18 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) if (so.rodata == NULL) return 0; - ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset, - so.rodata->batch_items * 4, - I915_DISPATCH_SECURE); + ret = req->engine->emit_bb_start(req, so.ggtt_offset, + so.rodata->batch_items * 4, + I915_DISPATCH_SECURE); if (ret) goto out; if (so.aux_batch_size > 8) { - ret = req->engine->dispatch_execbuffer(req, - (so.ggtt_offset + - so.aux_batch_offset), - so.aux_batch_size, - I915_DISPATCH_SECURE); + ret = req->engine->emit_bb_start(req, + (so.ggtt_offset + + so.aux_batch_offset), + so.aux_batch_size, + I915_DISPATCH_SECURE); if (ret) goto out; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
b/drivers/gpu/drm/i915/intel_lrc.c index b760e90a..a74006b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -859,7 +859,9 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, exec_start = params->batch_obj_vm_offset + args->batch_start_offset; - ret = engine->emit_bb_start(params->request, exec_start, params->dispatch_flags); + ret = engine->emit_bb_start(params->request, + exec_start, args->batch_len, + params->dispatch_flags); if (ret) return ret; @@ -1539,7 +1541,8 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) } static int gen8_emit_bb_start(struct drm_i915_gem_request *req, - u64 offset, unsigned dispatch_flags) + u64 offset, u32 len, + unsigned int dispatch_flags) { struct intel_ring *ring = req->ring; bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); @@ -1812,13 +1815,15 @@ static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req) return 0; ret = req->engine->emit_bb_start(req, so.ggtt_offset, - I915_DISPATCH_SECURE); + so.rodata->batch_items * 4, + I915_DISPATCH_SECURE); if (ret) goto out; ret = req->engine->emit_bb_start(req, - (so.ggtt_offset + so.aux_batch_offset), - I915_DISPATCH_SECURE); + (so.ggtt_offset + so.aux_batch_offset), + so.aux_batch_size, + I915_DISPATCH_SECURE); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1f876e7..799a7dc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1779,9 +1779,9 @@ gen8_irq_disable(struct intel_engine_cs *engine) } static int -i965_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 length, - unsigned dispatch_flags) +i965_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 length, + unsigned int dispatch_flags) { struct intel_ring *ring = req->ring; int ret; @@ -1806,9 +1806,9 @@ i965_dispatch_execbuffer(struct drm_i915_gem_request *req, #define I830_TLB_ENTRIES (2) #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) static int -i830_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +i830_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { struct intel_ring *ring = req->ring; u32 cs_offset = req->engine->scratch.gtt_offset; @@ -1868,9 +1868,9 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req, } static int -i915_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +i915_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { struct intel_ring *ring = req->ring; int ret; @@ -2562,9 +2562,9 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) } static int -gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +gen8_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { struct intel_ring *ring = req->ring; bool ppgtt = USES_PPGTT(req->i915) && @@ -2588,9 +2588,9 @@ gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, } static int -hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +hsw_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { struct intel_ring *ring = req->ring; int ret; @@ 
-2613,9 +2613,9 @@ hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, } static int -gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned dispatch_flags) +gen6_emit_bb_start(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned int dispatch_flags) { struct intel_ring *ring = req->ring; int ret; @@ -2818,15 +2818,15 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->add_request = gen6_add_request; if (INTEL_GEN(dev_priv) >= 8) - engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + engine->emit_bb_start = gen8_emit_bb_start; else if (INTEL_GEN(dev_priv) >= 6) - engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + engine->emit_bb_start = gen6_emit_bb_start; else if (INTEL_GEN(dev_priv) >= 4) - engine->dispatch_execbuffer = i965_dispatch_execbuffer; + engine->emit_bb_start = i965_emit_bb_start; else if (IS_I830(dev_priv) || IS_845G(dev_priv)) - engine->dispatch_execbuffer = i830_dispatch_execbuffer; + engine->emit_bb_start = i830_emit_bb_start; else - engine->dispatch_execbuffer = i915_dispatch_execbuffer; + engine->emit_bb_start = i915_emit_bb_start; intel_ring_init_irq(dev_priv, engine); intel_ring_init_semaphores(dev_priv, engine); @@ -2864,7 +2864,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) } if (IS_HASWELL(dev_priv)) - engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; + engine->emit_bb_start = hsw_emit_bb_start; engine->init_hw = init_render_ring; engine->cleanup = render_ring_cleanup; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 76d0495..45ba29c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -214,12 +214,6 @@ struct intel_engine_cs { * monotonic, even if not coherent. */ void (*irq_seqno_barrier)(struct intel_engine_cs *engine); - int (*dispatch_execbuffer)(struct drm_i915_gem_request *req, - u64 offset, u32 length, - unsigned dispatch_flags); -#define I915_DISPATCH_SECURE 0x1 -#define I915_DISPATCH_PINNED 0x2 -#define I915_DISPATCH_RS 0x4 void (*cleanup)(struct intel_engine_cs *engine); /* GEN8 signal/wait table - never trust comments! @@ -297,7 +291,11 @@ struct intel_engine_cs { #define EMIT_FLUSH BIT(1) #define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) int (*emit_bb_start)(struct drm_i915_gem_request *req, - u64 offset, unsigned dispatch_flags); + u64 offset, u32 length, + unsigned int dispatch_flags); +#define I915_DISPATCH_SECURE 0x1 +#define I915_DISPATCH_PINNED 0x2 +#define I915_DISPATCH_RS 0x4 /** * List of objects currently involved in rendering from the -- cgit v0.10.2 From ba76d91bc00b5f58ae42991cf9ec97e989bc05a2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:28 +0100 Subject: drm/i915: Remove intel_ring_get_tail() Joonas doesn't like the tiny function, especially if I go around making it more complicated and using it elsewhere. To remove that temptation, remove the function! 
Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-21-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-11-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f4e6c40..606b0b8 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -419,7 +419,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, * should already have been reserved in the ring buffer. Let the ring * know that it is time to use that space up. */ - request_start = intel_ring_get_tail(ring); + request_start = ring->tail; reserved_tail = request->reserved_space; request->reserved_space = 0; @@ -464,19 +464,19 @@ void __i915_add_request(struct drm_i915_gem_request *request, * GPU processing the request, we never over-estimate the * position of the head. */ - request->postfix = intel_ring_get_tail(ring); + request->postfix = ring->tail; if (i915.enable_execlists) { ret = engine->emit_request(request); } else { ret = engine->add_request(request); - request->tail = intel_ring_get_tail(ring); + request->tail = ring->tail; } /* Not allowed to fail! */ WARN(ret, "emit|add_request failed: %d!\n", ret); /* Sanity check that the reserved size was large enough. */ - ret = intel_ring_get_tail(ring) - request_start; + ret = ring->tail - request_start; if (ret < 0) ret += ring->size; WARN_ONCE(ret > reserved_tail, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 45ba29c..0c3c718 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -490,11 +490,6 @@ static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) int init_workarounds_ring(struct intel_engine_cs *engine); -static inline u32 intel_ring_get_tail(struct intel_ring *ring) -{ - return ring->tail; -} - /* * Arbitrary size for largest possible 'add request' sequence. The code paths * are complex and variable. Empirical measurement shows that the worst case -- cgit v0.10.2 From c5efa1ad09784905ae31cd65b659cc73c09fd290 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:29 +0100 Subject: drm/i915: Convert engine->write_tail to operate on a request If we rewrite the I915_WRITE_TAIL specialisation for the legacy ringbuffer as submitting the request onto the ringbuffer, we can unify the vfunc with both execlists and GuC in the next patch. v2: Drop the modulus from the I915_WRITE_TAIL as it is currently being applied in intel_ring_advance() after every command packet, and add a comment explaining why we need the modulus at all. 
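A minimal sketch of the resulting shape (the exact hunks follow in the diff): the emitter records the tail on the request, and the new submit hook only has to write it into the hardware register.

	static void i9xx_submit_request(struct drm_i915_gem_request *request)
	{
		/* I915_WRITE_TAIL() is a macro that uses dev_priv implicitly */
		struct drm_i915_private *dev_priv = request->i915;

		I915_WRITE_TAIL(request->engine, request->tail);
	}

Each add_request implementation then finishes with:

	intel_ring_advance(ring);

	req->tail = ring->tail;
	engine->submit_request(req);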
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-22-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-12-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 606b0b8..a885905 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -466,15 +466,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ request->postfix = ring->tail; - if (i915.enable_execlists) { + if (i915.enable_execlists) ret = engine->emit_request(request); - } else { + else ret = engine->add_request(request); - - request->tail = ring->tail; - } /* Not allowed to fail! */ WARN(ret, "emit|add_request failed: %d!\n", ret); + /* Sanity check that the reserved size was large enough. */ ret = ring->tail - request_start; if (ret < 0) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 799a7dc..3142085 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -58,14 +58,6 @@ void intel_ring_update_space(struct intel_ring *ring) ring->tail, ring->size); } -static void __intel_engine_submit(struct intel_engine_cs *engine) -{ - struct intel_ring *ring = engine->buffer; - - ring->tail &= ring->size - 1; - engine->write_tail(engine, ring->tail); -} - static int gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { @@ -412,13 +404,6 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) return gen8_emit_pipe_control(req, flags, scratch_addr); } -static void ring_write_tail(struct intel_engine_cs *engine, - u32 value) -{ - struct drm_i915_private *dev_priv = engine->i915; - I915_WRITE_TAIL(engine, value); -} - u64 intel_engine_get_active_head(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -532,7 +517,7 @@ static bool stop_ring(struct intel_engine_cs *engine) I915_WRITE_CTL(engine, 0); I915_WRITE_HEAD(engine, 0); - engine->write_tail(engine, 0); + I915_WRITE_TAIL(engine, 0); if (!IS_GEN2(dev_priv)) { (void)I915_READ_CTL(engine); @@ -1469,7 +1454,10 @@ gen6_add_request(struct drm_i915_gem_request *req) intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); intel_ring_emit(ring, req->fence.seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); - __intel_engine_submit(engine); + intel_ring_advance(ring); + + req->tail = ring->tail; + engine->submit_request(req); return 0; } @@ -1499,7 +1487,9 @@ gen8_render_add_request(struct drm_i915_gem_request *req) intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_emit(ring, MI_NOOP); - __intel_engine_submit(engine); + + req->tail = ring->tail; + engine->submit_request(req); return 0; } @@ -1716,11 +1706,21 @@ i9xx_add_request(struct drm_i915_gem_request *req) intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); intel_ring_emit(ring, req->fence.seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); - __intel_engine_submit(req->engine); + intel_ring_advance(ring); + + req->tail = ring->tail; + req->engine->submit_request(req); return 0; } +static void i9xx_submit_request(struct drm_i915_gem_request *request) +{ + struct drm_i915_private *dev_priv = request->i915; + + I915_WRITE_TAIL(request->engine, request->tail); +} + static void gen6_irq_enable(struct intel_engine_cs *engine) { @@ -2479,10 +2479,9 @@ void 
intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) rcu_read_unlock(); } -static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, - u32 value) +static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) { - struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_private *dev_priv = request->i915; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -2506,8 +2505,8 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine, DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); /* Now that the ring is fully powered up, update the tail */ - I915_WRITE_FW(RING_TAIL(engine->mmio_base), value); - POSTING_READ_FW(RING_TAIL(engine->mmio_base)); + I915_WRITE_FW(RING_TAIL(request->engine->mmio_base), request->tail); + POSTING_READ_FW(RING_TAIL(request->engine->mmio_base)); /* Let the ring send IDLE messages to the GT again, * and so let it sleep to conserve power when idle. @@ -2811,7 +2810,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { engine->init_hw = init_ring_common; - engine->write_tail = ring_write_tail; + engine->submit_request = i9xx_submit_request; engine->add_request = i9xx_add_request; if (INTEL_GEN(dev_priv) >= 6) @@ -2895,7 +2894,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 6) { /* gen6 bsd needs a special wa for tail updates */ if (IS_GEN6(dev_priv)) - engine->write_tail = gen6_bsd_ring_write_tail; + engine->submit_request = gen6_bsd_submit_request; engine->emit_flush = gen6_bsd_ring_flush; if (INTEL_GEN(dev_priv) < 8) engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0c3c718..14d2ea3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -204,8 +204,6 @@ struct intel_engine_cs { int (*init_context)(struct drm_i915_gem_request *req); - void (*write_tail)(struct intel_engine_cs *engine, - u32 value); int (*add_request)(struct drm_i915_gem_request *req); /* Some chipsets are not quite as coherent as advertised and need * an expensive kick to force a true read of the up-to-date seqno. @@ -296,6 +294,7 @@ struct intel_engine_cs { #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 #define I915_DISPATCH_RS 0x4 + void (*submit_request)(struct drm_i915_gem_request *req); /** * List of objects currently involved in rendering from the @@ -461,6 +460,13 @@ static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg) static inline void intel_ring_advance(struct intel_ring *ring) { + /* The modulus is required so that we avoid writing + * request->tail == ring->size, rather than the expected 0, + * into the RING_TAIL register as that can cause a GPU hang. + * As this is only strictly required for the request->tail, + * and only then as we write the value into hardware, we can + * one day remove the modulus after every command packet. + */ ring->tail &= ring->size - 1; } -- cgit v0.10.2 From 8f9420184a3d62f9ca1ddb2b2f716989c461489b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:30 +0100 Subject: drm/i915: Move the modulus for ring emission to the register write Space reservation is already safe with respect to the ring->size modulus, but hardware only expects to see values in the range 0...ring->size-1 (inclusive) and so requires the modulus to prevent us writing the value ring->size instead of 0. 
As this is only required for the register itself, we can defer the modulus to the register update and not perform it after every command packet. We keep the intel_ring_advance() around in the code to provide demarcation for the end-of-packet (which then can be compared against intel_ring_begin() as the number of dwords emitted must match the reserved space). v2: Assert that the ring size is a power-of-two to match assumptions in the code. Simplify the comment before writing the tail value to explain why the modulus is necessary. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Dave Gordon Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-13-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a74006b..0c3ca46 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -373,7 +373,7 @@ static void execlists_update_context(struct drm_i915_gem_request *rq) struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state; - reg_state[CTX_RING_TAIL+1] = rq->tail; + reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail); /* True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3142085..a039353 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1718,7 +1718,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) { struct drm_i915_private *dev_priv = request->i915; - I915_WRITE_TAIL(request->engine, request->tail); + I915_WRITE_TAIL(request->engine, + intel_ring_offset(request->ring, request->tail)); } static void @@ -2081,6 +2082,8 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) struct intel_ring *ring; int ret; + GEM_BUG_ON(!is_power_of_2(size)); + ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (ring == NULL) { DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", @@ -2505,7 +2508,8 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); /* Now that the ring is fully powered up, update the tail */ - I915_WRITE_FW(RING_TAIL(request->engine->mmio_base), request->tail); + I915_WRITE_FW(RING_TAIL(request->engine->mmio_base), + intel_ring_offset(request->ring, request->tail)); POSTING_READ_FW(RING_TAIL(request->engine->mmio_base)); /* Let the ring send IDLE messages to the GT again, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 14d2ea3..7d4a281 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -460,14 +460,20 @@ static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg) static inline void intel_ring_advance(struct intel_ring *ring) { - /* The modulus is required so that we avoid writing - * request->tail == ring->size, rather than the expected 0, - * into the RING_TAIL register as that can cause a GPU hang. - * As this is only strictly required for the request->tail, - * and only then as we write the value into hardware, we can - * one day remove the modulus after every command packet. + /* Dummy function. 
+ * + * This serves as a placeholder in the code so that the reader + * can compare against the preceding intel_ring_begin() and + * check that the number of dwords emitted matches the space + * reserved for the command packet (i.e. the value passed to + * intel_ring_begin()). */ - ring->tail &= ring->size - 1; +} + +static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value) +{ + /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ + return value & (ring->size - 1); } int __intel_ring_space(int head, int tail, int size); -- cgit v0.10.2 From ddd66c5154c25dcb78e3ca29baa0d16cf2c8d6fb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:31 +0100 Subject: drm/i915: Unify request submission Move request submission from emit_request into its own common vfunc from i915_add_request(). v2: Convert I915_DISPATCH_flags to BIT(x) whilst passing v3: Rename a few functions to match. v4: Reenable execlists submission after disabling guc. v5: Be aware that everyone calls i915_guc_submission_disable()! Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-23-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-14-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a885905..e378eb6 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -466,12 +466,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ request->postfix = ring->tail; - if (i915.enable_execlists) - ret = engine->emit_request(request); - else - ret = engine->add_request(request); /* Not allowed to fail! */ - WARN(ret, "emit|add_request failed: %d!\n", ret); + ret = engine->emit_request(request); + WARN(ret, "(%s)->emit_request failed: %d!\n", engine->name, ret); /* Sanity check that the reserved size was large enough. */ ret = ring->tail - request_start; @@ -483,6 +480,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, reserved_tail, ret); i915_gem_mark_busy(engine); + engine->submit_request(request); } static unsigned long local_clock_us(unsigned int *cpu) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index eccd348..5de8675 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -585,7 +585,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) * The only error here arises if the doorbell hardware isn't functioning * as expected, which really shouln't happen. 
*/ -int i915_guc_submit(struct drm_i915_gem_request *rq) +static void i915_guc_submit(struct drm_i915_gem_request *rq) { unsigned int engine_id = rq->engine->id; struct intel_guc *guc = &rq->i915->guc; @@ -602,8 +602,6 @@ int i915_guc_submit(struct drm_i915_gem_request *rq) guc->submissions[engine_id] += 1; guc->last_seqno[engine_id] = rq->fence.seqno; - - return b_ret; } /* @@ -992,6 +990,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; struct i915_guc_client *client; + struct intel_engine_cs *engine; /* client for execbuf submission */ client = guc_client_alloc(dev_priv, @@ -1006,6 +1005,10 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) host2guc_sample_forcewake(guc, client); guc_init_doorbell_hw(guc); + /* Take over from manual control of ELSP (execlists) */ + for_each_engine(engine, dev_priv) + engine->submit_request = i915_guc_submit; + return 0; } @@ -1013,8 +1016,14 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; + if (!guc->execbuf_client) + return; + guc_client_free(dev_priv, guc->execbuf_client); guc->execbuf_client = NULL; + + /* Revert back to manual ELSP submission */ + intel_execlists_enable_submission(dev_priv); } void i915_guc_submission_fini(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 3e3e743..623cf26 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -160,7 +160,6 @@ extern int intel_guc_resume(struct drm_device *dev); int i915_guc_submission_init(struct drm_i915_private *dev_priv); int i915_guc_submission_enable(struct drm_i915_private *dev_priv); int i915_guc_wq_check_space(struct drm_i915_gem_request *rq); -int i915_guc_submit(struct drm_i915_gem_request *rq); void i915_guc_submission_disable(struct drm_i915_private *dev_priv); void i915_guc_submission_fini(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0c3ca46..524c195 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -738,7 +738,7 @@ err_unpin: } /* - * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload + * intel_logical_ring_advance() - advance the tail and prepare for submission * @request: Request to advance the logical ringbuffer of. * * The tail is updated in our logical ringbuffer struct, not in the actual context. What @@ -747,7 +747,7 @@ err_unpin: * point, the tail *inside* the context is updated and the ELSP written to. 
*/ static int -intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) +intel_logical_ring_advance(struct drm_i915_gem_request *request) { struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; @@ -773,12 +773,6 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request) */ request->previous_context = engine->last_context; engine->last_context = request->ctx; - - if (i915.enable_guc_submission) - i915_guc_submit(request); - else - execlists_context_queue(request); - return 0; } @@ -1768,7 +1762,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) intel_ring_emit(ring, request->fence.seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_emit(ring, MI_NOOP); - return intel_logical_ring_advance_and_submit(request); + return intel_logical_ring_advance(request); } static int gen8_emit_request_render(struct drm_i915_gem_request *request) @@ -1799,7 +1793,7 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request) intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_emit(ring, MI_NOOP); - return intel_logical_ring_advance_and_submit(request); + return intel_logical_ring_advance(request); } static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req) @@ -1900,13 +1894,23 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) engine->i915 = NULL; } +void intel_execlists_enable_submission(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + + for_each_engine(engine, dev_priv) + engine->submit_request = execlists_context_queue; +} + static void logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. */ engine->init_hw = gen8_init_common_ring; - engine->emit_request = gen8_emit_request; engine->emit_flush = gen8_emit_flush; + engine->emit_request = gen8_emit_request; + engine->submit_request = execlists_context_queue; + engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; engine->emit_bb_start = gen8_emit_bb_start; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 33e0193..bdd764a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -95,6 +95,8 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, /* Execlists */ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists); +void intel_execlists_enable_submission(struct drm_i915_private *dev_priv); + struct i915_execbuffer_params; int intel_execlists_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a039353..8250db7 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1428,15 +1428,14 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req, } /** - * gen6_add_request - Update the semaphore mailbox registers + * gen6_emit_request - Update the semaphore mailbox registers * * @request - request to write to the ring * * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. 
*/ -static int -gen6_add_request(struct drm_i915_gem_request *req) +static int gen6_emit_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; struct intel_ring *ring = req->ring; @@ -1457,13 +1456,11 @@ gen6_add_request(struct drm_i915_gem_request *req) intel_ring_advance(ring); req->tail = ring->tail; - engine->submit_request(req); return 0; } -static int -gen8_render_add_request(struct drm_i915_gem_request *req) +static int gen8_render_emit_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; struct intel_ring *ring = req->ring; @@ -1487,9 +1484,9 @@ gen8_render_add_request(struct drm_i915_gem_request *req) intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); req->tail = ring->tail; - engine->submit_request(req); return 0; } @@ -1692,8 +1689,7 @@ bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) return 0; } -static int -i9xx_add_request(struct drm_i915_gem_request *req) +static int i9xx_emit_request(struct drm_i915_gem_request *req) { struct intel_ring *ring = req->ring; int ret; @@ -1709,7 +1705,6 @@ i9xx_add_request(struct drm_i915_gem_request *req) intel_ring_advance(ring); req->tail = ring->tail; - req->engine->submit_request(req); return 0; } @@ -2814,11 +2809,11 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { engine->init_hw = init_ring_common; - engine->submit_request = i9xx_submit_request; - engine->add_request = i9xx_add_request; + engine->emit_request = i9xx_emit_request; if (INTEL_GEN(dev_priv) >= 6) - engine->add_request = gen6_add_request; + engine->emit_request = gen6_emit_request; + engine->submit_request = i9xx_submit_request; if (INTEL_GEN(dev_priv) >= 8) engine->emit_bb_start = gen8_emit_bb_start; @@ -2847,7 +2842,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 8) { engine->init_context = intel_rcs_ctx_init; - engine->add_request = gen8_render_add_request; + engine->emit_request = gen8_render_emit_request; engine->emit_flush = gen8_render_ring_flush; if (i915.semaphores) engine->semaphore.signal = gen8_rcs_signal; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 7d4a281..5f44000 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -204,7 +204,19 @@ struct intel_engine_cs { int (*init_context)(struct drm_i915_gem_request *req); - int (*add_request)(struct drm_i915_gem_request *req); + int (*emit_flush)(struct drm_i915_gem_request *request, + u32 mode); +#define EMIT_INVALIDATE BIT(0) +#define EMIT_FLUSH BIT(1) +#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) + int (*emit_bb_start)(struct drm_i915_gem_request *req, + u64 offset, u32 length, + unsigned int dispatch_flags); +#define I915_DISPATCH_SECURE BIT(0) +#define I915_DISPATCH_PINNED BIT(1) +#define I915_DISPATCH_RS BIT(2) + int (*emit_request)(struct drm_i915_gem_request *req); + void (*submit_request)(struct drm_i915_gem_request *req); /* Some chipsets are not quite as coherent as advertised and need * an expensive kick to force a true read of the up-to-date seqno. 
* However, the up-to-date seqno is not always required and the last @@ -282,19 +294,6 @@ struct intel_engine_cs { unsigned int idle_lite_restore_wa; bool disable_lite_restore_wa; u32 ctx_desc_template; - int (*emit_request)(struct drm_i915_gem_request *request); - int (*emit_flush)(struct drm_i915_gem_request *request, - u32 mode); -#define EMIT_INVALIDATE BIT(0) -#define EMIT_FLUSH BIT(1) -#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) - int (*emit_bb_start)(struct drm_i915_gem_request *req, - u64 offset, u32 length, - unsigned int dispatch_flags); -#define I915_DISPATCH_SECURE 0x1 -#define I915_DISPATCH_PINNED 0x2 -#define I915_DISPATCH_RS 0x4 - void (*submit_request)(struct drm_i915_gem_request *req); /** * List of objects currently involved in rendering from the -- cgit v0.10.2 From f4ea6bddb9988e7aa6f50732c75739e8cd7c2582 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:32 +0100 Subject: drm/i915/lrc: Update function names to match request flow With adding engine->submit_request, we now have a bunch of functions with similar names used at different stages of the execlist submission. Try a different coat of paint, to hopefully reduce confusion between the requests, intel_engine_cs and the actual execlists submission process. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-24-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-15-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 5de8675..c9105f6 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1019,11 +1019,11 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) if (!guc->execbuf_client) return; - guc_client_free(dev_priv, guc->execbuf_client); - guc->execbuf_client = NULL; - /* Revert back to manual ELSP submission */ intel_execlists_enable_submission(dev_priv); + + guc_client_free(dev_priv, guc->execbuf_client); + guc->execbuf_client = NULL; } void i915_guc_submission_fini(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 524c195..e8f855b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -384,8 +384,8 @@ static void execlists_update_context(struct drm_i915_gem_request *rq) execlists_update_context_pdps(ppgtt, reg_state); } -static void execlists_submit_requests(struct drm_i915_gem_request *rq0, - struct drm_i915_gem_request *rq1) +static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0, + struct drm_i915_gem_request *rq1) { struct drm_i915_private *dev_priv = rq0->i915; unsigned int fw_domains = rq0->engine->fw_domains; @@ -418,7 +418,7 @@ static inline void execlists_context_status_change( atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq); } -static void execlists_context_unqueue(struct intel_engine_cs *engine) +static void execlists_unqueue(struct intel_engine_cs *engine) { struct drm_i915_gem_request *req0 = NULL, *req1 = NULL; struct drm_i915_gem_request *cursor, *tmp; @@ -486,7 +486,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *engine) req0->tail &= req0->ring->size - 1; } - execlists_submit_requests(req0, req1); + execlists_elsp_submit_contexts(req0, req1); } static unsigned int @@ -597,7 +597,7 @@
static void intel_lrc_irq_handler(unsigned long data) if (submit_contexts) { if (!engine->disable_lite_restore_wa || (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE)) - execlists_context_unqueue(engine); + execlists_unqueue(engine); } spin_unlock(&engine->execlist_lock); @@ -606,7 +606,7 @@ static void intel_lrc_irq_handler(unsigned long data) DRM_ERROR("More than two context complete events?\n"); } -static void execlists_context_queue(struct drm_i915_gem_request *request) +static void execlists_submit_request(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct drm_i915_gem_request *cursor; @@ -637,7 +637,7 @@ static void execlists_context_queue(struct drm_i915_gem_request *request) list_add_tail(&request->execlist_link, &engine->execlist_queue); request->ctx_hw_id = request->ctx->hw_id; if (num_elements == 0) - execlists_context_unqueue(engine); + execlists_unqueue(engine); spin_unlock_bh(&engine->execlist_lock); } @@ -1899,7 +1899,7 @@ void intel_execlists_enable_submission(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; for_each_engine(engine, dev_priv) - engine->submit_request = execlists_context_queue; + engine->submit_request = execlists_submit_request; } static void @@ -1909,7 +1909,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->init_hw = gen8_init_common_ring; engine->emit_flush = gen8_emit_flush; engine->emit_request = gen8_emit_request; - engine->submit_request = execlists_context_queue; + engine->submit_request = execlists_submit_request; engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; -- cgit v0.10.2 From 9242f974dc4c4656981aaa4104d0cbc46ddb03d5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:33 +0100 Subject: drm/i915: Stop passing caller's num_dwords to engine->semaphore.signal() Rather than pass in the num_dwords that the caller wishes to use after the signal command packet, split the breadcrumb emission into two phases and have both the signal and breadcrumb individually acquire space on the ring. This makes the interface simpler for the reader, and will simplify future patches.
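After the split, each phase reserves exactly the dwords it emits; a sketch of the caller side as it ends up in the breadcrumb emitters below (the signal routines now do their own intel_ring_begin()/intel_ring_advance()):

	if (engine->semaphore.signal) {
		ret = engine->semaphore.signal(req);	/* acquires its own space */
		if (ret)
			return ret;
	}

	ret = intel_ring_begin(req, 4);	/* breadcrumb reserves only its own 4 dwords */
	if (ret)
		return ret;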
Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-25-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-16-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8250db7..a8de46c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1309,10 +1309,8 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) intel_fini_pipe_control(engine); } -static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, - unsigned int num_dwords) +static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req) { -#define MBOX_UPDATE_DWORDS 8 struct intel_ring *signaller = signaller_req->ring; struct drm_i915_private *dev_priv = signaller_req->i915; struct intel_engine_cs *waiter; @@ -1320,10 +1318,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, int ret, num_rings; num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; -#undef MBOX_UPDATE_DWORDS - - ret = intel_ring_begin(signaller_req, num_dwords); + ret = intel_ring_begin(signaller_req, (num_rings-1) * 8); if (ret) return ret; @@ -1347,14 +1342,13 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, MI_SEMAPHORE_TARGET(waiter->hw_id)); intel_ring_emit(signaller, 0); } + intel_ring_advance(signaller); return 0; } -static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, - unsigned int num_dwords) +static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req) { -#define MBOX_UPDATE_DWORDS 6 struct intel_ring *signaller = signaller_req->ring; struct drm_i915_private *dev_priv = signaller_req->i915; struct intel_engine_cs *waiter; @@ -1362,10 +1356,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, int ret, num_rings; num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; -#undef MBOX_UPDATE_DWORDS - - ret = intel_ring_begin(signaller_req, num_dwords); + ret = intel_ring_begin(signaller_req, (num_rings-1) * 6); if (ret) return ret; @@ -1387,12 +1378,12 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, MI_SEMAPHORE_TARGET(waiter->hw_id)); intel_ring_emit(signaller, 0); } + intel_ring_advance(signaller); return 0; } -static int gen6_signal(struct drm_i915_gem_request *signaller_req, - unsigned int num_dwords) +static int gen6_signal(struct drm_i915_gem_request *signaller_req) { struct intel_ring *signaller = signaller_req->ring; struct drm_i915_private *dev_priv = signaller_req->i915; @@ -1400,12 +1391,8 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req, enum intel_engine_id id; int ret, num_rings; -#define MBOX_UPDATE_DWORDS 3 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2); -#undef MBOX_UPDATE_DWORDS - - ret = intel_ring_begin(signaller_req, num_dwords); + ret = intel_ring_begin(signaller_req, round_up((num_rings-1) * 3, 2)); if (ret) return ret; @@ -1423,6 +1410,7 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req, /* If num_dwords was rounded, make sure the tail pointer is correct */ if (num_rings % 2 == 0) intel_ring_emit(signaller, MI_NOOP); + intel_ring_advance(signaller); return 0; } @@ -1441,11 +1429,13 @@ static int 
gen6_emit_request(struct drm_i915_gem_request *req) struct intel_ring *ring = req->ring; int ret; - if (engine->semaphore.signal) - ret = engine->semaphore.signal(req, 4); - else - ret = intel_ring_begin(req, 4); + if (engine->semaphore.signal) { + ret = engine->semaphore.signal(req); + if (ret) + return ret; + } + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -1466,10 +1456,13 @@ static int gen8_render_emit_request(struct drm_i915_gem_request *req) struct intel_ring *ring = req->ring; int ret; - if (engine->semaphore.signal) - ret = engine->semaphore.signal(req, 8); - else - ret = intel_ring_begin(req, 8); + if (engine->semaphore.signal) { + ret = engine->semaphore.signal(req); + if (ret) + return ret; + } + + ret = intel_ring_begin(req, 8); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 5f44000..c3d4b88 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -280,9 +280,7 @@ struct intel_engine_cs { int (*sync_to)(struct drm_i915_gem_request *to_req, struct intel_engine_cs *from, u32 seqno); - int (*signal)(struct drm_i915_gem_request *signaller_req, - /* num_dwords needed by caller */ - unsigned int num_dwords); + int (*signal)(struct drm_i915_gem_request *signaller_req); } semaphore; /* Execlists */ -- cgit v0.10.2 From b0411e7d453118fb72f5b0c10202b5b7c98a0329 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:34 +0100 Subject: drm/i915: Reuse legacy breadcrumbs + tail emission As GEN6+ is now a simple variant on the basic breadcrumbs + tail write, reuse the common code. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-26-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-17-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a8de46c..6f7f704 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1415,26 +1415,19 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req) return 0; } -/** - * gen6_emit_request - Update the semaphore mailbox registers - * - * @request - request to write to the ring - * - * Update the mailbox registers in the *other* rings with the current seqno. - * This acts like a signal in the canonical semaphore. - */ -static int gen6_emit_request(struct drm_i915_gem_request *req) +static void i9xx_submit_request(struct drm_i915_gem_request *request) +{ + struct drm_i915_private *dev_priv = request->i915; + + I915_WRITE_TAIL(request->engine, + intel_ring_offset(request->ring, request->tail)); +} + +static int i9xx_emit_request(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; struct intel_ring *ring = req->ring; int ret; - if (engine->semaphore.signal) { - ret = engine->semaphore.signal(req); - if (ret) - return ret; - } - ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -1450,6 +1443,27 @@ static int gen6_emit_request(struct drm_i915_gem_request *req) return 0; } +/** + * gen6_emit_request - Update the semaphore mailbox registers + * + * @request - request to write to the ring + * + * Update the mailbox registers in the *other* rings with the current seqno. + * This acts like a signal in the canonical semaphore. 
+ */ +static int gen6_emit_request(struct drm_i915_gem_request *req) +{ + if (req->engine->semaphore.signal) { + int ret; + + ret = req->engine->semaphore.signal(req); + if (ret) + return ret; + } + + return i9xx_emit_request(req); +} + static int gen8_render_emit_request(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; @@ -1682,34 +1696,6 @@ bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) return 0; } -static int i9xx_emit_request(struct drm_i915_gem_request *req) -{ - struct intel_ring *ring = req->ring; - int ret; - - ret = intel_ring_begin(req, 4); - if (ret) - return ret; - - intel_ring_emit(ring, MI_STORE_DWORD_INDEX); - intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, req->fence.seqno); - intel_ring_emit(ring, MI_USER_INTERRUPT); - intel_ring_advance(ring); - - req->tail = ring->tail; - - return 0; -} - -static void i9xx_submit_request(struct drm_i915_gem_request *request) -{ - struct drm_i915_private *dev_priv = request->i915; - - I915_WRITE_TAIL(request->engine, - intel_ring_offset(request->ring, request->tail)); -} - static void gen6_irq_enable(struct intel_engine_cs *engine) { @@ -2496,9 +2482,7 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); /* Now that the ring is fully powered up, update the tail */ - I915_WRITE_FW(RING_TAIL(request->engine->mmio_base), - intel_ring_offset(request->ring, request->tail)); - POSTING_READ_FW(RING_TAIL(request->engine->mmio_base)); + i9xx_submit_request(request); /* Let the ring send IDLE messages to the GT again, * and so let it sleep to conserve power when idle. -- cgit v0.10.2 From 618e4ca7b137612fd6b4395f011d9b392d68d149 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:35 +0100 Subject: drm/i915/ringbuffer: Specialise SNB+ request emission for semaphores As gen6_emit_request() only differs from i9xx_emit_request() when semaphores are enabled, only use the specialised vfunc in that scenario. v2: Reorder semaphore init so as to keep engine->emit_request default vfunc selection compact. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-27-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-18-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6f7f704..0b6f12c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1444,22 +1444,20 @@ static int i9xx_emit_request(struct drm_i915_gem_request *req) } /** - * gen6_emit_request - Update the semaphore mailbox registers + * gen6_sema_emit_request - Update the semaphore mailbox registers * * @request - request to write to the ring * * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. 
*/ -static int gen6_emit_request(struct drm_i915_gem_request *req) +static int gen6_sema_emit_request(struct drm_i915_gem_request *req) { - if (req->engine->semaphore.signal) { - int ret; + int ret; - ret = req->engine->semaphore.signal(req); - if (ret) - return ret; - } + ret = req->engine->semaphore.signal(req); + if (ret) + return ret; return i9xx_emit_request(req); } @@ -2785,11 +2783,14 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv, static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { + intel_ring_init_irq(dev_priv, engine); + intel_ring_init_semaphores(dev_priv, engine); + engine->init_hw = init_ring_common; engine->emit_request = i9xx_emit_request; - if (INTEL_GEN(dev_priv) >= 6) - engine->emit_request = gen6_emit_request; + if (i915.semaphores) + engine->emit_request = gen6_sema_emit_request; engine->submit_request = i9xx_submit_request; if (INTEL_GEN(dev_priv) >= 8) @@ -2802,9 +2803,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->emit_bb_start = i830_emit_bb_start; else engine->emit_bb_start = i915_emit_bb_start; - - intel_ring_init_irq(dev_priv, engine); - intel_ring_init_semaphores(dev_priv, engine); } int intel_init_render_ring_buffer(struct intel_engine_cs *engine) -- cgit v0.10.2 From e40f9ee6612c492d875b3de6dcba98522e0afe36 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:36 +0100 Subject: drm/i915: Remove duplicate golden render state init from execlists Now that we use the same vfuncs for emitting the batch buffer in both execlists and legacy, the golden render state initialisation is identical between both. v2: gcc wants so.ggtt_offset initialised (even though it is not used) Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-28-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-19-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 2ba759f..a9b56d1 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -28,6 +28,15 @@ #include "i915_drv.h" #include "intel_renderstate.h" +struct render_state { + const struct intel_renderstate_rodata *rodata; + struct drm_i915_gem_object *obj; + u64 ggtt_offset; + int gen; + u32 aux_batch_size; + u32 aux_batch_offset; +}; + static const struct intel_renderstate_rodata * render_state_get_rodata(const int gen) { @@ -51,6 +60,7 @@ static int render_state_init(struct render_state *so, int ret; so->gen = INTEL_GEN(dev_priv); + so->ggtt_offset = 0; /* keep gcc quiet */ so->rodata = render_state_get_rodata(so->gen); if (so->rodata == NULL) return 0; @@ -192,14 +202,14 @@ err_out: #undef OUT_BATCH -void i915_gem_render_state_fini(struct render_state *so) +static void render_state_fini(struct render_state *so) { i915_gem_object_ggtt_unpin(so->obj); i915_gem_object_put(so->obj); } -int i915_gem_render_state_prepare(struct intel_engine_cs *engine, - struct render_state *so) +static int render_state_prepare(struct intel_engine_cs *engine, + struct render_state *so) { int ret; @@ -215,7 +225,7 @@ int i915_gem_render_state_prepare(struct intel_engine_cs *engine, ret = render_state_setup(so); if (ret) { - i915_gem_render_state_fini(so); + render_state_fini(so); return ret; } @@ -227,7 +237,7 @@ int i915_gem_render_state_init(struct 
drm_i915_gem_request *req) struct render_state so; int ret; - ret = i915_gem_render_state_prepare(req->engine, &so); + ret = render_state_prepare(req->engine, &so); if (ret) return ret; @@ -251,8 +261,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) } i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); - out: - i915_gem_render_state_fini(&so); + render_state_fini(&so); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 6aaa3a1..c44fca8 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -26,24 +26,6 @@ #include -struct intel_renderstate_rodata { - const u32 *reloc; - const u32 *batch; - const u32 batch_items; -}; - -struct render_state { - const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - u64 ggtt_offset; - int gen; - u32 aux_batch_size; - u32 aux_batch_offset; -}; - int i915_gem_render_state_init(struct drm_i915_gem_request *req); -void i915_gem_render_state_fini(struct render_state *so); -int i915_gem_render_state_prepare(struct intel_engine_cs *engine, - struct render_state *so); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e8f855b..8f6324b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1796,38 +1796,6 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request) return intel_logical_ring_advance(request); } -static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req) -{ - struct render_state so; - int ret; - - ret = i915_gem_render_state_prepare(req->engine, &so); - if (ret) - return ret; - - if (so.rodata == NULL) - return 0; - - ret = req->engine->emit_bb_start(req, so.ggtt_offset, - so.rodata->batch_items * 4, - I915_DISPATCH_SECURE); - if (ret) - goto out; - - ret = req->engine->emit_bb_start(req, - (so.ggtt_offset + so.aux_batch_offset), - so.aux_batch_size, - I915_DISPATCH_SECURE); - if (ret) - goto out; - - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); - -out: - i915_gem_render_state_fini(&so); - return ret; -} - static int gen8_init_rcs_context(struct drm_i915_gem_request *req) { int ret; @@ -1844,7 +1812,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) if (ret) DRM_ERROR("MOCS failed to program: expect performance issues.\n"); - return intel_lr_context_render_state_init(req); + return i915_gem_render_state_init(req); } /** diff --git a/drivers/gpu/drm/i915/intel_renderstate.h b/drivers/gpu/drm/i915/intel_renderstate.h index 5bd6985..08f6fea 100644 --- a/drivers/gpu/drm/i915/intel_renderstate.h +++ b/drivers/gpu/drm/i915/intel_renderstate.h @@ -24,12 +24,13 @@ #ifndef _INTEL_RENDERSTATE_H #define _INTEL_RENDERSTATE_H -#include "i915_drv.h" +#include -extern const struct intel_renderstate_rodata gen6_null_state; -extern const struct intel_renderstate_rodata gen7_null_state; -extern const struct intel_renderstate_rodata gen8_null_state; -extern const struct intel_renderstate_rodata gen9_null_state; +struct intel_renderstate_rodata { + const u32 *reloc; + const u32 *batch; + const u32 batch_items; +}; #define RO_RENDERSTATE(_g) \ const struct intel_renderstate_rodata gen ## _g ## _null_state = { \ @@ -38,4 +39,9 @@ extern const struct intel_renderstate_rodata gen9_null_state; .batch_items = sizeof(gen ## _g ## _null_state_batch)/4, \ } +extern const struct intel_renderstate_rodata 
gen6_null_state; +extern const struct intel_renderstate_rodata gen7_null_state; +extern const struct intel_renderstate_rodata gen8_null_state; +extern const struct intel_renderstate_rodata gen9_null_state; + #endif /* INTEL_RENDERSTATE_H */ -- cgit v0.10.2 From 15d21db87239840ae8248cf26e46391edc13cfe3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:37 +0100 Subject: drm/i915: Refactor golden render state emission to unconfuse gcc GCC was inlining the init and setup functions, but was getting itself confused into thinking that variables could be used uninitialised. If we do the inline for gcc, it is happy! As a bonus we shrink the code. v2: A couple of minor tweaks from Joonas Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-29-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-20-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index a9b56d1..f85c550 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -32,15 +32,14 @@ struct render_state { const struct intel_renderstate_rodata *rodata; struct drm_i915_gem_object *obj; u64 ggtt_offset; - int gen; u32 aux_batch_size; u32 aux_batch_offset; }; static const struct intel_renderstate_rodata * -render_state_get_rodata(const int gen) +render_state_get_rodata(const struct drm_i915_gem_request *req) { - switch (gen) { + switch (INTEL_GEN(req->i915)) { case 6: return &gen6_null_state; case 7: @@ -54,36 +53,6 @@ render_state_get_rodata(const int gen) return NULL; } -static int render_state_init(struct render_state *so, - struct drm_i915_private *dev_priv) -{ - int ret; - - so->gen = INTEL_GEN(dev_priv); - so->ggtt_offset = 0; /* keep gcc quiet */ - so->rodata = render_state_get_rodata(so->gen); - if (so->rodata == NULL) - return 0; - - if (so->rodata->batch_items * 4 > 4096) - return -EINVAL; - - so->obj = i915_gem_object_create(&dev_priv->drm, 4096); - if (IS_ERR(so->obj)) - return PTR_ERR(so->obj); - - ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0); - if (ret) - goto free_gem; - - so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj); - return 0; - -free_gem: - i915_gem_object_put(so->obj); - return ret; -} - /* * Macro to add commands to auxiliary batch. 
* This macro only checks for page overflow before inserting the commands, @@ -106,6 +75,7 @@ static int render_state_setup(struct render_state *so) { struct drm_device *dev = so->obj->base.dev; const struct intel_renderstate_rodata *rodata = so->rodata; + const bool has_64bit_reloc = INTEL_GEN(dev) >= 8; unsigned int i = 0, reloc_index = 0; struct page *page; u32 *d; @@ -124,7 +94,7 @@ static int render_state_setup(struct render_state *so) if (i * 4 == rodata->reloc[reloc_index]) { u64 r = s + so->ggtt_offset; s = lower_32_bits(r); - if (so->gen >= 8) { + if (has_64bit_reloc) { if (i + 1 >= rodata->batch_items || rodata->batch[i + 1] != 0) { ret = -EINVAL; @@ -202,53 +172,40 @@ err_out: #undef OUT_BATCH -static void render_state_fini(struct render_state *so) -{ - i915_gem_object_ggtt_unpin(so->obj); - i915_gem_object_put(so->obj); -} - -static int render_state_prepare(struct intel_engine_cs *engine, - struct render_state *so) +int i915_gem_render_state_init(struct drm_i915_gem_request *req) { + struct render_state so; int ret; - if (WARN_ON(engine->id != RCS)) + if (WARN_ON(req->engine->id != RCS)) return -ENOENT; - ret = render_state_init(so, engine->i915); - if (ret) - return ret; - - if (so->rodata == NULL) + so.rodata = render_state_get_rodata(req); + if (!so.rodata) return 0; - ret = render_state_setup(so); - if (ret) { - render_state_fini(so); - return ret; - } - - return 0; -} + if (so.rodata->batch_items * 4 > 4096) + return -EINVAL; -int i915_gem_render_state_init(struct drm_i915_gem_request *req) -{ - struct render_state so; - int ret; + so.obj = i915_gem_object_create(&req->i915->drm, 4096); + if (IS_ERR(so.obj)) + return PTR_ERR(so.obj); - ret = render_state_prepare(req->engine, &so); + ret = i915_gem_obj_ggtt_pin(so.obj, 4096, 0); if (ret) - return ret; + goto err_obj; - if (so.rodata == NULL) - return 0; + so.ggtt_offset = i915_gem_obj_ggtt_offset(so.obj); + + ret = render_state_setup(&so); + if (ret) + goto err_unpin; ret = req->engine->emit_bb_start(req, so.ggtt_offset, so.rodata->batch_items * 4, I915_DISPATCH_SECURE); if (ret) - goto out; + goto err_unpin; if (so.aux_batch_size > 8) { ret = req->engine->emit_bb_start(req, @@ -257,11 +214,13 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) so.aux_batch_size, I915_DISPATCH_SECURE); if (ret) - goto out; + goto err_unpin; } i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); -out: - render_state_fini(&so); +err_unpin: + i915_gem_object_ggtt_unpin(so.obj); +err_obj: + i915_gem_object_put(so.obj); return ret; } -- cgit v0.10.2 From 5b043f4e60ff72d1a0348871e33c176e4005ae9b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:38 +0100 Subject: drm/i915: Unify legacy/execlists submit_execbuf callbacks Now that emitting requests is identical between legacy and execlists, we can use the same function to build up the ring for submitting to either engine. (With the exception of i915_switch_contexts(), but in time that will also be handled gracefully.) 
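i915_switch_context() now absorbs the one remaining backend difference itself, so the execbuffer ioctl can call a single static execbuf_submit() for both legacy and execlists; roughly (full hunks in the diff below):

	int i915_switch_context(struct drm_i915_gem_request *req)
	{
		struct intel_engine_cs *engine = req->engine;

		lockdep_assert_held(&req->i915->drm.struct_mutex);
		if (i915.enable_execlists)
			return 0;	/* the logical ring code switches contexts itself */

		...
	}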
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-30-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-21-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8f4edc9..49ce21a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1705,18 +1705,6 @@ struct i915_virtual_gpu { bool active; }; -struct i915_execbuffer_params { - struct drm_device *dev; - struct drm_file *file; - uint32_t dispatch_flags; - uint32_t args_batch_start_offset; - uint64_t batch_obj_vm_offset; - struct intel_engine_cs *engine; - struct drm_i915_gem_object *batch_obj; - struct i915_gem_context *ctx; - struct drm_i915_gem_request *request; -}; - /* used in computing the new watermarks state */ struct intel_wm_config { unsigned int num_pipes_active; @@ -2016,9 +2004,6 @@ struct drm_i915_private { /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { - int (*execbuf_submit)(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas); void (*cleanup_engine)(struct intel_engine_cs *engine); void (*stop_engine)(struct intel_engine_cs *engine); @@ -2993,11 +2978,6 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -void i915_gem_execbuffer_move_to_active(struct list_head *vmas, - struct drm_i915_gem_request *req); -int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer2(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b6c4ff6..d79b949 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4534,11 +4534,9 @@ int i915_gem_init(struct drm_device *dev) mutex_lock(&dev->struct_mutex); if (!i915.enable_execlists) { - dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; dev_priv->gt.cleanup_engine = intel_engine_cleanup; dev_priv->gt.stop_engine = intel_engine_stop; } else { - dev_priv->gt.execbuf_submit = intel_execlists_submission; dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; dev_priv->gt.stop_engine = intel_logical_ring_stop; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index edde841..d7a7cc8 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -894,8 +894,9 @@ int i915_switch_context(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; - WARN_ON(i915.enable_execlists); lockdep_assert_held(&req->i915->drm.struct_mutex); + if (i915.enable_execlists) + return 0; if (!req->ctx->engine[engine->id].state) { struct i915_gem_context *to = req->ctx; @@ -943,9 +944,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) if (IS_ERR(req)) return PTR_ERR(req); - ret = 0; - if (!i915.enable_execlists) - ret = i915_switch_context(req); + ret = i915_switch_context(req); i915_add_request_no_flush(req); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 
ca941ff..a4b98af 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -42,6 +42,18 @@ #define BATCH_OFFSET_BIAS (256*1024) +struct i915_execbuffer_params { + struct drm_device *dev; + struct drm_file *file; + u32 dispatch_flags; + u32 args_batch_start_offset; + u32 batch_obj_vm_offset; + struct intel_engine_cs *engine; + struct drm_i915_gem_object *batch_obj; + struct i915_gem_context *ctx; + struct drm_i915_gem_request *request; +}; + struct eb_vmas { struct list_head vmas; int and; @@ -1117,7 +1129,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } -void +static void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct drm_i915_gem_request *req) { @@ -1244,10 +1256,10 @@ err: return ERR_PTR(ret); } -int -i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas) +static int +execbuf_submit(struct i915_execbuffer_params *params, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas) { struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; @@ -1637,7 +1649,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->batch_obj = batch_obj; params->ctx = ctx; - ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); + ret = execbuf_submit(params, args, &eb->vmas); err_request: i915_gem_execbuffer_retire_commands(params); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8f6324b..afc51d9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -642,39 +642,6 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) spin_unlock_bh(&engine->execlist_lock); } -static int execlists_move_to_gpu(struct drm_i915_gem_request *req, - struct list_head *vmas) -{ - const unsigned other_rings = ~intel_engine_flag(req->engine); - struct i915_vma *vma; - uint32_t flush_domains = 0; - bool flush_chipset = false; - int ret; - - list_for_each_entry(vma, vmas, exec_list) { - struct drm_i915_gem_object *obj = vma->obj; - - if (obj->active & other_rings) { - ret = i915_gem_object_sync(obj, req); - if (ret) - return ret; - } - - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - flush_chipset |= i915_gem_clflush_object(obj, false); - - flush_domains |= obj->base.write_domain; - } - - if (flush_domains & I915_GEM_DOMAIN_GTT) - wmb(); - - /* Unconditionally invalidate gpu caches and ensure that we do flush - * any residual writes from the previous batch. - */ - return req->engine->emit_flush(req, EMIT_INVALIDATE); -} - int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; @@ -776,96 +743,6 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request) return 0; } -/** - * intel_execlists_submission() - submit a batchbuffer for execution, Execlists style - * @params: execbuffer call parameters. - * @args: execbuffer call arguments. - * @vmas: list of vmas. - * - * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts - * away the submission details of the execbuffer ioctl call. - * - * Return: non-zero if the submission fails. 
- */ -int intel_execlists_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas) -{ - struct drm_device *dev = params->dev; - struct intel_engine_cs *engine = params->engine; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ring *ring = params->request->ring; - u64 exec_start; - int instp_mode; - u32 instp_mask; - int ret; - - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { - case I915_EXEC_CONSTANTS_REL_GENERAL: - case I915_EXEC_CONSTANTS_ABSOLUTE: - case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && engine->id != RCS) { - DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); - return -EINVAL; - } - - if (instp_mode != dev_priv->relative_constants_mode) { - if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { - DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); - return -EINVAL; - } - - /* The HW changed the meaning on this bit on gen6 */ - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; - } - break; - default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); - return -EINVAL; - } - - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - DRM_DEBUG("sol reset is gen7 only\n"); - return -EINVAL; - } - - ret = execlists_move_to_gpu(params->request, vmas); - if (ret) - return ret; - - if (engine->id == RCS && - instp_mode != dev_priv->relative_constants_mode) { - ret = intel_ring_begin(params->request, 4); - if (ret) - return ret; - - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, INSTPM); - intel_ring_emit(ring, instp_mask << 16 | instp_mode); - intel_ring_advance(ring); - - dev_priv->relative_constants_mode = instp_mode; - } - - exec_start = params->batch_obj_vm_offset + - args->batch_start_offset; - - ret = engine->emit_bb_start(params->request, - exec_start, args->batch_len, - params->dispatch_flags); - if (ret) - return ret; - - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); - - i915_gem_execbuffer_move_to_active(vmas, params->request); - - return 0; -} - void intel_execlists_cancel_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *req, *tmp; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index bdd764a..a52cf57 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -97,11 +97,6 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists); void intel_execlists_enable_submission(struct drm_i915_private *dev_priv); -struct i915_execbuffer_params; -int intel_execlists_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas); - void intel_execlists_cancel_requests(struct intel_engine_cs *engine); #endif /* _INTEL_LRC_H_ */ -- cgit v0.10.2 From ddf07be7a2aeb80aa159a7eeade01b7b5e1e3e43 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:39 +0100 Subject: drm/i915: Simplify calling engine->sync_to Since requests can no longer be generated as a side-effect of intel_ring_begin(), we know that the seqno will be unchanged during ring-emission. This predicatablity then means we do not have to check for the seqno wrapping around whilst emitting the semaphore for engine->sync_to(). 
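In miniature, the simplification looks like the sketch below: once a request's seqno is assigned at construction and never reissued during ring emission, the waiter can compare the signaller's seqno against the last value it synced to and record it directly, with no wrap bookkeeping. This is a stand-alone illustration with stand-in types, not the driver's structures; it compresses the __i915_gem_object_sync() and gen6_ring_sync() changes in the diff that follows, including the detail that the hardware semaphore comparison is strictly greater-than, so the emitted value is seqno - 1.

	/*
	 * Stand-alone sketch (user-space, placeholder types): with the
	 * signaller's seqno fixed at request construction, the waiter can
	 * compare against the last-synced value and record the new one
	 * directly -- no wrap detection. Illustrative only; the real code
	 * is in the diff below.
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define NUM_ENGINES 4

	struct fake_request {
		uint32_t seqno;		/* assigned once, stable during emission */
		int engine;
	};

	struct fake_engine {
		uint32_t sync_seqno[NUM_ENGINES];	/* last seqno already waited upon */
	};

	static void sync_to(struct fake_engine *waiter,
			    const struct fake_request *signal)
	{
		if (signal->seqno <= waiter->sync_seqno[signal->engine])
			return;	/* already ordered after this request */

		/*
		 * Hardware compares strictly greater-than, while the driver
		 * wants ">=", hence seqno - 1 (as in gen6_ring_sync).
		 */
		printf("emit semaphore wait until hw seqno > %u\n",
		       (unsigned int)(signal->seqno - 1));

		waiter->sync_seqno[signal->engine] = signal->seqno;
	}

	int main(void)
	{
		struct fake_engine rcs = { .sync_seqno = { 0 } };
		struct fake_request from_bcs = { .seqno = 42, .engine = 2 };

		sync_to(&rcs, &from_bcs);	/* emits a wait on 42 */
		sync_to(&rcs, &from_bcs);	/* no-op: already synced past 42 */
		return 0;
	}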
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-31-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-22-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 49ce21a..9b18b9c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1757,7 +1757,7 @@ struct drm_i915_private { struct i915_gem_context *kernel_context; struct intel_engine_cs engine[I915_NUM_ENGINES]; struct drm_i915_gem_object *semaphore_obj; - uint32_t last_seqno, next_seqno; + u32 next_seqno; struct drm_dma_handle *status_page_dmah; struct resource mch_res; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d79b949..3df6b48 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2867,22 +2867,15 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, i915_gem_object_retire_request(obj, from); } else { int idx = intel_engine_sync_index(from->engine, to->engine); - u32 seqno = i915_gem_request_get_seqno(from); - - if (seqno <= from->engine->semaphore.sync_seqno[idx]) + if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx]) return 0; trace_i915_gem_ring_sync_to(to, from); - ret = to->engine->semaphore.sync_to(to, from->engine, seqno); + ret = to->engine->semaphore.sync_to(to, from); if (ret) return ret; - /* We use last_read_req because sync_to() - * might have just caused seqno wrap under - * the radar. - */ - from->engine->semaphore.sync_seqno[idx] = - i915_gem_request_get_seqno(obj->last_read_req[from->engine->id]); + from->engine->semaphore.sync_seqno[idx] = from->fence.seqno; } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index e378eb6..11c19e7 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -264,14 +264,7 @@ int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) if (ret) return ret; - /* Carefully set the last_seqno value so that wrap - * detection still works - */ dev_priv->next_seqno = seqno; - dev_priv->last_seqno = seqno - 1; - if (dev_priv->last_seqno == 0) - dev_priv->last_seqno--; - return 0; } @@ -288,7 +281,7 @@ static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) dev_priv->next_seqno = 1; } - *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; + *seqno = dev_priv->next_seqno++; return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0b6f12c..51f3123 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1496,12 +1496,6 @@ static int gen8_render_emit_request(struct drm_i915_gem_request *req) return 0; } -static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv, - u32 seqno) -{ - return dev_priv->last_seqno < seqno; -} - /** * intel_ring_sync - sync the waiter to the signaller on seqno * @@ -1511,24 +1505,23 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_i915_private *dev_priv, */ static int -gen8_ring_sync(struct drm_i915_gem_request *waiter_req, - struct intel_engine_cs *signaller, - u32 seqno) +gen8_ring_sync(struct drm_i915_gem_request *wait, + struct drm_i915_gem_request *signal) { - struct intel_ring *waiter = waiter_req->ring; - struct drm_i915_private *dev_priv = waiter_req->i915; - u64 offset = 
GEN8_WAIT_OFFSET(waiter_req->engine, signaller->id); + struct intel_ring *waiter = wait->ring; + struct drm_i915_private *dev_priv = wait->i915; + u64 offset = GEN8_WAIT_OFFSET(wait->engine, signal->engine->id); struct i915_hw_ppgtt *ppgtt; int ret; - ret = intel_ring_begin(waiter_req, 4); + ret = intel_ring_begin(wait, 4); if (ret) return ret; intel_ring_emit(waiter, MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(waiter, seqno); + intel_ring_emit(waiter, signal->fence.seqno); intel_ring_emit(waiter, lower_32_bits(offset)); intel_ring_emit(waiter, upper_32_bits(offset)); intel_ring_advance(waiter); @@ -1538,48 +1531,37 @@ gen8_ring_sync(struct drm_i915_gem_request *waiter_req, * We do this on the i915_switch_context() following the wait and * before the dispatch. */ - ppgtt = waiter_req->ctx->ppgtt; - if (ppgtt && waiter_req->engine->id != RCS) - ppgtt->pd_dirty_rings |= intel_engine_flag(waiter_req->engine); + ppgtt = wait->ctx->ppgtt; + if (ppgtt && wait->engine->id != RCS) + ppgtt->pd_dirty_rings |= intel_engine_flag(wait->engine); return 0; } static int -gen6_ring_sync(struct drm_i915_gem_request *waiter_req, - struct intel_engine_cs *signaller, - u32 seqno) +gen6_ring_sync(struct drm_i915_gem_request *wait, + struct drm_i915_gem_request *signal) { - struct intel_ring *waiter = waiter_req->ring; + struct intel_ring *waiter = wait->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signaller->semaphore.mbox.wait[waiter_req->engine->id]; + u32 wait_mbox = signal->engine->semaphore.mbox.wait[wait->engine->id]; int ret; - /* Throughout all of the GEM code, seqno passed implies our current - * seqno is >= the last seqno executed. However for hardware the - * comparison is strictly greater than. - */ - seqno -= 1; - WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - ret = intel_ring_begin(waiter_req, 4); + ret = intel_ring_begin(wait, 4); if (ret) return ret; - /* If seqno wrap happened, omit the wait with no-ops */ - if (likely(!i915_gem_has_seqno_wrapped(waiter_req->i915, seqno))) { - intel_ring_emit(waiter, dw1 | wait_mbox); - intel_ring_emit(waiter, seqno); - intel_ring_emit(waiter, 0); - intel_ring_emit(waiter, MI_NOOP); - } else { - intel_ring_emit(waiter, MI_NOOP); - intel_ring_emit(waiter, MI_NOOP); - intel_ring_emit(waiter, MI_NOOP); - intel_ring_emit(waiter, MI_NOOP); - } + intel_ring_emit(waiter, dw1 | wait_mbox); + /* Throughout all of the GEM code, seqno passed implies our current + * seqno is >= the last seqno executed. However for hardware the + * comparison is strictly greater than. 
+ */ + intel_ring_emit(waiter, signal->fence.seqno - 1); + intel_ring_emit(waiter, 0); + intel_ring_emit(waiter, MI_NOOP); intel_ring_advance(waiter); return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c3d4b88..c0b7ce3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -277,9 +277,8 @@ struct intel_engine_cs { }; /* AKA wait() */ - int (*sync_to)(struct drm_i915_gem_request *to_req, - struct intel_engine_cs *from, - u32 seqno); + int (*sync_to)(struct drm_i915_gem_request *to, + struct drm_i915_gem_request *from); int (*signal)(struct drm_i915_gem_request *signaller_req); } semaphore; -- cgit v0.10.2 From ad7bdb2b99273b9fc7eb2dbf47b8d02b364d470e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Aug 2016 22:50:40 +0100 Subject: drm/i915: Rename engine->semaphore.sync_to, engine->sempahore.signal locals In order to be more consistent with the rest of the request construction and ring emission, use the common names for the ring and request. Rather than using signaler_req, waiter_req, and intel_ring *wait, we use plain req and ring. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-32-git-send-email-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-23-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 51f3123..a979acf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1309,108 +1309,105 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) intel_fini_pipe_control(engine); } -static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req) +static int gen8_rcs_signal(struct drm_i915_gem_request *req) { - struct intel_ring *signaller = signaller_req->ring; - struct drm_i915_private *dev_priv = signaller_req->i915; + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; int ret, num_rings; num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - ret = intel_ring_begin(signaller_req, (num_rings-1) * 8); + ret = intel_ring_begin(req, (num_rings-1) * 8); if (ret) return ret; for_each_engine_id(waiter, dev_priv, id) { - u64 gtt_offset = - signaller_req->engine->semaphore.signal_ggtt[id]; + u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(signaller, + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); - intel_ring_emit(signaller, lower_32_bits(gtt_offset)); - intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - intel_ring_emit(signaller, signaller_req->fence.seqno); - intel_ring_emit(signaller, 0); - intel_ring_emit(signaller, + intel_ring_emit(ring, lower_32_bits(gtt_offset)); + intel_ring_emit(ring, upper_32_bits(gtt_offset)); + intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_SEMAPHORE_SIGNAL | MI_SEMAPHORE_TARGET(waiter->hw_id)); - intel_ring_emit(signaller, 0); + intel_ring_emit(ring, 0); } - intel_ring_advance(signaller); + intel_ring_advance(ring); return 0; } -static int gen8_xcs_signal(struct drm_i915_gem_request 
*signaller_req) +static int gen8_xcs_signal(struct drm_i915_gem_request *req) { - struct intel_ring *signaller = signaller_req->ring; - struct drm_i915_private *dev_priv = signaller_req->i915; + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; int ret, num_rings; num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - ret = intel_ring_begin(signaller_req, (num_rings-1) * 6); + ret = intel_ring_begin(req, (num_rings-1) * 6); if (ret) return ret; for_each_engine_id(waiter, dev_priv, id) { - u64 gtt_offset = - signaller_req->engine->semaphore.signal_ggtt[id]; + u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - intel_ring_emit(signaller, + intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); - intel_ring_emit(signaller, + intel_ring_emit(ring, lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT); - intel_ring_emit(signaller, upper_32_bits(gtt_offset)); - intel_ring_emit(signaller, signaller_req->fence.seqno); - intel_ring_emit(signaller, + intel_ring_emit(ring, upper_32_bits(gtt_offset)); + intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, MI_SEMAPHORE_SIGNAL | MI_SEMAPHORE_TARGET(waiter->hw_id)); - intel_ring_emit(signaller, 0); + intel_ring_emit(ring, 0); } - intel_ring_advance(signaller); + intel_ring_advance(ring); return 0; } -static int gen6_signal(struct drm_i915_gem_request *signaller_req) +static int gen6_signal(struct drm_i915_gem_request *req) { - struct intel_ring *signaller = signaller_req->ring; - struct drm_i915_private *dev_priv = signaller_req->i915; + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *useless; enum intel_engine_id id; int ret, num_rings; num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); - ret = intel_ring_begin(signaller_req, round_up((num_rings-1) * 3, 2)); + ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2)); if (ret) return ret; for_each_engine_id(useless, dev_priv, id) { - i915_reg_t mbox_reg = - signaller_req->engine->semaphore.mbox.signal[id]; + i915_reg_t mbox_reg = req->engine->semaphore.mbox.signal[id]; if (i915_mmio_reg_valid(mbox_reg)) { - intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(signaller, mbox_reg); - intel_ring_emit(signaller, signaller_req->fence.seqno); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, mbox_reg); + intel_ring_emit(ring, req->fence.seqno); } } /* If num_dwords was rounded, make sure the tail pointer is correct */ if (num_rings % 2 == 0) - intel_ring_emit(signaller, MI_NOOP); - intel_ring_advance(signaller); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -1505,64 +1502,65 @@ static int gen8_render_emit_request(struct drm_i915_gem_request *req) */ static int -gen8_ring_sync(struct drm_i915_gem_request *wait, - struct drm_i915_gem_request *signal) +gen8_ring_sync_to(struct drm_i915_gem_request *req, + struct drm_i915_gem_request *signal) { - struct intel_ring *waiter = wait->ring; - struct drm_i915_private *dev_priv = wait->i915; - u64 offset = GEN8_WAIT_OFFSET(wait->engine, signal->engine->id); + struct intel_ring *ring = req->ring; + struct drm_i915_private *dev_priv = req->i915; + u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id); struct i915_hw_ppgtt *ppgtt; int ret; - ret = intel_ring_begin(wait, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; - 
intel_ring_emit(waiter, MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(waiter, signal->fence.seqno); - intel_ring_emit(waiter, lower_32_bits(offset)); - intel_ring_emit(waiter, upper_32_bits(offset)); - intel_ring_advance(waiter); + intel_ring_emit(ring, + MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_SAD_GTE_SDD); + intel_ring_emit(ring, signal->fence.seqno); + intel_ring_emit(ring, lower_32_bits(offset)); + intel_ring_emit(ring, upper_32_bits(offset)); + intel_ring_advance(ring); /* When the !RCS engines idle waiting upon a semaphore, they lose their * pagetables and we must reload them before executing the batch. * We do this on the i915_switch_context() following the wait and * before the dispatch. */ - ppgtt = wait->ctx->ppgtt; - if (ppgtt && wait->engine->id != RCS) - ppgtt->pd_dirty_rings |= intel_engine_flag(wait->engine); + ppgtt = req->ctx->ppgtt; + if (ppgtt && req->engine->id != RCS) + ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine); return 0; } static int -gen6_ring_sync(struct drm_i915_gem_request *wait, - struct drm_i915_gem_request *signal) +gen6_ring_sync_to(struct drm_i915_gem_request *req, + struct drm_i915_gem_request *signal) { - struct intel_ring *waiter = wait->ring; + struct intel_ring *ring = req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signal->engine->semaphore.mbox.wait[wait->engine->id]; + u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->id]; int ret; WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - ret = intel_ring_begin(wait, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; - intel_ring_emit(waiter, dw1 | wait_mbox); + intel_ring_emit(ring, dw1 | wait_mbox); /* Throughout all of the GEM code, seqno passed implies our current * seqno is >= the last seqno executed. However for hardware the * comparison is strictly greater than. 
*/ - intel_ring_emit(waiter, signal->fence.seqno - 1); - intel_ring_emit(waiter, 0); - intel_ring_emit(waiter, MI_NOOP); - intel_ring_advance(waiter); + intel_ring_emit(ring, signal->fence.seqno - 1); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); return 0; } @@ -2669,7 +2667,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, if (INTEL_GEN(dev_priv) >= 8) { u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj); - engine->semaphore.sync_to = gen8_ring_sync; + engine->semaphore.sync_to = gen8_ring_sync_to; engine->semaphore.signal = gen8_xcs_signal; for (i = 0; i < I915_NUM_ENGINES; i++) { @@ -2683,7 +2681,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, engine->semaphore.signal_ggtt[i] = ring_offset; } } else if (INTEL_GEN(dev_priv) >= 6) { - engine->semaphore.sync_to = gen6_ring_sync; + engine->semaphore.sync_to = gen6_ring_sync_to; engine->semaphore.signal = gen6_signal; /* diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c0b7ce3..a94ed5d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -277,9 +277,9 @@ struct intel_engine_cs { }; /* AKA wait() */ - int (*sync_to)(struct drm_i915_gem_request *to, - struct drm_i915_gem_request *from); - int (*signal)(struct drm_i915_gem_request *signaller_req); + int (*sync_to)(struct drm_i915_gem_request *req, + struct drm_i915_gem_request *signal); + int (*signal)(struct drm_i915_gem_request *req); } semaphore; /* Execlists */ -- cgit v0.10.2 From 96a945aa4267b4b16f4ddace7d03775fd8752c9e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Aug 2016 13:19:16 +0100 Subject: drm/i915: Move the common engine cleanup to intel_engine_cs.c Now that we initialize the state to both legacy and execlists inside intel_engine_cs, we should also clean up that state from the common functions. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1470226756-24401-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a5f2128..4ec914e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -210,3 +210,17 @@ int intel_engine_init_common(struct intel_engine_cs *engine) return intel_engine_init_cmd_parser(engine); } + +/** + * intel_engines_cleanup_common - cleans up the engine state created by + * the common initiailizers. + * @engine: Engine to cleanup. + * + * This cleans up everything created by the common helpers. 
+ */ +void intel_engine_cleanup_common(struct intel_engine_cs *engine) +{ + intel_engine_cleanup_cmd_parser(engine); + intel_engine_fini_breadcrumbs(engine); + i915_gem_batch_pool_fini(&engine->batch_pool); +} diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index afc51d9..622cd0b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1720,10 +1720,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) if (engine->cleanup) engine->cleanup(engine); - intel_engine_cleanup_cmd_parser(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); - - intel_engine_fini_breadcrumbs(engine); + intel_engine_cleanup_common(engine); if (engine->status_page.obj) { i915_gem_object_unpin_map(engine->status_page.obj); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a979acf..ecf4278 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2218,9 +2218,7 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) cleanup_phys_status_page(engine); } - intel_engine_cleanup_cmd_parser(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); - intel_engine_fini_breadcrumbs(engine); + intel_engine_cleanup_common(engine); intel_ring_context_unpin(dev_priv->kernel_context, engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a94ed5d..1706241 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -483,6 +483,7 @@ void intel_fini_pipe_control(struct intel_engine_cs *engine); void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); +void intel_engine_cleanup_common(struct intel_engine_cs *engine); int intel_init_render_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); -- cgit v0.10.2 From 8dac1e1f2068321fb4b7062d3c5408971f7a7e35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 2 Aug 2016 14:07:33 +0300 Subject: drm/i915: Clean up the extra RPM ref on CHV with i915.enable_rc6=0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the CHV early bail out from intel_cleanup_gt_powersave() so that we'll clean up the extra RPM reference held due to i915.enable_rc6=0. 
Cc: Imre Deak Fixes: b268c699aca5 ("drm/i915: refactor RPM disabling due to RC6 being disabled") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470136053-23276-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index df02483..63f454a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6541,9 +6541,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) { - if (IS_CHERRYVIEW(dev_priv)) - return; - else if (IS_VALLEYVIEW(dev_priv)) + if (IS_VALLEYVIEW(dev_priv)) valleyview_cleanup_gt_powersave(dev_priv); if (!i915.enable_rc6) -- cgit v0.10.2 From 894eeecc1e3b47ef115e252ec58eff7ec174e7ef Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:20 +0100 Subject: drm/i915: Amalgamate GGTT/ppGTT vma debug list walkers As we can now have multiple VMA inside the global GTT (with partial mappings, rotations, etc), it is no longer true that there may just be a single GGTT entry and so we should walk the full vma_list to count up the actual usage. In addition to unifying the two walkers, switch from multiplying the object size for each vma to summing the bound vma sizes. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 410dc35..e7724be 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -335,6 +335,7 @@ static int per_file_stats(int id, void *ptr, void *data) struct drm_i915_gem_object *obj = ptr; struct file_stats *stats = data; struct i915_vma *vma; + int bound = 0; stats->count++; stats->total += obj->base.size; @@ -342,41 +343,28 @@ static int per_file_stats(int id, void *ptr, void *data) if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; - if (USES_FULL_PPGTT(obj->base.dev)) { - list_for_each_entry(vma, &obj->vma_list, obj_link) { - struct i915_hw_ppgtt *ppgtt; + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!drm_mm_node_allocated(&vma->node)) + continue; - if (!drm_mm_node_allocated(&vma->node)) - continue; + bound++; - if (vma->is_ggtt) { - stats->global += obj->base.size; - continue; - } + if (vma->is_ggtt) { + stats->global += vma->node.size; + } else { + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm); - ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base); if (ppgtt->file_priv != stats->file_priv) continue; - - if (obj->active) /* XXX per-vma statistic */ - stats->active += obj->base.size; - else - stats->inactive += obj->base.size; - - return 0; - } - } else { - if (i915_gem_obj_ggtt_bound(obj)) { - stats->global += obj->base.size; - if (obj->active) - stats->active += obj->base.size; - else - stats->inactive += obj->base.size; - return 0; } + + if (obj->active) /* XXX per-vma statistic */ + stats->active += vma->node.size; + else + stats->inactive += vma->node.size; } - if (!list_empty(&obj->global_list)) + if (!bound) stats->unbound += obj->base.size; return 0; -- cgit v0.10.2 From 0088e522ddcb2cb5763ad85f9db2c2b473843d8d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:21 +0100 Subject: drm/i915: Split GGTT initialisation between probing and setup In order to handle conflicting drivers (i.e. 
vgacon) having a different setup of hardware, we have to remove those other drivers before we try to setup our own mappings. This requires us to split GGTT initialisation between probing for the hardware location (part of the PCI BAR) and later establishing the kernel resources for it. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 83afdd0..8996c2a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1004,16 +1004,10 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) intel_sanitize_options(dev_priv); - ret = i915_ggtt_init_hw(dev); + ret = i915_ggtt_probe_hw(dev); if (ret) return ret; - ret = i915_ggtt_enable_hw(dev); - if (ret) { - DRM_ERROR("failed to enable GGTT\n"); - goto out_ggtt; - } - /* WARNING: Apparently we must kick fbdev drivers before vgacon, * otherwise the vga fbdev driver falls over. */ ret = i915_kick_out_firmware_fb(dev_priv); @@ -1028,6 +1022,16 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) goto out_ggtt; } + ret = i915_ggtt_init_hw(dev); + if (ret) + return ret; + + ret = i915_ggtt_enable_hw(dev); + if (ret) { + DRM_ERROR("failed to enable GGTT\n"); + goto out_ggtt; + } + pci_set_master(dev->pdev); /* overlay on gen2 is broken and can't address above 1G */ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 671b1ca..3fc7777 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3208,10 +3208,10 @@ static void i915_gmch_remove(struct i915_address_space *vm) } /** - * i915_ggtt_init_hw - Initialize GGTT hardware + * i915_ggtt_probe_hw - Probe GGTT hardware location * @dev: DRM device */ -int i915_ggtt_init_hw(struct drm_device *dev) +int i915_ggtt_probe_hw(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; @@ -3254,14 +3254,6 @@ int i915_ggtt_init_hw(struct drm_device *dev) ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); } - /* - * Initialise stolen early so that we may reserve preallocated - * objects for the BIOS to KMS transition. - */ - ret = i915_gem_init_stolen(dev); - if (ret) - goto out_gtt_cleanup; - /* GMADR is the PCI mmio aperture into the global GTT. */ DRM_INFO("Memory usable by graphics device = %lluM\n", ggtt->base.total >> 20); @@ -3273,10 +3265,30 @@ int i915_ggtt_init_hw(struct drm_device *dev) #endif return 0; +} + +/** + * i915_ggtt_init_hw - Initialize GGTT hardware + * @dev: DRM device + */ +int i915_ggtt_init_hw(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct i915_ggtt *ggtt = &dev_priv->ggtt; + int ret; + + /* + * Initialise stolen early so that we may reserve preallocated + * objects for the BIOS to KMS transition. 
+ */ + ret = i915_gem_init_stolen(dev); + if (ret) + goto out_gtt_cleanup; + + return 0; out_gtt_cleanup: ggtt->base.cleanup(&ggtt->base); - return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index c4a6579..5b6744a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -521,6 +521,7 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) px_dma(ppgtt->base.scratch_pd); } +int i915_ggtt_probe_hw(struct drm_device *dev); int i915_ggtt_init_hw(struct drm_device *dev); int i915_ggtt_enable_hw(struct drm_device *dev); void i915_gem_init_ggtt(struct drm_device *dev); -- cgit v0.10.2 From 97d6d7ab68e0b1f7268a5a69187c86305d65762f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:22 +0100 Subject: drm/i915: Update GGTT initialisation functions to take drm_i915_private Since these are internal functions they operate on drm_i915_private and not the drm_device being passed in. So pass in the drm_i915_private instead, and remove one layer of dancing. No space wins here, just conforming to the norm in function parameters. v2: Include all the probe functions Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-3-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8996c2a..ed52f61 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1004,7 +1004,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) intel_sanitize_options(dev_priv); - ret = i915_ggtt_probe_hw(dev); + ret = i915_ggtt_probe_hw(dev_priv); if (ret) return ret; @@ -1022,11 +1022,11 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) goto out_ggtt; } - ret = i915_ggtt_init_hw(dev); + ret = i915_ggtt_init_hw(dev_priv); if (ret) return ret; - ret = i915_ggtt_enable_hw(dev); + ret = i915_ggtt_enable_hw(dev_priv); if (ret) { DRM_ERROR("failed to enable GGTT\n"); goto out_ggtt; @@ -1104,7 +1104,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) return 0; out_ggtt: - i915_ggtt_cleanup_hw(dev); + i915_ggtt_cleanup_hw(dev_priv); return ret; } @@ -1124,7 +1124,7 @@ static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv) pm_qos_remove_request(&dev_priv->pm_qos); arch_phys_wc_del(ggtt->mtrr); io_mapping_free(ggtt->mappable); - i915_ggtt_cleanup_hw(dev); + i915_ggtt_cleanup_hw(dev_priv); } /** @@ -1570,7 +1570,7 @@ static int i915_drm_resume(struct drm_device *dev) disable_rpm_wakeref_asserts(dev_priv); - ret = i915_ggtt_enable_hw(dev); + ret = i915_ggtt_enable_hw(dev_priv); if (ret) DRM_ERROR("failed to re-enable GGTT\n"); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3df6b48..c4119ba 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4543,7 +4543,7 @@ int i915_gem_init(struct drm_device *dev) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); i915_gem_init_userptr(dev_priv); - i915_gem_init_ggtt(dev); + i915_gem_init_ggtt(dev_priv); ret = i915_gem_context_init(dev); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3fc7777..39b3b36 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2223,17 +2223,16 @@ void i915_ppgtt_release(struct kref *kref) kfree(ppgtt); } -extern int 
intel_iommu_gfx_mapped; /* Certain Gen5 chipsets require require idling the GPU before * unmapping anything from the GTT when VT-d is enabled. */ -static bool needs_idle_maps(struct drm_device *dev) +static bool needs_idle_maps(struct drm_i915_private *dev_priv) { #ifdef CONFIG_INTEL_IOMMU /* Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. */ - if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) + if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) return true; #endif return false; @@ -2746,7 +2745,7 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, *end -= 4096; } -static int i915_gem_setup_global_gtt(struct drm_device *dev, +static int i915_gem_setup_global_gtt(struct drm_i915_private *dev_priv, u64 start, u64 mappable_end, u64 end) @@ -2760,7 +2759,6 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, * aperture. One page should be enough to keep any prefetching inside * of the aperture. */ - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_mm_node *entry; struct drm_i915_gem_object *obj; @@ -2781,7 +2779,7 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, if (ret) return ret; - if (!HAS_LLC(dev)) + if (!HAS_LLC(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; /* Mark any preallocated objects as occupied */ @@ -2813,14 +2811,14 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, /* And finally clear the reserved guard page */ ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true); - if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { + if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { struct i915_hw_ppgtt *ppgtt; ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); if (!ppgtt) return -ENOMEM; - ret = __hw_ppgtt_init(dev, ppgtt); + ret = __hw_ppgtt_init(&dev_priv->drm, ppgtt); if (ret) { ppgtt->base.cleanup(&ppgtt->base); kfree(ppgtt); @@ -2851,23 +2849,22 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, /** * i915_gem_init_ggtt - Initialize GEM for Global GTT - * @dev: DRM device + * @dev_priv: i915 device */ -void i915_gem_init_ggtt(struct drm_device *dev) +void i915_gem_init_ggtt(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - i915_gem_setup_global_gtt(dev, 0, ggtt->mappable_end, ggtt->base.total); + i915_gem_setup_global_gtt(dev_priv, + 0, ggtt->mappable_end, ggtt->base.total); } /** * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization - * @dev: DRM device + * @dev_priv: i915 device */ -void i915_ggtt_cleanup_hw(struct drm_device *dev) +void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; if (dev_priv->mm.aliasing_ppgtt) { @@ -2876,7 +2873,7 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev) ppgtt->base.cleanup(&ppgtt->base); } - i915_gem_cleanup_stolen(dev); + i915_gem_cleanup_stolen(&dev_priv->drm); if (drm_mm_initialized(&ggtt->base.mm)) { intel_vgt_deballoon(dev_priv); @@ -2966,17 +2963,16 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) return (gen9_gmch_ctl - 0xf0 + 1) << 22; } -static int ggtt_probe_common(struct drm_device *dev, - size_t gtt_size) +static int ggtt_probe_common(struct drm_i915_private *dev_priv, size_t gtt_size) { - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct pci_dev *pdev = 
dev_priv->drm.pdev; struct i915_page_scratch *scratch_page; phys_addr_t ggtt_phys_addr; /* For Modern GENs the PTEs and register space are split in the BAR */ - ggtt_phys_addr = pci_resource_start(dev->pdev, 0) + - (pci_resource_len(dev->pdev, 0) / 2); + ggtt_phys_addr = pci_resource_start(pdev, 0) + + (pci_resource_len(pdev, 0) / 2); /* * On BXT writes larger than 64 bit to the GTT pagetable range will be @@ -2985,7 +2981,7 @@ static int ggtt_probe_common(struct drm_device *dev, * resort to an uncached mapping. The WC issue is easily caught by the * readback check when writing GTT PTE entries. */ - if (IS_BROXTON(dev)) + if (IS_BROXTON(dev_priv)) ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size); else ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size); @@ -2994,7 +2990,7 @@ static int ggtt_probe_common(struct drm_device *dev, return -ENOMEM; } - scratch_page = alloc_scratch_page(dev); + scratch_page = alloc_scratch_page(&dev_priv->drm); if (IS_ERR(scratch_page)) { DRM_ERROR("Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ @@ -3082,24 +3078,24 @@ static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) static int gen8_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_device *dev = ggtt->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); + struct pci_dev *pdev = dev_priv->drm.pdev; u16 snb_gmch_ctl; int ret; /* TODO: We're not aware of mappable constraints on gen8 yet */ - ggtt->mappable_base = pci_resource_start(dev->pdev, 2); - ggtt->mappable_end = pci_resource_len(dev->pdev, 2); + ggtt->mappable_base = pci_resource_start(pdev, 2); + ggtt->mappable_end = pci_resource_len(pdev, 2); - if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) - pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); - pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - if (INTEL_INFO(dev)->gen >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); - } else if (IS_CHERRYVIEW(dev)) { + } else if (IS_CHERRYVIEW(dev_priv)) { ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl); } else { @@ -3109,12 +3105,12 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.total = (ggtt->size / sizeof(gen8_pte_t)) << PAGE_SHIFT; - if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) + if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) chv_setup_private_ppat(dev_priv); else bdw_setup_private_ppat(dev_priv); - ret = ggtt_probe_common(dev, ggtt->size); + ret = ggtt_probe_common(dev_priv, ggtt->size); ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; @@ -3132,12 +3128,13 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) static int gen6_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_device *dev = ggtt->base.dev; + struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); + struct pci_dev *pdev = dev_priv->drm.pdev; u16 snb_gmch_ctl; int ret; - ggtt->mappable_base = pci_resource_start(dev->pdev, 2); - ggtt->mappable_end = pci_resource_len(dev->pdev, 2); + ggtt->mappable_base = pci_resource_start(pdev, 2); + ggtt->mappable_end = pci_resource_len(pdev, 2); /* 64/512MB is the current min/max we actually know of, but this is just * a coarse sanity check. 
@@ -3147,15 +3144,15 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) return -ENXIO; } - if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) - pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); - pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl); ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT; - ret = ggtt_probe_common(dev, ggtt->size); + ret = ggtt_probe_common(dev_priv, ggtt->size); ggtt->base.clear_range = gen6_ggtt_clear_range; ggtt->base.insert_page = gen6_ggtt_insert_page; @@ -3176,8 +3173,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm) static int i915_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_device *dev = ggtt->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); int ret; ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); @@ -3189,7 +3185,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, &ggtt->mappable_base, &ggtt->mappable_end); - ggtt->do_idle_maps = needs_idle_maps(&dev_priv->drm); + ggtt->do_idle_maps = needs_idle_maps(dev_priv); ggtt->base.insert_page = i915_ggtt_insert_page; ggtt->base.insert_entries = i915_ggtt_insert_entries; ggtt->base.clear_range = i915_ggtt_clear_range; @@ -3209,28 +3205,27 @@ static void i915_gmch_remove(struct i915_address_space *vm) /** * i915_ggtt_probe_hw - Probe GGTT hardware location - * @dev: DRM device + * @dev_priv: i915 device */ -int i915_ggtt_probe_hw(struct drm_device *dev) +int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; int ret; - if (INTEL_INFO(dev)->gen <= 5) { + if (INTEL_GEN(dev_priv) <= 5) { ggtt->probe = i915_gmch_probe; ggtt->base.cleanup = i915_gmch_remove; - } else if (INTEL_INFO(dev)->gen < 8) { + } else if (INTEL_GEN(dev_priv) < 8) { ggtt->probe = gen6_gmch_probe; ggtt->base.cleanup = gen6_gmch_remove; - if (HAS_EDRAM(dev)) + if (HAS_EDRAM(dev_priv)) ggtt->base.pte_encode = iris_pte_encode; - else if (IS_HASWELL(dev)) + else if (IS_HASWELL(dev_priv)) ggtt->base.pte_encode = hsw_pte_encode; - else if (IS_VALLEYVIEW(dev)) + else if (IS_VALLEYVIEW(dev_priv)) ggtt->base.pte_encode = byt_pte_encode; - else if (INTEL_INFO(dev)->gen >= 7) + else if (INTEL_GEN(dev_priv) >= 7) ggtt->base.pte_encode = ivb_pte_encode; else ggtt->base.pte_encode = snb_pte_encode; @@ -3239,7 +3234,7 @@ int i915_ggtt_probe_hw(struct drm_device *dev) ggtt->base.cleanup = gen6_gmch_remove; } - ggtt->base.dev = dev; + ggtt->base.dev = &dev_priv->drm; ggtt->base.is_ggtt = true; ret = ggtt->probe(ggtt); @@ -3269,11 +3264,10 @@ int i915_ggtt_probe_hw(struct drm_device *dev) /** * i915_ggtt_init_hw - Initialize GGTT hardware - * @dev: DRM device + * @dev_priv: i915 device */ -int i915_ggtt_init_hw(struct drm_device *dev) +int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; int ret; @@ -3281,7 +3275,7 @@ int i915_ggtt_init_hw(struct drm_device *dev) * Initialise stolen early so that we may reserve preallocated * objects for the BIOS to KMS transition. 
*/ - ret = i915_gem_init_stolen(dev); + ret = i915_gem_init_stolen(&dev_priv->drm); if (ret) goto out_gtt_cleanup; @@ -3292,9 +3286,9 @@ out_gtt_cleanup: return ret; } -int i915_ggtt_enable_hw(struct drm_device *dev) +int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) { - if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) + if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) return -EIO; return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 5b6744a..f85a73f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -521,11 +521,11 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) px_dma(ppgtt->base.scratch_pd); } -int i915_ggtt_probe_hw(struct drm_device *dev); -int i915_ggtt_init_hw(struct drm_device *dev); -int i915_ggtt_enable_hw(struct drm_device *dev); -void i915_gem_init_ggtt(struct drm_device *dev); -void i915_ggtt_cleanup_hw(struct drm_device *dev); +int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); +int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); +int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); +void i915_gem_init_ggtt(struct drm_i915_private *dev_priv); +void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv); int i915_ppgtt_init_hw(struct drm_device *dev); void i915_ppgtt_release(struct kref *kref); -- cgit v0.10.2 From f6b9d5cabd582bcae8db92eac0c3a138370e4167 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:23 +0100 Subject: drm/i915: Split early global GTT initialisation Initialising the global GTT is tricky as we wish to use the drm_mm range manager during the modesetting initialisation (to capture stolen allocations from the BIOS) before we actually enable GEM. To overcome this, we currently setup the drm_mm first and then carefully rebind them. v2: Fixup after rebasing v3: GGTT initialisation needs to be split around kicking out conflicts v4: Restore an old UMS BUG_ON(mappable > total) as a DRM_ERROR plus fixup of probe results. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-4-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ed52f61..50c5640 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -993,8 +993,6 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) static int i915_driver_init_hw(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - struct i915_ggtt *ggtt = &dev_priv->ggtt; - uint32_t aperture_size; int ret; if (i915_inject_load_failure()) @@ -1044,7 +1042,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) } } - /* 965GM sometimes incorrectly writes to hardware status page (HWS) * using 32bit addressing, overwriting memory if HWS is located * above 4GB. 
@@ -1063,19 +1060,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) } } - aperture_size = ggtt->mappable_end; - - ggtt->mappable = - io_mapping_create_wc(ggtt->mappable_base, - aperture_size); - if (!ggtt->mappable) { - ret = -EIO; - goto out_ggtt; - } - - ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, - aperture_size); - pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); @@ -1116,14 +1100,11 @@ out_ggtt: static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - struct i915_ggtt *ggtt = &dev_priv->ggtt; if (dev->pdev->msi_enabled) pci_disable_msi(dev->pdev); pm_qos_remove_request(&dev_priv->pm_qos); - arch_phys_wc_del(ggtt->mtrr); - io_mapping_free(ggtt->mappable); i915_ggtt_cleanup_hw(dev_priv); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c4119ba..5303d74 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4543,7 +4543,10 @@ int i915_gem_init(struct drm_device *dev) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); i915_gem_init_userptr(dev_priv); - i915_gem_init_ggtt(dev_priv); + + ret = i915_gem_init_ggtt(dev_priv); + if (ret) + goto out_unlock; ret = i915_gem_context_init(dev); if (ret) @@ -4634,7 +4637,6 @@ i915_gem_load_init(struct drm_device *dev) SLAB_HWCACHE_ALIGN, NULL); - INIT_LIST_HEAD(&dev_priv->vm_list); INIT_LIST_HEAD(&dev_priv->context_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 39b3b36..72231ea4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2745,10 +2745,7 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, *end -= 4096; } -static int i915_gem_setup_global_gtt(struct drm_i915_private *dev_priv, - u64 start, - u64 mappable_end, - u64 end) +int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) { /* Let GEM Manage all of the aperture. * @@ -2760,46 +2757,14 @@ static int i915_gem_setup_global_gtt(struct drm_i915_private *dev_priv, * of the aperture. 
*/ struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_mm_node *entry; - struct drm_i915_gem_object *obj; unsigned long hole_start, hole_end; + struct drm_mm_node *entry; int ret; - BUG_ON(mappable_end > end); - - ggtt->base.start = start; - - /* Subtract the guard page before address space initialization to - * shrink the range used by drm_mm */ - ggtt->base.total = end - start - PAGE_SIZE; - i915_address_space_init(&ggtt->base, dev_priv); - ggtt->base.total += PAGE_SIZE; - ret = intel_vgt_balloon(dev_priv); if (ret) return ret; - if (!HAS_LLC(dev_priv)) - ggtt->base.mm.color_adjust = i915_gtt_color_adjust; - - /* Mark any preallocated objects as occupied */ - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - struct i915_vma *vma = i915_gem_obj_to_vma(obj, &ggtt->base); - - DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n", - i915_gem_obj_ggtt_offset(obj), obj->base.size); - - WARN_ON(i915_gem_obj_ggtt_bound(obj)); - ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); - if (ret) { - DRM_DEBUG_KMS("Reservation failed: %i\n", ret); - return ret; - } - vma->bound |= GLOBAL_BIND; - __i915_vma_set_map_and_fenceable(vma); - list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); - } - /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", @@ -2809,7 +2774,9 @@ static int i915_gem_setup_global_gtt(struct drm_i915_private *dev_priv, } /* And finally clear the reserved guard page */ - ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true); + ggtt->base.clear_range(&ggtt->base, + ggtt->base.total - PAGE_SIZE, PAGE_SIZE, + true); if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { struct i915_hw_ppgtt *ppgtt; @@ -2848,18 +2815,6 @@ static int i915_gem_setup_global_gtt(struct drm_i915_private *dev_priv, } /** - * i915_gem_init_ggtt - Initialize GEM for Global GTT - * @dev_priv: i915 device - */ -void i915_gem_init_ggtt(struct drm_i915_private *dev_priv) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - - i915_gem_setup_global_gtt(dev_priv, - 0, ggtt->mappable_end, ggtt->base.total); -} - -/** * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization * @dev_priv: i915 device */ @@ -2883,6 +2838,9 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) } ggtt->base.cleanup(&ggtt->base); + + arch_phys_wc_del(ggtt->mtrr); + io_mapping_free(ggtt->mappable); } static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -3243,12 +3201,19 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) if ((ggtt->base.total - 1) >> 32) { DRM_ERROR("We never expected a Global GTT with more than 32bits" - "of address space! Found %lldM!\n", + " of address space! Found %lldM!\n", ggtt->base.total >> 20); ggtt->base.total = 1ULL << 32; ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); } + if (ggtt->mappable_end > ggtt->base.total) { + DRM_ERROR("mappable aperture extends past end of GGTT," + " aperture=%llx, total=%llx\n", + ggtt->mappable_end, ggtt->base.total); + ggtt->mappable_end = ggtt->base.total; + } + /* GMADR is the PCI mmio aperture into the global GTT. */ DRM_INFO("Memory usable by graphics device = %lluM\n", ggtt->base.total >> 20); @@ -3271,6 +3236,26 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) struct i915_ggtt *ggtt = &dev_priv->ggtt; int ret; + INIT_LIST_HEAD(&dev_priv->vm_list); + + /* Subtract the guard page before address space initialization to + * shrink the range used by drm_mm. 
+ */ + ggtt->base.total -= PAGE_SIZE; + i915_address_space_init(&ggtt->base, dev_priv); + ggtt->base.total += PAGE_SIZE; + if (!HAS_LLC(dev_priv)) + ggtt->base.mm.color_adjust = i915_gtt_color_adjust; + + ggtt->mappable = + io_mapping_create_wc(ggtt->mappable_base, ggtt->mappable_end); + if (!ggtt->mappable) { + ret = -EIO; + goto out_gtt_cleanup; + } + + ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); + /* * Initialise stolen early so that we may reserve preallocated * objects for the BIOS to KMS transition. diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f85a73f..bb39993 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -524,7 +524,7 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); -void i915_gem_init_ggtt(struct drm_i915_private *dev_priv); +int i915_gem_init_ggtt(struct drm_i915_private *dev_priv); void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv); int i915_ppgtt_init_hw(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 310756c..9a8cc8c 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -698,18 +698,17 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, */ vma->node.start = gtt_offset; vma->node.size = size; - if (drm_mm_initialized(&ggtt->base.mm)) { - ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); - if (ret) { - DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); - goto err; - } - vma->bound |= GLOBAL_BIND; - __i915_vma_set_map_and_fenceable(vma); - list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); + ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); + if (ret) { + DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); + goto err; } + vma->bound |= GLOBAL_BIND; + __i915_vma_set_map_and_fenceable(vma); + list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); + list_add_tail(&obj->global_list, &dev_priv->mm.bound_list); i915_gem_object_pin_pages(obj); -- cgit v0.10.2 From 34c998b4ebf6c48154c76ea9bc9bffabd91a8af2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:24 +0100 Subject: drm/i915: Rearrange GGTT probing to avoid needing a vfunc Since we have a static if-else-chain for device probing of the global GTT, we do not need to use a function pointer, let alone store it when we never use it again. So use the if-else-chain to call down into the device specific probe. 
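The shape of that change is roughly the sketch below: the generation check calls the device-specific probe directly instead of first storing a callback in the ggtt that is then invoked exactly once. Names and types here are placeholders for illustration only; the real probe functions appear in the diff that follows.

	/*
	 * Illustrative sketch only: direct dispatch from the if-else chain,
	 * no probe function pointer to set up and store. Placeholder types
	 * and values, not the driver's.
	 */
	#include <stdio.h>

	struct fake_ggtt { unsigned long long total; };

	static int gmch_probe(struct fake_ggtt *ggtt) { ggtt->total = 1ULL << 29; return 0; }
	static int gen6_probe(struct fake_ggtt *ggtt) { ggtt->total = 1ULL << 31; return 0; }
	static int gen8_probe(struct fake_ggtt *ggtt) { ggtt->total = 1ULL << 32; return 0; }

	static int ggtt_probe_hw(struct fake_ggtt *ggtt, int gen)
	{
		if (gen <= 5)
			return gmch_probe(ggtt);
		else if (gen < 8)
			return gen6_probe(ggtt);
		return gen8_probe(ggtt);
	}

	int main(void)
	{
		struct fake_ggtt ggtt = { 0 };

		if (ggtt_probe_hw(&ggtt, 9))
			return 1;
		printf("GGTT size: %lluM\n", ggtt.total >> 20);
		return 0;
	}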
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-5-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 72231ea4..0380129 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2118,7 +2118,6 @@ static void i915_address_space_init(struct i915_address_space *vm, struct drm_i915_private *dev_priv) { drm_mm_init(&vm->mm, vm->start, vm->total); - vm->dev = &dev_priv->drm; INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); list_add_tail(&vm->global_link, &dev_priv->vm_list); @@ -2921,16 +2920,14 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) return (gen9_gmch_ctl - 0xf0 + 1) << 22; } -static int ggtt_probe_common(struct drm_i915_private *dev_priv, size_t gtt_size) +static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = ggtt->base.dev->pdev; struct i915_page_scratch *scratch_page; - phys_addr_t ggtt_phys_addr; + phys_addr_t phys_addr; /* For Modern GENs the PTEs and register space are split in the BAR */ - ggtt_phys_addr = pci_resource_start(pdev, 0) + - (pci_resource_len(pdev, 0) / 2); + phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; /* * On BXT writes larger than 64 bit to the GTT pagetable range will be @@ -2939,16 +2936,16 @@ static int ggtt_probe_common(struct drm_i915_private *dev_priv, size_t gtt_size) * resort to an uncached mapping. The WC issue is easily caught by the * readback check when writing GTT PTE entries. */ - if (IS_BROXTON(dev_priv)) - ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size); + if (IS_BROXTON(ggtt->base.dev)) + ggtt->gsm = ioremap_nocache(phys_addr, size); else - ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size); + ggtt->gsm = ioremap_wc(phys_addr, size); if (!ggtt->gsm) { - DRM_ERROR("Failed to map the gtt page table\n"); + DRM_ERROR("Failed to map the ggtt page table\n"); return -ENOMEM; } - scratch_page = alloc_scratch_page(&dev_priv->drm); + scratch_page = alloc_scratch_page(ggtt->base.dev); if (IS_ERR(scratch_page)) { DRM_ERROR("Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ @@ -3034,12 +3031,20 @@ static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); } +static void gen6_gmch_remove(struct i915_address_space *vm) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + iounmap(ggtt->gsm); + free_scratch_page(vm->dev, vm->scratch_page); +} + static int gen8_gmch_probe(struct i915_ggtt *ggtt) { struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int size; u16 snb_gmch_ctl; - int ret; /* TODO: We're not aware of mappable constraints on gen8 yet */ ggtt->mappable_base = pci_resource_start(pdev, 2); @@ -3052,24 +3057,23 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) if (INTEL_GEN(dev_priv) >= 9) { ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); - ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); + size = gen8_get_total_gtt_size(snb_gmch_ctl); } else if (IS_CHERRYVIEW(dev_priv)) { ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); - ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl); + size = chv_get_total_gtt_size(snb_gmch_ctl); } else { ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); - 
ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); + size = gen8_get_total_gtt_size(snb_gmch_ctl); } - ggtt->base.total = (ggtt->size / sizeof(gen8_pte_t)) << PAGE_SHIFT; + ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) chv_setup_private_ppat(dev_priv); else bdw_setup_private_ppat(dev_priv); - ret = ggtt_probe_common(dev_priv, ggtt->size); - + ggtt->base.cleanup = gen6_gmch_remove; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; ggtt->base.insert_page = gen8_ggtt_insert_page; @@ -3081,15 +3085,15 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) if (IS_CHERRYVIEW(dev_priv)) ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; - return ret; + return ggtt_probe_common(ggtt, size); } static int gen6_gmch_probe(struct i915_ggtt *ggtt) { struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int size; u16 snb_gmch_ctl; - int ret; ggtt->mappable_base = pci_resource_start(pdev, 2); ggtt->mappable_end = pci_resource_len(pdev, 2); @@ -3097,7 +3101,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) /* 64/512MB is the current min/max we actually know of, but this is just * a coarse sanity check. */ - if ((ggtt->mappable_end < (64<<20) || (ggtt->mappable_end > (512<<20)))) { + if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); return -ENXIO; } @@ -3107,26 +3111,34 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); - ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl); - ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT; - ret = ggtt_probe_common(dev_priv, ggtt->size); + size = gen6_get_total_gtt_size(snb_gmch_ctl); + ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; ggtt->base.clear_range = gen6_ggtt_clear_range; ggtt->base.insert_page = gen6_ggtt_insert_page; ggtt->base.insert_entries = gen6_ggtt_insert_entries; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.cleanup = gen6_gmch_remove; + + if (HAS_EDRAM(dev_priv)) + ggtt->base.pte_encode = iris_pte_encode; + else if (IS_HASWELL(dev_priv)) + ggtt->base.pte_encode = hsw_pte_encode; + else if (IS_VALLEYVIEW(dev_priv)) + ggtt->base.pte_encode = byt_pte_encode; + else if (INTEL_GEN(dev_priv) >= 7) + ggtt->base.pte_encode = ivb_pte_encode; + else + ggtt->base.pte_encode = snb_pte_encode; - return ret; + return ggtt_probe_common(ggtt, size); } -static void gen6_gmch_remove(struct i915_address_space *vm) +static void i915_gmch_remove(struct i915_address_space *vm) { - struct i915_ggtt *ggtt = container_of(vm, struct i915_ggtt, base); - - iounmap(ggtt->gsm); - free_scratch_page(vm->dev, vm->scratch_page); + intel_gmch_remove(); } static int i915_gmch_probe(struct i915_ggtt *ggtt) @@ -3149,6 +3161,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.clear_range = i915_ggtt_clear_range; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.cleanup = i915_gmch_remove; if (unlikely(ggtt->do_idle_maps)) DRM_INFO("applying Ironlake quirks for intel_iommu\n"); @@ -3156,11 +3169,6 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) return 0; } -static void i915_gmch_remove(struct i915_address_space *vm) -{ - intel_gmch_remove(); -} - /** * i915_ggtt_probe_hw - Probe GGTT 
hardware location * @dev_priv: i915 device @@ -3170,32 +3178,15 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) struct i915_ggtt *ggtt = &dev_priv->ggtt; int ret; - if (INTEL_GEN(dev_priv) <= 5) { - ggtt->probe = i915_gmch_probe; - ggtt->base.cleanup = i915_gmch_remove; - } else if (INTEL_GEN(dev_priv) < 8) { - ggtt->probe = gen6_gmch_probe; - ggtt->base.cleanup = gen6_gmch_remove; - - if (HAS_EDRAM(dev_priv)) - ggtt->base.pte_encode = iris_pte_encode; - else if (IS_HASWELL(dev_priv)) - ggtt->base.pte_encode = hsw_pte_encode; - else if (IS_VALLEYVIEW(dev_priv)) - ggtt->base.pte_encode = byt_pte_encode; - else if (INTEL_GEN(dev_priv) >= 7) - ggtt->base.pte_encode = ivb_pte_encode; - else - ggtt->base.pte_encode = snb_pte_encode; - } else { - ggtt->probe = gen8_gmch_probe; - ggtt->base.cleanup = gen6_gmch_remove; - } - ggtt->base.dev = &dev_priv->drm; ggtt->base.is_ggtt = true; - ret = ggtt->probe(ggtt); + if (INTEL_GEN(dev_priv) <= 5) + ret = i915_gmch_probe(ggtt); + else if (INTEL_GEN(dev_priv) < 8) + ret = gen6_gmch_probe(ggtt); + else + ret = gen8_gmch_probe(ggtt); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index bb39993..48ce722 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -354,7 +354,6 @@ struct i915_ggtt { size_t stolen_usable_size; /* Total size minus BIOS reserved */ size_t stolen_reserved_base; size_t stolen_reserved_size; - size_t size; /* Total size of Global GTT */ u64 mappable_end; /* End offset that we can CPU map */ struct io_mapping *mappable; /* Mapping to our CPU mappable region */ phys_addr_t mappable_base; /* PA of our GMADR */ @@ -365,8 +364,6 @@ struct i915_ggtt { bool do_idle_maps; int mtrr; - - int (*probe)(struct i915_ggtt *ggtt); }; struct i915_hw_ppgtt { -- cgit v0.10.2 From 2bfa996e031bdc6de1567ee05438f8a310fa7a4c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:25 +0100 Subject: drm/i915: Store owning file on the i915_address_space For the global GTT (and aliasing GTT), the address space is owned by the device (it is a global resource) and so the per-file owner field is NULL. For per-process GTT (where we create an address space per context), each is owned by the opening file. We can use this ownership information to both distinguish GGTT and ppGTT address spaces, as well as occasionally inspect the owner. 
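A minimal userspace sketch of the ownership idea (hypothetical names, not the structures from i915_gem_gtt.h): the global GTT is device-owned and so carries no owning file, which lets a NULL file pointer double as the "is this the GGTT?" test.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct file_priv { int id; };

/* Sketch of an address space that records its owner: NULL for the
 * device-owned global GTT, the opening file for a per-process GTT.
 */
struct address_space {
	struct file_priv *file;
	unsigned long long total;
};

static int vm_is_ggtt(const struct address_space *vm)
{
	return vm->file == NULL;
}

int main(void)
{
	struct file_priv fpriv = { .id = 42 };
	struct address_space ggtt  = { .file = NULL,   .total = 1ULL << 32 };
	struct address_space ppgtt = { .file = &fpriv, .total = 1ULL << 48 };

	assert(vm_is_ggtt(&ggtt));
	assert(!vm_is_ggtt(&ppgtt));
	printf("ppGTT owned by file %d\n", ppgtt.file->id);
	return 0;
}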
v2: Whitespace, tells us who owns i915_address_space Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-6-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e7724be..9bd5eb3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -354,7 +354,7 @@ static int per_file_stats(int id, void *ptr, void *data) } else { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm); - if (ppgtt->file_priv != stats->file_priv) + if (ppgtt->base.file != stats->file_priv) continue; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9b18b9c..7452a40 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3300,7 +3300,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm) return container_of(vm, struct i915_hw_ppgtt, base); } - static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) { return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d7a7cc8..24383f0 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -327,7 +327,8 @@ i915_gem_create_context(struct drm_device *dev, return ctx; if (USES_FULL_PPGTT(dev)) { - struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv); + struct i915_hw_ppgtt *ppgtt = + i915_ppgtt_create(to_i915(dev), file_priv); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0380129..ad97892 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2104,11 +2104,12 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return 0; } -static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_private *dev_priv) { - ppgtt->base.dev = dev; + ppgtt->base.dev = &dev_priv->drm; - if (INTEL_INFO(dev)->gen < 8) + if (INTEL_INFO(dev_priv)->gen < 8) return gen6_ppgtt_init(ppgtt); else return gen8_ppgtt_init(ppgtt); @@ -2142,15 +2143,17 @@ static void gtt_write_workarounds(struct drm_device *dev) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); } -static int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_private *dev_priv, + struct drm_i915_file_private *file_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - int ret = 0; + int ret; - ret = __hw_ppgtt_init(dev, ppgtt); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret == 0) { kref_init(&ppgtt->ref); i915_address_space_init(&ppgtt->base, dev_priv); + ppgtt->base.file = file_priv; } return ret; @@ -2182,7 +2185,8 @@ int i915_ppgtt_init_hw(struct drm_device *dev) } struct i915_hw_ppgtt * -i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) +i915_ppgtt_create(struct drm_i915_private *dev_priv, + struct drm_i915_file_private *fpriv) { struct i915_hw_ppgtt *ppgtt; int ret; @@ -2191,14 +2195,12 @@ i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_ppgtt_init(dev, ppgtt); + ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv); if (ret) { kfree(ppgtt); 
return ERR_PTR(ret); } - ppgtt->file_priv = fpriv; - trace_i915_ppgtt_create(&ppgtt->base); return ppgtt; @@ -2784,9 +2786,8 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) if (!ppgtt) return -ENOMEM; - ret = __hw_ppgtt_init(&dev_priv->drm, ppgtt); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret) { - ppgtt->base.cleanup(&ppgtt->base); kfree(ppgtt); return ret; } @@ -3179,7 +3180,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) int ret; ggtt->base.dev = &dev_priv->drm; - ggtt->base.is_ggtt = true; if (INTEL_GEN(dev_priv) <= 5) ret = i915_gmch_probe(ggtt); @@ -3314,7 +3314,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) struct i915_hw_ppgtt *ppgtt; - if (vm->is_ggtt) + if (i915_is_ggtt(vm)) ppgtt = dev_priv->mm.aliasing_ppgtt; else ppgtt = i915_vm_to_ppgtt(vm); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 48ce722..cf8e3fc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -272,12 +272,19 @@ struct i915_pml4 { struct i915_address_space { struct drm_mm mm; struct drm_device *dev; + /* Every address space belongs to a struct file - except for the global + * GTT that is owned by the driver (and so @file is set to NULL). In + * principle, no information should leak from one context to another + * (or between files/processes etc) unless explicitly shared by the + * owner. Tracking the owner is important in order to free up per-file + * objects along with the file, to aide resource tracking, and to + * assign blame. + */ + struct drm_i915_file_private *file; struct list_head global_link; u64 start; /* Start offset always 0 for dri2 */ u64 total; /* size addr space maps (ex. 2GB for ggtt) */ - bool is_ggtt; - struct i915_page_scratch *scratch_page; struct i915_page_table *scratch_pt; struct i915_page_directory *scratch_pd; @@ -338,7 +345,7 @@ struct i915_address_space { u32 flags); }; -#define i915_is_ggtt(V) ((V)->is_ggtt) +#define i915_is_ggtt(V) (!(V)->file) /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal @@ -377,8 +384,6 @@ struct i915_hw_ppgtt { struct i915_page_directory pd; /* GEN6-7 */ }; - struct drm_i915_file_private *file_priv; - gen6_pte_t __iomem *pd_addr; int (*enable)(struct i915_hw_ppgtt *ppgtt); @@ -526,7 +531,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv); int i915_ppgtt_init_hw(struct drm_device *dev); void i915_ppgtt_release(struct kref *kref); -struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, +struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv, struct drm_i915_file_private *fpriv); static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) { -- cgit v0.10.2 From 15717de219f2ea4792d27ff62c29d08e46cda7f8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:26 +0100 Subject: drm/i915: Count how many VMA are bound for an object Since we may have VMA allocated for an object, but we interrupted their binding, there is a disparity between have elements on the obj->vma_list and being bound. i915_gem_obj_bound_any() does this check, but this is not rigorously observed - add an explicit count to make it easier. 
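A small standalone sketch of the counting idea (simplified, hypothetical types rather than the real GEM structures): instead of walking obj->vma_list to ask whether anything is bound, the object keeps an explicit bind_count that is incremented on bind and decremented on unbind, and the object only migrates to the unbound list once the count drops to zero.

#include <assert.h>
#include <stdbool.h>

/* Simplified object model; in the driver the list membership is a
 * struct list_head moved between per-device bound/unbound lists.
 */
struct gem_object {
	unsigned int bind_count;	/* VMAs actually bound */
	bool on_bound_list;
};

static void vma_bind(struct gem_object *obj)
{
	if (obj->bind_count++ == 0)
		obj->on_bound_list = true;	/* move to bound_list */
}

static void vma_unbind(struct gem_object *obj)
{
	assert(obj->bind_count);		/* mirrors GEM_BUG_ON(!bind_count) */
	if (--obj->bind_count == 0)
		obj->on_bound_list = false;	/* move to unbound_list */
}

int main(void)
{
	struct gem_object obj = { 0 };

	vma_bind(&obj);
	vma_bind(&obj);			/* second VMA for the same object */
	vma_unbind(&obj);
	assert(obj.on_bound_list);	/* one VMA still bound */
	vma_unbind(&obj);
	assert(!obj.on_bound_list);	/* now fully unbound */
	return 0;
}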
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-7-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9bd5eb3..44cf4d0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -174,6 +174,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) if (obj->fence_reg != I915_FENCE_REG_NONE) seq_printf(m, " (fence: %d)", obj->fence_reg); list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!drm_mm_node_allocated(&vma->node)) + continue; + seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", vma->is_ggtt ? "g" : "pp", vma->node.start, vma->node.size); @@ -335,11 +338,11 @@ static int per_file_stats(int id, void *ptr, void *data) struct drm_i915_gem_object *obj = ptr; struct file_stats *stats = data; struct i915_vma *vma; - int bound = 0; stats->count++; stats->total += obj->base.size; - + if (!obj->bind_count) + stats->unbound += obj->base.size; if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; @@ -347,8 +350,6 @@ static int per_file_stats(int id, void *ptr, void *data) if (!drm_mm_node_allocated(&vma->node)) continue; - bound++; - if (vma->is_ggtt) { stats->global += vma->node.size; } else { @@ -364,9 +365,6 @@ static int per_file_stats(int id, void *ptr, void *data) stats->inactive += vma->node.size; } - if (!bound) - stats->unbound += obj->base.size; - return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7452a40..7a9a2d8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2221,6 +2221,8 @@ struct drm_i915_gem_object { unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS; unsigned int has_wc_mmap; + /** Count of VMA actually bound by this object */ + unsigned int bind_count; unsigned int pin_display; struct sg_table *pages; @@ -3266,7 +3268,6 @@ i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o) return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal); } -bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o); bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, const struct i915_ggtt_view *view); bool i915_gem_obj_bound(struct drm_i915_gem_object *o, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5303d74..0884870 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2107,7 +2107,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) if (obj->pages_pin_count) return -EBUSY; - BUG_ON(i915_gem_obj_bound_any(obj)); + GEM_BUG_ON(obj->bind_count); /* ->put_pages might need to allocate memory for the bit17 swizzle * array, hence protect them from being reaped by removing them from gtt @@ -2965,7 +2965,6 @@ static void __i915_vma_iounmap(struct i915_vma *vma) static int __i915_vma_unbind(struct i915_vma *vma, bool wait) { struct drm_i915_gem_object *obj = vma->obj; - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); int ret; if (list_empty(&vma->obj_link)) @@ -2979,7 +2978,8 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) if (vma->pin_count) return -EBUSY; - BUG_ON(obj->pages == NULL); + GEM_BUG_ON(obj->bind_count == 0); + GEM_BUG_ON(!obj->pages); if (wait) { ret = i915_gem_object_wait_rendering(obj, false); @@ -3019,8 +3019,9 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) /* Since the unbound list is global, 
only move to that list if * no more VMAs exist. */ - if (list_empty(&obj->vma_list)) - list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); + if (--obj->bind_count == 0) + list_move_tail(&obj->global_list, + &to_i915(obj->base.dev)->mm.unbound_list); /* And finally now the object is completely decoupled from this vma, * we can drop its hold on the backing storage and allow it to be @@ -3255,6 +3256,7 @@ search_free: list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); list_add_tail(&vma->vm_link, &vm->inactive_list); + obj->bind_count++; return vma; @@ -3450,7 +3452,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, { struct drm_device *dev = obj->base.dev; struct i915_vma *vma, *next; - bool bound = false; int ret = 0; if (obj->cache_level == cache_level) @@ -3474,8 +3475,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, ret = i915_vma_unbind(vma); if (ret) return ret; - } else - bound = true; + } } /* We can reuse the existing drm_mm nodes but need to change the @@ -3485,7 +3485,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * rewrite the PTE in the belief that doing so tramples upon less * state and so involves less work. */ - if (bound) { + if (obj->bind_count) { /* Before we change the PTE, the GPU must not be accessing it. * If we wait upon the object, we know that all the bound * VMA are no longer active. @@ -4223,6 +4223,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) dev_priv->mm.interruptible = was_interruptible; } } + GEM_BUG_ON(obj->bind_count); /* Stolen objects don't hold a ref, but do hold pin count. Fix that up * before progressing. */ @@ -4840,17 +4841,6 @@ bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, return false; } -bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &o->vma_list, obj_link) - if (drm_mm_node_allocated(&vma->node)) - return true; - - return false; -} - unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) { struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 5d4772c..b95cd9f 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -48,19 +48,15 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) #endif } -static int num_vma_bound(struct drm_i915_gem_object *obj) +static bool any_vma_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - int count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (drm_mm_node_allocated(&vma->node)) - count++; + list_for_each_entry(vma, &obj->vma_list, obj_link) if (vma->pin_count) - count++; - } + return true; - return count; + return false; } static bool swap_available(void) @@ -82,7 +78,10 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) * to the GPU, simply unbinding from the GPU is not going to succeed * in releasing our pin count on the pages themselves. 
*/ - if (obj->pages_pin_count != num_vma_bound(obj)) + if (obj->pages_pin_count > obj->bind_count) + return false; + + if (any_vma_pinned(obj)) return false; /* We can only return physical pages to the system if we can either diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 9a8cc8c..2c321c8 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -708,6 +708,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, vma->bound |= GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); + obj->bind_count++; list_add_tail(&obj->global_list, &dev_priv->mm.bound_list); i915_gem_object_pin_pages(obj); -- cgit v0.10.2 From aa653a685d816648dd903f76c14a30d8baa23933 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:27 +0100 Subject: drm/i915: Be more careful when unbinding vma When we call i915_vma_unbind(), we will wait upon outstanding rendering. This will also trigger a retirement phase, which may update the object lists. If, we extend request tracking to the VMA itself (rather than keep it at the encompassing object), then there is a potential that the obj->vma_list be modified for other elements upon i915_vma_unbind(). As a result, if we walk over the object list and call i915_vma_unbind(), we need to be prepared for that list to change. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-8-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7a9a2d8..a6bab90 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3052,6 +3052,8 @@ int __must_check i915_vma_unbind(struct i915_vma *vma); * _guarantee_ VMA in question is _not in use_ anywhere. */ int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma); + +int i915_gem_object_unbind(struct drm_i915_gem_object *obj); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0884870..a38af9e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -283,18 +283,38 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; +int +i915_gem_object_unbind(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + LIST_HEAD(still_in_list); + int ret; + + /* The vma will only be freed if it is marked as closed, and if we wait + * upon rendering to the vma, we may unbind anything in the list. 
+ */ + while ((vma = list_first_entry_or_null(&obj->vma_list, + struct i915_vma, + obj_link))) { + list_move_tail(&vma->obj_link, &still_in_list); + ret = i915_vma_unbind(vma); + if (ret) + break; + } + list_splice(&still_in_list, &obj->vma_list); + + return ret; +} + static int drop_pages(struct drm_i915_gem_object *obj) { - struct i915_vma *vma, *next; int ret; i915_gem_object_get(obj); - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) - if (i915_vma_unbind(vma)) - break; - - ret = i915_gem_object_put_pages(obj); + ret = i915_gem_object_unbind(obj); + if (ret == 0) + ret = i915_gem_object_put_pages(obj); i915_gem_object_put(obj); return ret; @@ -3450,8 +3470,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { - struct drm_device *dev = obj->base.dev; - struct i915_vma *vma, *next; + struct i915_vma *vma; int ret = 0; if (obj->cache_level == cache_level) @@ -3462,7 +3481,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * catch the issue of the CS prefetch crossing page boundaries and * reading an invalid PTE on older architectures. */ - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { +restart: + list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3471,11 +3491,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return -EBUSY; } - if (!i915_gem_valid_gtt_space(vma, cache_level)) { - ret = i915_vma_unbind(vma); - if (ret) - return ret; - } + if (i915_gem_valid_gtt_space(vma, cache_level)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + + /* As unbinding may affect other elements in the + * obj->vma_list (due to side-effects from retiring + * an active vma), play safe and restart the iterator. + */ + goto restart; } /* We can reuse the existing drm_mm nodes but need to change the @@ -3494,7 +3521,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, if (ret) return ret; - if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { + if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) { /* Access to snoopable pages through the GTT is * incoherent and on some machines causes a hard * lockup. Relinquish the CPU mmaping to force diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index b95cd9f..64d179d 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -172,8 +172,6 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, (obj = list_first_entry_or_null(phase->list, typeof(*obj), global_list))) { - struct i915_vma *vma, *v; - list_move_tail(&obj->global_list, &still_in_list); if (flags & I915_SHRINK_PURGEABLE && @@ -193,11 +191,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, i915_gem_object_get(obj); /* For the unbound phase, this should be a no-op! 
*/ - list_for_each_entry_safe(vma, v, - &obj->vma_list, obj_link) - if (i915_vma_unbind(vma)) - break; - + i915_gem_object_unbind(obj); if (i915_gem_object_put_pages(obj) == 0) count += obj->base.size >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index ca8b82a..e935b32 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -104,7 +104,6 @@ static void cancel_userptr(struct work_struct *work) if (obj->pages != NULL) { struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_vma *vma, *tmp; bool was_interruptible; wait_rendering(obj); @@ -112,8 +111,7 @@ static void cancel_userptr(struct work_struct *work) was_interruptible = dev_priv->mm.interruptible; dev_priv->mm.interruptible = false; - list_for_each_entry_safe(vma, tmp, &obj->vma_list, obj_link) - WARN_ON(i915_vma_unbind(vma)); + WARN_ON(i915_gem_object_unbind(obj)); WARN_ON(i915_gem_object_put_pages(obj)); dev_priv->mm.interruptible = was_interruptible; -- cgit v0.10.2 From 4717ca9eec1bb88513fb5cbe62f44348d5bc946c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:28 +0100 Subject: drm/i915: Kill drop_pages() The drop_pages() function is a dangerous trap in that it can release the passed in object pointer and so unless the caller is aware, it can easily trick us into using the stale object afterwards. Move it into its solitary callsite where we know it is safe. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-9-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a38af9e..8892baa 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -306,20 +306,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) return ret; } -static int -drop_pages(struct drm_i915_gem_object *obj) -{ - int ret; - - i915_gem_object_get(obj); - ret = i915_gem_object_unbind(obj); - if (ret == 0) - ret = i915_gem_object_put_pages(obj); - i915_gem_object_put(obj); - - return ret; -} - int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) @@ -340,7 +326,11 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (obj->base.filp == NULL) return -EINVAL; - ret = drop_pages(obj); + ret = i915_gem_object_unbind(obj); + if (ret) + return ret; + + ret = i915_gem_object_put_pages(obj); if (ret) return ret; -- cgit v0.10.2 From 381f371b25946518f4882fa2060326fa92a33bfc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:29 +0100 Subject: drm/i915: Introduce i915_gem_active for request tracking In the next patch, request tracking is made more generic and for that we need a new expanded struct and to separate out the logic changes from the mechanical churn, we split out the structure renaming into this patch. v2: Writer's block. Add some spiel about why we track requests. v3: Now i915_gem_active. v4: Now with i915_gem_active_set() for attaching to the active request. 
v5: Use i915_gem_active_set() from inside the retirement handlers Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-10-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 44cf4d0..6151460 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -155,10 +155,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->base.write_domain); for_each_engine_id(engine, dev_priv, id) seq_printf(m, "%x ", - i915_gem_request_get_seqno(obj->last_read_req[id])); + i915_gem_request_get_seqno(obj->last_read[id].request)); seq_printf(m, "] %x %x%s%s%s", - i915_gem_request_get_seqno(obj->last_write_req), - i915_gem_request_get_seqno(obj->last_fenced_req), + i915_gem_request_get_seqno(obj->last_write.request), + i915_gem_request_get_seqno(obj->last_fence.request), i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); @@ -195,9 +195,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) *t = '\0'; seq_printf(m, " (%s mappable)", s); } - if (obj->last_write_req != NULL) - seq_printf(m, " (%s)", - i915_gem_request_get_engine(obj->last_write_req)->name); + if (obj->last_write.request) + seq_printf(m, " (%s)", obj->last_write.request->engine->name); if (obj->frontbuffer_bits) seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a6bab90..17a206f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2242,11 +2242,10 @@ struct drm_i915_gem_object { * requests on one ring where the write request is older than the * read request. This allows for the CPU to read from an active * buffer by only waiting for the write to complete. - * */ - struct drm_i915_gem_request *last_read_req[I915_NUM_ENGINES]; - struct drm_i915_gem_request *last_write_req; - /** Breadcrumb of last fenced GPU access to the buffer. */ - struct drm_i915_gem_request *last_fenced_req; + */ + struct i915_gem_active last_read[I915_NUM_ENGINES]; + struct i915_gem_active last_write; + struct i915_gem_active last_fence; /** Current tiling stride for the object, if it's tiled. 
*/ uint32_t stride; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8892baa..1478efd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1353,23 +1353,23 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, int ret, i; if (readonly) { - if (obj->last_write_req != NULL) { - ret = i915_wait_request(obj->last_write_req); + if (obj->last_write.request) { + ret = i915_wait_request(obj->last_write.request); if (ret) return ret; - i = obj->last_write_req->engine->id; - if (obj->last_read_req[i] == obj->last_write_req) + i = obj->last_write.request->engine->id; + if (obj->last_read[i].request == obj->last_write.request) i915_gem_object_retire__read(obj, i); else i915_gem_object_retire__write(obj); } } else { for (i = 0; i < I915_NUM_ENGINES; i++) { - if (obj->last_read_req[i] == NULL) + if (!obj->last_read[i].request) continue; - ret = i915_wait_request(obj->last_read_req[i]); + ret = i915_wait_request(obj->last_read[i].request); if (ret) return ret; @@ -1397,9 +1397,9 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj, { int idx = req->engine->id; - if (obj->last_read_req[idx] == req) + if (obj->last_read[idx].request == req) i915_gem_object_retire__read(obj, idx); - else if (obj->last_write_req == req) + else if (obj->last_write.request == req) i915_gem_object_retire__write(obj); if (!i915_reset_in_progress(&req->i915->gpu_error)) @@ -1428,7 +1428,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (readonly) { struct drm_i915_gem_request *req; - req = obj->last_write_req; + req = obj->last_write.request; if (req == NULL) return 0; @@ -1437,7 +1437,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = obj->last_read_req[i]; + req = obj->last_read[i].request; if (req == NULL) continue; @@ -2375,7 +2375,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, obj->active |= intel_engine_flag(engine); list_move_tail(&obj->engine_list[engine->id], &engine->active_list); - i915_gem_request_assign(&obj->last_read_req[engine->id], req); + i915_gem_active_set(&obj->last_read[engine->id], req); list_move_tail(&vma->vm_link, &vma->vm->active_list); } @@ -2383,10 +2383,10 @@ void i915_vma_move_to_active(struct i915_vma *vma, static void i915_gem_object_retire__write(struct drm_i915_gem_object *obj) { - GEM_BUG_ON(obj->last_write_req == NULL); - GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); + GEM_BUG_ON(!obj->last_write.request); + GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write.request->engine))); - i915_gem_request_assign(&obj->last_write_req, NULL); + i915_gem_active_set(&obj->last_write, NULL); intel_fb_obj_flush(obj, true, ORIGIN_CS); } @@ -2395,13 +2395,13 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int idx) { struct i915_vma *vma; - GEM_BUG_ON(obj->last_read_req[idx] == NULL); + GEM_BUG_ON(!obj->last_read[idx].request); GEM_BUG_ON(!(obj->active & (1 << idx))); list_del_init(&obj->engine_list[idx]); - i915_gem_request_assign(&obj->last_read_req[idx], NULL); + i915_gem_active_set(&obj->last_read[idx], NULL); - if (obj->last_write_req && obj->last_write_req->engine->id == idx) + if (obj->last_write.request && obj->last_write.request->engine->id == idx) i915_gem_object_retire__write(obj); obj->active &= ~(1 << idx); @@ -2420,7 +2420,7 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, 
int idx) list_move_tail(&vma->vm_link, &vma->vm->inactive_list); } - i915_gem_request_assign(&obj->last_fenced_req, NULL); + i915_gem_active_set(&obj->last_fence, NULL); i915_gem_object_put(obj); } @@ -2621,7 +2621,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) struct drm_i915_gem_object, engine_list[engine->id]); - if (!list_empty(&obj->last_read_req[engine->id]->list)) + if (!list_empty(&obj->last_read[engine->id].request->list)) break; i915_gem_object_retire__read(obj, engine->id); @@ -2754,7 +2754,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = obj->last_read_req[i]; + req = obj->last_read[i].request; if (req == NULL) continue; @@ -2830,10 +2830,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) i915_gem_object_put(obj); for (i = 0; i < I915_NUM_ENGINES; i++) { - if (obj->last_read_req[i] == NULL) + if (!obj->last_read[i].request) continue; - req[n++] = i915_gem_request_get(obj->last_read_req[i]); + req[n++] = i915_gem_request_get(obj->last_read[i].request); } mutex_unlock(&dev->struct_mutex); @@ -2924,12 +2924,12 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, n = 0; if (readonly) { - if (obj->last_write_req) - req[n++] = obj->last_write_req; + if (obj->last_write.request) + req[n++] = obj->last_write.request; } else { for (i = 0; i < I915_NUM_ENGINES; i++) - if (obj->last_read_req[i]) - req[n++] = obj->last_read_req[i]; + if (obj->last_read[i].request) + req[n++] = obj->last_read[i].request; } for (i = 0; i < n; i++) { ret = __i915_gem_object_sync(obj, to, req[i]); @@ -4026,12 +4026,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = obj->last_read_req[i]; + req = obj->last_read[i].request; if (req) args->busy |= 1 << (16 + req->engine->exec_id); } - if (obj->last_write_req) - args->busy |= obj->last_write_req->engine->exec_id; + if (obj->last_write.request) + args->busy |= obj->last_write.request->engine->exec_id; } unref: diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a4b98af..5e1fb85 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1150,7 +1150,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, i915_vma_move_to_active(vma, req); if (obj->base.write_domain) { - i915_gem_request_assign(&obj->last_write_req, req); + i915_gem_active_set(&obj->last_write, req); intel_fb_obj_invalidate(obj, ORIGIN_CS); @@ -1158,7 +1158,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; } if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - i915_gem_request_assign(&obj->last_fenced_req, req); + i915_gem_active_set(&obj->last_fence, req); if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { struct drm_i915_private *dev_priv = engine->i915; list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 251d7a9..d16b385 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -261,12 +261,12 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) static int i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) { - if (obj->last_fenced_req) { - int ret = i915_wait_request(obj->last_fenced_req); + if 
(obj->last_fence.request) { + int ret = i915_wait_request(obj->last_fence.request); if (ret) return ret; - i915_gem_request_assign(&obj->last_fenced_req, NULL); + i915_gem_active_set(&obj->last_fence, NULL); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 382ca5a..cf2df33 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -249,4 +249,45 @@ static inline bool i915_spin_request(const struct drm_i915_gem_request *request, __i915_spin_request(request, state, timeout_us)); } +/* We treat requests as fences. This is not be to confused with our + * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. + * We use the fences to synchronize access from the CPU with activity on the + * GPU, for example, we should not rewrite an object's PTE whilst the GPU + * is reading them. We also track fences at a higher level to provide + * implicit synchronisation around GEM objects, e.g. set-domain will wait + * for outstanding GPU rendering before marking the object ready for CPU + * access, or a pageflip will wait until the GPU is complete before showing + * the frame on the scanout. + * + * In order to use a fence, the object must track the fence it needs to + * serialise with. For example, GEM objects want to track both read and + * write access so that we can perform concurrent read operations between + * the CPU and GPU engines, as well as waiting for all rendering to + * complete, or waiting for the last GPU user of a "fence register". The + * object then embeds a #i915_gem_active to track the most recent (in + * retirement order) request relevant for the desired mode of access. + * The #i915_gem_active is updated with i915_gem_active_set() to track the + * most recent fence request, typically this is done as part of + * i915_vma_move_to_active(). + * + * When the #i915_gem_active completes (is retired), it will + * signal its completion to the owner through a callback as well as mark + * itself as idle (i915_gem_active.request == NULL). The owner + * can then perform any action, such as delayed freeing of an active + * resource including itself. + */ +struct i915_gem_active { + struct drm_i915_gem_request *request; +}; + +static inline void +i915_gem_active_set(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + i915_gem_request_assign(&active->request, request); +} + +#define for_each_active(mask, idx) \ + for (; mask ? 
idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) + #endif /* I915_GEM_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index e83fc2d..00d796d 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -242,7 +242,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } obj->fence_dirty = - obj->last_fenced_req || + obj->last_fence.request || obj->fence_reg != I915_FENCE_REG_NONE; obj->tiling_mode = args->tiling_mode; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index e935b32..32f50a70 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -74,7 +74,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj) for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = obj->last_read_req[i]; + req = obj->last_read[i].request; if (req == NULL) continue; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 226b28e..d6482e9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -751,8 +751,8 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->size = obj->base.size; err->name = obj->base.name; for (i = 0; i < I915_NUM_ENGINES; i++) - err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read_req[i]); - err->wseqno = i915_gem_request_get_seqno(obj->last_write_req); + err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read[i].request); + err->wseqno = i915_gem_request_get_seqno(obj->last_write.request); err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; @@ -764,8 +764,7 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; err->userptr = obj->userptr.mm != NULL; - err->engine = obj->last_write_req ? - i915_gem_request_get_engine(obj->last_write_req)->id : -1; + err->engine = obj->last_write.request ? 
obj->last_write.request->engine->id : -1; err->cache_level = obj->cache_level; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 993bcfb..e858fed 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11370,7 +11370,7 @@ static bool use_mmio_flip(struct intel_engine_cs *engine, if (resv && !reservation_object_test_signaled_rcu(resv, false)) return true; - return engine != i915_gem_request_get_engine(obj->last_write_req); + return engine != i915_gem_request_get_engine(obj->last_write.request); } static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, @@ -11673,7 +11673,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { engine = &dev_priv->engine[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { - engine = i915_gem_request_get_engine(obj->last_write_req); + engine = i915_gem_request_get_engine(obj->last_write.request); if (engine == NULL || engine->id != RCS) engine = &dev_priv->engine[BCS]; } else { @@ -11695,7 +11695,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); i915_gem_request_assign(&work->flip_queued_req, - obj->last_write_req); + obj->last_write.request); schedule_work(&work->mmio_work); } else { @@ -14043,7 +14043,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, to_intel_plane_state(new_state); i915_gem_request_assign(&plane_state->wait_req, - obj->last_write_req); + obj->last_write.request); } return ret; -- cgit v0.10.2 From 27c01aaef041f1fa3908c0330ff86d345523c3dc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:30 +0100 Subject: drm/i915: Prepare i915_gem_active for annotations In the future, we will want to add annotations to the i915_gem_active struct. The API is thus expanded to hide direct access to the contents of i915_gem_active and mediated instead through a number of helpers. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-11-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6151460..24ff7f4 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -155,10 +155,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->base.write_domain); for_each_engine_id(engine, dev_priv, id) seq_printf(m, "%x ", - i915_gem_request_get_seqno(obj->last_read[id].request)); + i915_gem_active_get_seqno(&obj->last_read[id])); seq_printf(m, "] %x %x%s%s%s", - i915_gem_request_get_seqno(obj->last_write.request), - i915_gem_request_get_seqno(obj->last_fence.request), + i915_gem_active_get_seqno(&obj->last_write), + i915_gem_active_get_seqno(&obj->last_fence), i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? 
" purgeable" : ""); @@ -195,8 +195,11 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) *t = '\0'; seq_printf(m, " (%s mappable)", s); } - if (obj->last_write.request) - seq_printf(m, " (%s)", obj->last_write.request->engine->name); + + engine = i915_gem_active_get_engine(&obj->last_write); + if (engine) + seq_printf(m, " (%s)", engine->name); + if (obj->frontbuffer_bits) seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1478efd..0c158c7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1349,27 +1349,30 @@ int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, bool readonly) { + struct drm_i915_gem_request *request; struct reservation_object *resv; int ret, i; if (readonly) { - if (obj->last_write.request) { - ret = i915_wait_request(obj->last_write.request); + request = i915_gem_active_peek(&obj->last_write); + if (request) { + ret = i915_wait_request(request); if (ret) return ret; - i = obj->last_write.request->engine->id; - if (obj->last_read[i].request == obj->last_write.request) + i = request->engine->id; + if (i915_gem_active_peek(&obj->last_read[i]) == request) i915_gem_object_retire__read(obj, i); else i915_gem_object_retire__write(obj); } } else { for (i = 0; i < I915_NUM_ENGINES; i++) { - if (!obj->last_read[i].request) + request = i915_gem_active_peek(&obj->last_read[i]); + if (!request) continue; - ret = i915_wait_request(obj->last_read[i].request); + ret = i915_wait_request(request); if (ret) return ret; @@ -1397,9 +1400,9 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj, { int idx = req->engine->id; - if (obj->last_read[idx].request == req) + if (i915_gem_active_peek(&obj->last_read[idx]) == req) i915_gem_object_retire__read(obj, idx); - else if (obj->last_write.request == req) + else if (i915_gem_active_peek(&obj->last_write) == req) i915_gem_object_retire__write(obj); if (!i915_reset_in_progress(&req->i915->gpu_error)) @@ -1428,20 +1431,20 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (readonly) { struct drm_i915_gem_request *req; - req = obj->last_write.request; + req = i915_gem_active_get(&obj->last_write); if (req == NULL) return 0; - requests[n++] = i915_gem_request_get(req); + requests[n++] = req; } else { for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = obj->last_read[i].request; + req = i915_gem_active_get(&obj->last_read[i]); if (req == NULL) continue; - requests[n++] = i915_gem_request_get(req); + requests[n++] = req; } } @@ -2383,8 +2386,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, static void i915_gem_object_retire__write(struct drm_i915_gem_object *obj) { - GEM_BUG_ON(!obj->last_write.request); - GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write.request->engine))); + GEM_BUG_ON(!i915_gem_active_isset(&obj->last_write)); + GEM_BUG_ON(!(obj->active & intel_engine_flag(i915_gem_active_get_engine(&obj->last_write)))); i915_gem_active_set(&obj->last_write, NULL); intel_fb_obj_flush(obj, true, ORIGIN_CS); @@ -2393,15 +2396,17 @@ i915_gem_object_retire__write(struct drm_i915_gem_object *obj) static void i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int idx) { + struct intel_engine_cs *engine; struct i915_vma *vma; - GEM_BUG_ON(!obj->last_read[idx].request); + GEM_BUG_ON(!i915_gem_active_isset(&obj->last_read[idx])); GEM_BUG_ON(!(obj->active & (1 << idx))); 
list_del_init(&obj->engine_list[idx]); i915_gem_active_set(&obj->last_read[idx], NULL); - if (obj->last_write.request && obj->last_write.request->engine->id == idx) + engine = i915_gem_active_get_engine(&obj->last_write); + if (engine && engine->id == idx) i915_gem_object_retire__write(obj); obj->active &= ~(1 << idx); @@ -2621,7 +2626,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) struct drm_i915_gem_object, engine_list[engine->id]); - if (!list_empty(&obj->last_read[engine->id].request->list)) + if (!list_empty(&i915_gem_active_peek(&obj->last_read[engine->id])->list)) break; i915_gem_object_retire__read(obj, engine->id); @@ -2754,7 +2759,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = obj->last_read[i].request; + req = i915_gem_active_peek(&obj->last_read[i]); if (req == NULL) continue; @@ -2794,7 +2799,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_wait *args = data; struct drm_i915_gem_object *obj; - struct drm_i915_gem_request *req[I915_NUM_ENGINES]; + struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; int i, n = 0; int ret; @@ -2830,20 +2835,21 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) i915_gem_object_put(obj); for (i = 0; i < I915_NUM_ENGINES; i++) { - if (!obj->last_read[i].request) - continue; + struct drm_i915_gem_request *req; - req[n++] = i915_gem_request_get(obj->last_read[i].request); + req = i915_gem_active_get(&obj->last_read[i]); + if (req) + requests[n++] = req; } mutex_unlock(&dev->struct_mutex); for (i = 0; i < n; i++) { if (ret == 0) - ret = __i915_wait_request(req[i], true, + ret = __i915_wait_request(requests[i], true, args->timeout_ns > 0 ? 
&args->timeout_ns : NULL, to_rps_client(file)); - i915_gem_request_put(req[i]); + i915_gem_request_put(requests[i]); } return ret; @@ -2916,7 +2922,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, struct drm_i915_gem_request *to) { const bool readonly = obj->base.pending_write_domain == 0; - struct drm_i915_gem_request *req[I915_NUM_ENGINES]; + struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; int ret, i, n; if (!obj->active) @@ -2924,15 +2930,22 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, n = 0; if (readonly) { - if (obj->last_write.request) - req[n++] = obj->last_write.request; + struct drm_i915_gem_request *req; + + req = i915_gem_active_peek(&obj->last_write); + if (req) + requests[n++] = req; } else { - for (i = 0; i < I915_NUM_ENGINES; i++) - if (obj->last_read[i].request) - req[n++] = obj->last_read[i].request; + for (i = 0; i < I915_NUM_ENGINES; i++) { + struct drm_i915_gem_request *req; + + req = i915_gem_active_peek(&obj->last_read[i]); + if (req) + requests[n++] = req; + } } for (i = 0; i < n; i++) { - ret = __i915_gem_object_sync(obj, to, req[i]); + ret = __i915_gem_object_sync(obj, to, requests[i]); if (ret) return ret; } @@ -4021,17 +4034,17 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, args->busy = 0; if (obj->active) { + struct drm_i915_gem_request *req; int i; for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = obj->last_read[i].request; + req = i915_gem_active_peek(&obj->last_read[i]); if (req) args->busy |= 1 << (16 + req->engine->exec_id); } - if (obj->last_write.request) - args->busy |= obj->last_write.request->engine->exec_id; + req = i915_gem_active_peek(&obj->last_write); + if (req) + args->busy |= req->engine->exec_id; } unref: diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index d16b385..9fdbd66 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -261,14 +261,13 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) static int i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) { - if (obj->last_fence.request) { - int ret = i915_wait_request(obj->last_fence.request); - if (ret) - return ret; + int ret; - i915_gem_active_set(&obj->last_fence, NULL); - } + ret = i915_gem_active_wait(&obj->last_fence); + if (ret) + return ret; + i915_gem_active_set(&obj->last_fence, NULL); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index cf2df33..e13834e 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -280,6 +280,15 @@ struct i915_gem_active { struct drm_i915_gem_request *request; }; +/** + * i915_gem_active_set - updates the tracker to watch the current request + * @active - the active tracker + * @request - the request to watch + * + * i915_gem_active_set() watches the given @request for completion. Whilst + * that @request is busy, the @active reports busy. When that @request is + * retired, the @active tracker is updated to report idle. + */ static inline void i915_gem_active_set(struct i915_gem_active *active, struct drm_i915_gem_request *request) @@ -287,6 +296,124 @@ i915_gem_active_set(struct i915_gem_active *active, i915_gem_request_assign(&active->request, request); } +/** + * i915_gem_active_peek - report the request being monitored + * @active - the active tracker + * + * i915_gem_active_peek() returns the current request being tracked, or NULL. 
+ * It does not obtain a reference on the request for the caller, so the + * caller must hold struct_mutex. + */ +static inline struct drm_i915_gem_request * +i915_gem_active_peek(const struct i915_gem_active *active) +{ + return active->request; +} + +/** + * i915_gem_active_get - return a reference to the active request + * @active - the active tracker + * + * i915_gem_active_get() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold struct_mutex. + */ +static inline struct drm_i915_gem_request * +i915_gem_active_get(const struct i915_gem_active *active) +{ + struct drm_i915_gem_request *request; + + request = i915_gem_active_peek(active); + if (!request || i915_gem_request_completed(request)) + return NULL; + + return i915_gem_request_get(request); +} + +/** + * i915_gem_active_isset - report whether the active tracker is assigned + * @active - the active tracker + * + * i915_gem_active_isset() returns true if the active tracker is currently + * assigned to a request. Due to the lazy retiring, that request may be idle + * and this may report stale information. + */ +static inline bool +i915_gem_active_isset(const struct i915_gem_active *active) +{ + return active->request; +} + +/** + * i915_gem_active_is_idle - report whether the active tracker is idle + * @active - the active tracker + * + * i915_gem_active_is_idle() returns true if the active tracker is currently + * unassigned or if the request is complete (but not yet retired). Requires + * the caller to hold struct_mutex (but that can be relaxed if desired). + */ +static inline bool +i915_gem_active_is_idle(const struct i915_gem_active *active) +{ + struct drm_i915_gem_request *request; + + request = i915_gem_active_peek(active); + if (!request || i915_gem_request_completed(request)) + return true; + + return false; +} + +/** + * i915_gem_active_wait - waits until the request is completed + * @active - the active request on which to wait + * + * i915_gem_active_wait() waits until the request is completed before + * returning. Note that it does not guarantee that the request is + * retired first, see i915_gem_active_retire(). + */ +static inline int __must_check +i915_gem_active_wait(const struct i915_gem_active *active) +{ + struct drm_i915_gem_request *request; + + request = i915_gem_active_peek(active); + if (!request) + return 0; + + return i915_wait_request(request); +} + +/** + * i915_gem_active_retire - waits until the request is retired + * @active - the active request on which to wait + * + * i915_gem_active_retire() waits until the request is completed, + * and then ensures that at least the retirement handler for this + * @active tracker is called before returning. If the @active + * tracker is idle, the function returns immediately. + */ +static inline int __must_check +i915_gem_active_retire(const struct i915_gem_active *active) +{ + return i915_gem_active_wait(active); +} + +/* Convenience functions for peeking at state inside active's request whilst + * guarded by the struct_mutex. + */ + +static inline uint32_t +i915_gem_active_get_seqno(const struct i915_gem_active *active) +{ + return i915_gem_request_get_seqno(i915_gem_active_peek(active)); +} + +static inline struct intel_engine_cs * +i915_gem_active_get_engine(const struct i915_gem_active *active) +{ + return i915_gem_request_get_engine(i915_gem_active_peek(active)); +} + #define for_each_active(mask, idx) \ for (; mask ? 
idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 00d796d..8cef2d6 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -242,7 +242,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } obj->fence_dirty = - obj->last_fence.request || + !i915_gem_active_is_idle(&obj->last_fence) || obj->fence_reg != I915_FENCE_REG_NONE; obj->tiling_mode = args->tiling_mode; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 32f50a70..00ab5e9 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -74,11 +74,9 @@ static void wait_rendering(struct drm_i915_gem_object *obj) for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = obj->last_read[i].request; - if (req == NULL) - continue; - - requests[n++] = i915_gem_request_get(req); + req = i915_gem_active_get(&obj->last_read[i]); + if (req) + requests[n++] = req; } mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index d6482e9..585fe2b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -746,13 +746,14 @@ static void capture_bo(struct drm_i915_error_buffer *err, struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; + struct intel_engine_cs *engine; int i; err->size = obj->base.size; err->name = obj->base.name; for (i = 0; i < I915_NUM_ENGINES; i++) - err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read[i].request); - err->wseqno = i915_gem_request_get_seqno(obj->last_write.request); + err->rseqno[i] = i915_gem_active_get_seqno(&obj->last_read[i]); + err->wseqno = i915_gem_active_get_seqno(&obj->last_write); err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; @@ -764,8 +765,10 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; err->userptr = obj->userptr.mm != NULL; - err->engine = obj->last_write.request ? obj->last_write.request->engine->id : -1; err->cache_level = obj->cache_level; + + engine = i915_gem_active_get_engine(&obj->last_write); + err->engine = engine ? 
engine->id : -1; } static u32 capture_active_bo(struct drm_i915_error_buffer *err, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e858fed..8c03d13 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11370,7 +11370,7 @@ static bool use_mmio_flip(struct intel_engine_cs *engine, if (resv && !reservation_object_test_signaled_rcu(resv, false)) return true; - return engine != i915_gem_request_get_engine(obj->last_write.request); + return engine != i915_gem_active_get_engine(&obj->last_write); } static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, @@ -11673,7 +11673,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { engine = &dev_priv->engine[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { - engine = i915_gem_request_get_engine(obj->last_write.request); + engine = i915_gem_active_get_engine(&obj->last_write); if (engine == NULL || engine->id != RCS) engine = &dev_priv->engine[BCS]; } else { @@ -11694,9 +11694,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (mmio_flip) { INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); - i915_gem_request_assign(&work->flip_queued_req, - obj->last_write.request); - + work->flip_queued_req = i915_gem_active_get(&obj->last_write); schedule_work(&work->mmio_work); } else { request = i915_gem_request_alloc(engine, engine->last_context); @@ -14039,11 +14037,8 @@ intel_prepare_plane_fb(struct drm_plane *plane, } if (ret == 0) { - struct intel_plane_state *plane_state = - to_intel_plane_state(new_state); - - i915_gem_request_assign(&plane_state->wait_req, - obj->last_write.request); + to_intel_plane_state(new_state)->wait_req = + i915_gem_active_get(&obj->last_write); } return ret; -- cgit v0.10.2 From d72d908b5619896fe7832b0c4c408e664ffe4e69 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:31 +0100 Subject: drm/i915: Mark up i915_gem_active for locking annotation The future annotations will track the locking used for access to ensure that it is always sufficient. We make the preparations now to present the API ahead and to make sure that GCC can eliminate the unused parameter. Before: 6298417 3619610 696320 10614347 a1f64b vmlinux After: 6298417 3619610 696320 10614347 a1f64b vmlinux (with i915 builtin) Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-12-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 24ff7f4..c595cc8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -155,10 +155,13 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->base.write_domain); for_each_engine_id(engine, dev_priv, id) seq_printf(m, "%x ", - i915_gem_active_get_seqno(&obj->last_read[id])); + i915_gem_active_get_seqno(&obj->last_read[id], + &obj->base.dev->struct_mutex)); seq_printf(m, "] %x %x%s%s%s", - i915_gem_active_get_seqno(&obj->last_write), - i915_gem_active_get_seqno(&obj->last_fence), + i915_gem_active_get_seqno(&obj->last_write, + &obj->base.dev->struct_mutex), + i915_gem_active_get_seqno(&obj->last_fence, + &obj->base.dev->struct_mutex), i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? 
" purgeable" : ""); @@ -196,7 +199,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (%s mappable)", s); } - engine = i915_gem_active_get_engine(&obj->last_write); + engine = i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); if (engine) seq_printf(m, " (%s)", engine->name); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0c158c7..2b6199c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1354,21 +1354,24 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, int ret, i; if (readonly) { - request = i915_gem_active_peek(&obj->last_write); + request = i915_gem_active_peek(&obj->last_write, + &obj->base.dev->struct_mutex); if (request) { ret = i915_wait_request(request); if (ret) return ret; i = request->engine->id; - if (i915_gem_active_peek(&obj->last_read[i]) == request) + if (i915_gem_active_peek(&obj->last_read[i], + &obj->base.dev->struct_mutex) == request) i915_gem_object_retire__read(obj, i); else i915_gem_object_retire__write(obj); } } else { for (i = 0; i < I915_NUM_ENGINES; i++) { - request = i915_gem_active_peek(&obj->last_read[i]); + request = i915_gem_active_peek(&obj->last_read[i], + &obj->base.dev->struct_mutex); if (!request) continue; @@ -1400,9 +1403,11 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj, { int idx = req->engine->id; - if (i915_gem_active_peek(&obj->last_read[idx]) == req) + if (i915_gem_active_peek(&obj->last_read[idx], + &obj->base.dev->struct_mutex) == req) i915_gem_object_retire__read(obj, idx); - else if (i915_gem_active_peek(&obj->last_write) == req) + else if (i915_gem_active_peek(&obj->last_write, + &obj->base.dev->struct_mutex) == req) i915_gem_object_retire__write(obj); if (!i915_reset_in_progress(&req->i915->gpu_error)) @@ -1431,7 +1436,8 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (readonly) { struct drm_i915_gem_request *req; - req = i915_gem_active_get(&obj->last_write); + req = i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); if (req == NULL) return 0; @@ -1440,7 +1446,8 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = i915_gem_active_get(&obj->last_read[i]); + req = i915_gem_active_get(&obj->last_read[i], + &obj->base.dev->struct_mutex); if (req == NULL) continue; @@ -2387,7 +2394,9 @@ static void i915_gem_object_retire__write(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!i915_gem_active_isset(&obj->last_write)); - GEM_BUG_ON(!(obj->active & intel_engine_flag(i915_gem_active_get_engine(&obj->last_write)))); + GEM_BUG_ON(!(obj->active & + intel_engine_flag(i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex)))); i915_gem_active_set(&obj->last_write, NULL); intel_fb_obj_flush(obj, true, ORIGIN_CS); @@ -2405,7 +2414,8 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int idx) list_del_init(&obj->engine_list[idx]); i915_gem_active_set(&obj->last_read[idx], NULL); - engine = i915_gem_active_get_engine(&obj->last_write); + engine = i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); if (engine && engine->id == idx) i915_gem_object_retire__write(obj); @@ -2626,7 +2636,8 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) struct drm_i915_gem_object, engine_list[engine->id]); - if 
(!list_empty(&i915_gem_active_peek(&obj->last_read[engine->id])->list)) + if (!list_empty(&i915_gem_active_peek(&obj->last_read[engine->id], + &obj->base.dev->struct_mutex)->list)) break; i915_gem_object_retire__read(obj, engine->id); @@ -2759,7 +2770,8 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = i915_gem_active_peek(&obj->last_read[i]); + req = i915_gem_active_peek(&obj->last_read[i], + &obj->base.dev->struct_mutex); if (req == NULL) continue; @@ -2837,7 +2849,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = i915_gem_active_get(&obj->last_read[i]); + req = i915_gem_active_get(&obj->last_read[i], + &obj->base.dev->struct_mutex); if (req) requests[n++] = req; } @@ -2932,14 +2945,16 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, if (readonly) { struct drm_i915_gem_request *req; - req = i915_gem_active_peek(&obj->last_write); + req = i915_gem_active_peek(&obj->last_write, + &obj->base.dev->struct_mutex); if (req) requests[n++] = req; } else { for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = i915_gem_active_peek(&obj->last_read[i]); + req = i915_gem_active_peek(&obj->last_read[i], + &obj->base.dev->struct_mutex); if (req) requests[n++] = req; } @@ -4038,11 +4053,13 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, int i; for (i = 0; i < I915_NUM_ENGINES; i++) { - req = i915_gem_active_peek(&obj->last_read[i]); + req = i915_gem_active_peek(&obj->last_read[i], + &obj->base.dev->struct_mutex); if (req) args->busy |= 1 << (16 + req->engine->exec_id); } - req = i915_gem_active_peek(&obj->last_write); + req = i915_gem_active_peek(&obj->last_write, + &obj->base.dev->struct_mutex); if (req) args->busy |= req->engine->exec_id; } diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 9fdbd66..a4ec4fe 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -263,7 +263,8 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) { int ret; - ret = i915_gem_active_wait(&obj->last_fence); + ret = i915_gem_active_wait(&obj->last_fence, + &obj->base.dev->struct_mutex); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index e13834e..5f8d94c 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -296,6 +296,12 @@ i915_gem_active_set(struct i915_gem_active *active, i915_gem_request_assign(&active->request, request); } +static inline struct drm_i915_gem_request * +__i915_gem_active_peek(const struct i915_gem_active *active) +{ + return active->request; +} + /** * i915_gem_active_peek - report the request being monitored * @active - the active tracker @@ -305,7 +311,7 @@ i915_gem_active_set(struct i915_gem_active *active, * caller must hold struct_mutex. */ static inline struct drm_i915_gem_request * -i915_gem_active_peek(const struct i915_gem_active *active) +i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) { return active->request; } @@ -318,11 +324,11 @@ i915_gem_active_peek(const struct i915_gem_active *active) * if the active tracker is idle. The caller must hold struct_mutex. 
*/ static inline struct drm_i915_gem_request * -i915_gem_active_get(const struct i915_gem_active *active) +i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) { struct drm_i915_gem_request *request; - request = i915_gem_active_peek(active); + request = i915_gem_active_peek(active, mutex); if (!request || i915_gem_request_completed(request)) return NULL; @@ -352,11 +358,12 @@ i915_gem_active_isset(const struct i915_gem_active *active) * the caller to hold struct_mutex (but that can be relaxed if desired). */ static inline bool -i915_gem_active_is_idle(const struct i915_gem_active *active) +i915_gem_active_is_idle(const struct i915_gem_active *active, + struct mutex *mutex) { struct drm_i915_gem_request *request; - request = i915_gem_active_peek(active); + request = i915_gem_active_peek(active, mutex); if (!request || i915_gem_request_completed(request)) return true; @@ -372,11 +379,11 @@ i915_gem_active_is_idle(const struct i915_gem_active *active) * retired first, see i915_gem_active_retire(). */ static inline int __must_check -i915_gem_active_wait(const struct i915_gem_active *active) +i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) { struct drm_i915_gem_request *request; - request = i915_gem_active_peek(active); + request = i915_gem_active_peek(active, mutex); if (!request) return 0; @@ -393,9 +400,10 @@ i915_gem_active_wait(const struct i915_gem_active *active) * tracker is idle, the function returns immediately. */ static inline int __must_check -i915_gem_active_retire(const struct i915_gem_active *active) +i915_gem_active_retire(const struct i915_gem_active *active, + struct mutex *mutex) { - return i915_gem_active_wait(active); + return i915_gem_active_wait(active, mutex); } /* Convenience functions for peeking at state inside active's request whilst @@ -403,15 +411,17 @@ i915_gem_active_retire(const struct i915_gem_active *active) */ static inline uint32_t -i915_gem_active_get_seqno(const struct i915_gem_active *active) +i915_gem_active_get_seqno(const struct i915_gem_active *active, + struct mutex *mutex) { - return i915_gem_request_get_seqno(i915_gem_active_peek(active)); + return i915_gem_request_get_seqno(i915_gem_active_peek(active, mutex)); } static inline struct intel_engine_cs * -i915_gem_active_get_engine(const struct i915_gem_active *active) +i915_gem_active_get_engine(const struct i915_gem_active *active, + struct mutex *mutex) { - return i915_gem_request_get_engine(i915_gem_active_peek(active)); + return i915_gem_request_get_engine(i915_gem_active_peek(active, mutex)); } #define for_each_active(mask, idx) \ diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 8cef2d6..fa2eb4a 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -242,7 +242,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } obj->fence_dirty = - !i915_gem_active_is_idle(&obj->last_fence) || + !i915_gem_active_is_idle(&obj->last_fence, + &dev->struct_mutex) || obj->fence_reg != I915_FENCE_REG_NONE; obj->tiling_mode = args->tiling_mode; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 00ab5e9..e57521d 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -74,7 +74,8 @@ static void wait_rendering(struct drm_i915_gem_object *obj) for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = i915_gem_active_get(&obj->last_read[i]); 
+ req = i915_gem_active_get(&obj->last_read[i], + &obj->base.dev->struct_mutex); if (req) requests[n++] = req; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 585fe2b..0d2882a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -742,18 +742,38 @@ unwind: #define i915_error_ggtt_object_create(dev_priv, src) \ i915_error_object_create((dev_priv), (src), &(dev_priv)->ggtt.base) +/* The error capture is special as tries to run underneath the normal + * locking rules - so we use the raw version of the i915_gem_active lookup. + */ +static inline uint32_t +__active_get_seqno(struct i915_gem_active *active) +{ + return i915_gem_request_get_seqno(__i915_gem_active_peek(active)); +} + +static inline int +__active_get_engine_id(struct i915_gem_active *active) +{ + struct intel_engine_cs *engine; + + engine = i915_gem_request_get_engine(__i915_gem_active_peek(active)); + return engine ? engine->id : -1; +} + static void capture_bo(struct drm_i915_error_buffer *err, struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; - struct intel_engine_cs *engine; int i; err->size = obj->base.size; err->name = obj->base.name; + for (i = 0; i < I915_NUM_ENGINES; i++) - err->rseqno[i] = i915_gem_active_get_seqno(&obj->last_read[i]); - err->wseqno = i915_gem_active_get_seqno(&obj->last_write); + err->rseqno[i] = __active_get_seqno(&obj->last_read[i]); + err->wseqno = __active_get_seqno(&obj->last_write); + err->engine = __active_get_engine_id(&obj->last_write); + err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; @@ -766,9 +786,6 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->purgeable = obj->madv != I915_MADV_WILLNEED; err->userptr = obj->userptr.mm != NULL; err->cache_level = obj->cache_level; - - engine = i915_gem_active_get_engine(&obj->last_write); - err->engine = engine ? 
engine->id : -1; } static u32 capture_active_bo(struct drm_i915_error_buffer *err, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8c03d13..d54a3ea 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11370,7 +11370,8 @@ static bool use_mmio_flip(struct intel_engine_cs *engine, if (resv && !reservation_object_test_signaled_rcu(resv, false)) return true; - return engine != i915_gem_active_get_engine(&obj->last_write); + return engine != i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); } static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, @@ -11673,7 +11674,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { engine = &dev_priv->engine[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { - engine = i915_gem_active_get_engine(&obj->last_write); + engine = i915_gem_active_get_engine(&obj->last_write, + &obj->base.dev->struct_mutex); if (engine == NULL || engine->id != RCS) engine = &dev_priv->engine[BCS]; } else { @@ -11694,7 +11696,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (mmio_flip) { INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); - work->flip_queued_req = i915_gem_active_get(&obj->last_write); + work->flip_queued_req = i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); schedule_work(&work->mmio_work); } else { request = i915_gem_request_alloc(engine, engine->last_context); @@ -14038,7 +14041,8 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret == 0) { to_intel_plane_state(new_state)->wait_req = - i915_gem_active_get(&obj->last_write); + i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); } return ret; -- cgit v0.10.2 From 8cac6f6c415bcd559db2f5c05c1bd76d7e378f67 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:32 +0100 Subject: drm/i915: Refactor blocking waits Tidy up the for loops that handle waiting for read/write vs read-only access. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-13-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2b6199c..2c5a818 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1339,6 +1339,23 @@ put_rpm: return ret; } +static void +i915_gem_object_retire_request(struct drm_i915_gem_object *obj, + struct drm_i915_gem_request *req) +{ + int idx = req->engine->id; + + if (i915_gem_active_peek(&obj->last_read[idx], + &obj->base.dev->struct_mutex) == req) + i915_gem_object_retire__read(obj, idx); + else if (i915_gem_active_peek(&obj->last_write, + &obj->base.dev->struct_mutex) == req) + i915_gem_object_retire__write(obj); + + if (!i915_reset_in_progress(&req->i915->gpu_error)) + i915_gem_request_retire_upto(req); +} + /** * Ensures that all rendering to the object has completed and the object is * safe to unbind from the GTT or access from the CPU. 
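The refactored wait loops in the hunks below walk obj->active with the for_each_active() helper introduced earlier in this series. The following stand-alone sketch (plain userspace C; BIT(), the mask value and the printf are mock stand-ins for the kernel definitions) shows how that macro visits each set bit of the mask exactly once:

/* Userspace illustration of for_each_active(); not driver code. */
#include <stdio.h>
#include <strings.h>	/* ffs() */

#define BIT(n) (1UL << (n))

/* Same shape as the helper added in i915_gem_request.h above. */
#define for_each_active(mask, idx) \
	for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))

int main(void)
{
	unsigned long mask = BIT(0) | BIT(2) | BIT(3);	/* e.g. obj->active */
	int idx;

	/* Prints engines 0, 2 and 3: one iteration per set bit; the bit is
	 * cleared from the (local) mask after each pass.
	 */
	for_each_active(mask, idx)
		printf("engine %d is active\n", idx);

	return 0;
}

For read-only waits the patch forces active_mask to 1 and points active at &obj->last_write, so the same loop body covers both the single-write and the per-engine read cases.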
@@ -1349,39 +1366,34 @@ int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, bool readonly) { - struct drm_i915_gem_request *request; struct reservation_object *resv; - int ret, i; + struct i915_gem_active *active; + unsigned long active_mask; + int idx, ret; - if (readonly) { - request = i915_gem_active_peek(&obj->last_write, - &obj->base.dev->struct_mutex); - if (request) { - ret = i915_wait_request(request); - if (ret) - return ret; + lockdep_assert_held(&obj->base.dev->struct_mutex); - i = request->engine->id; - if (i915_gem_active_peek(&obj->last_read[i], - &obj->base.dev->struct_mutex) == request) - i915_gem_object_retire__read(obj, i); - else - i915_gem_object_retire__write(obj); - } + if (!readonly) { + active = obj->last_read; + active_mask = obj->active; } else { - for (i = 0; i < I915_NUM_ENGINES; i++) { - request = i915_gem_active_peek(&obj->last_read[i], - &obj->base.dev->struct_mutex); - if (!request) - continue; + active_mask = 1; + active = &obj->last_write; + } - ret = i915_wait_request(request); - if (ret) - return ret; + for_each_active(active_mask, idx) { + struct drm_i915_gem_request *request; - i915_gem_object_retire__read(obj, i); - } - GEM_BUG_ON(obj->active); + request = i915_gem_active_peek(&active[idx], + &obj->base.dev->struct_mutex); + if (!request) + continue; + + ret = i915_wait_request(request); + if (ret) + return ret; + + i915_gem_object_retire_request(obj, request); } resv = i915_gem_object_get_dmabuf_resv(obj); @@ -1397,23 +1409,6 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, return 0; } -static void -i915_gem_object_retire_request(struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req) -{ - int idx = req->engine->id; - - if (i915_gem_active_peek(&obj->last_read[idx], - &obj->base.dev->struct_mutex) == req) - i915_gem_object_retire__read(obj, idx); - else if (i915_gem_active_peek(&obj->last_write, - &obj->base.dev->struct_mutex) == req) - i915_gem_object_retire__write(obj); - - if (!i915_reset_in_progress(&req->i915->gpu_error)) - i915_gem_request_retire_upto(req); -} - /* A nonblocking variant of the above wait. This is a highly dangerous routine * as the object state may change during this call. 
*/ @@ -1425,34 +1420,31 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; + struct i915_gem_active *active; + unsigned long active_mask; int ret, i, n = 0; BUG_ON(!mutex_is_locked(&dev->struct_mutex)); BUG_ON(!dev_priv->mm.interruptible); - if (!obj->active) + active_mask = obj->active; + if (!active_mask) return 0; - if (readonly) { - struct drm_i915_gem_request *req; - - req = i915_gem_active_get(&obj->last_write, - &obj->base.dev->struct_mutex); - if (req == NULL) - return 0; - - requests[n++] = req; + if (!readonly) { + active = obj->last_read; } else { - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; + active_mask = 1; + active = &obj->last_write; + } - req = i915_gem_active_get(&obj->last_read[i], - &obj->base.dev->struct_mutex); - if (req == NULL) - continue; + for_each_active(active_mask, i) { + struct drm_i915_gem_request *req; + req = i915_gem_active_get(&active[i], + &obj->base.dev->struct_mutex); + if (req) requests[n++] = req; - } } mutex_unlock(&dev->struct_mutex); @@ -2934,33 +2926,33 @@ int i915_gem_object_sync(struct drm_i915_gem_object *obj, struct drm_i915_gem_request *to) { - const bool readonly = obj->base.pending_write_domain == 0; - struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; - int ret, i, n; + struct i915_gem_active *active; + unsigned long active_mask; + int idx; - if (!obj->active) - return 0; + lockdep_assert_held(&obj->base.dev->struct_mutex); - n = 0; - if (readonly) { - struct drm_i915_gem_request *req; + active_mask = obj->active; + if (!active_mask) + return 0; - req = i915_gem_active_peek(&obj->last_write, - &obj->base.dev->struct_mutex); - if (req) - requests[n++] = req; + if (obj->base.pending_write_domain) { + active = obj->last_read; } else { - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = i915_gem_active_peek(&obj->last_read[i], - &obj->base.dev->struct_mutex); - if (req) - requests[n++] = req; - } + active_mask = 1; + active = &obj->last_write; } - for (i = 0; i < n; i++) { - ret = __i915_gem_object_sync(obj, to, requests[i]); + + for_each_active(active_mask, idx) { + struct drm_i915_gem_request *request; + int ret; + + request = i915_gem_active_peek(&active[idx], + &obj->base.dev->struct_mutex); + if (!request) + continue; + + ret = __i915_gem_object_sync(obj, to, request); if (ret) return ret; } -- cgit v0.10.2 From efdf7c0605ba0eeab1a002c43a84f959099aa8f0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:33 +0100 Subject: drm/i915: Rename request->list to link for consistency We use "list" to denote the list and "link" to denote an element on that list. Rename request->list to match this idiom. 
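To make the idiom concrete, here is a small kernel-style fragment (the example_engine/example_request structures are made up for illustration and it is not meant to build stand-alone): the owner holds the "list" head, while each element is reached through its "link" member, exactly as with the renamed request->link.

/* Illustrative fragment only; simplified structures, real <linux/list.h>. */
#include <linux/list.h>

struct example_request {
	struct list_head link;		/* entry on engine->request_list */
};

struct example_engine {
	struct list_head request_list;	/* the list of outstanding requests */
};

static unsigned int count_requests(struct example_engine *engine)
{
	struct example_request *rq;
	unsigned int count = 0;

	/* "request_list" names the list, "link" names each element's hook. */
	list_for_each_entry(rq, &engine->request_list, link)
		count++;

	return count;
}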
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-14-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c595cc8..fe3c823 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -746,13 +746,13 @@ static int i915_gem_request_info(struct seq_file *m, void *data) int count; count = 0; - list_for_each_entry(req, &engine->request_list, list) + list_for_each_entry(req, &engine->request_list, link) count++; if (count == 0) continue; seq_printf(m, "%s requests: %d\n", engine->name, count); - list_for_each_entry(req, &engine->request_list, list) { + list_for_each_entry(req, &engine->request_list, link) { struct task_struct *task; rcu_read_lock(); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2c5a818..49e79f0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2475,7 +2475,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - list_for_each_entry(request, &engine->request_list, list) { + list_for_each_entry(request, &engine->request_list, link) { if (i915_gem_request_completed(request)) continue; @@ -2497,7 +2497,7 @@ static void i915_gem_reset_engine_status(struct intel_engine_cs *engine) ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; i915_set_reset_status(request->ctx, ring_hung); - list_for_each_entry_continue(request, &engine->request_list, list) + list_for_each_entry_continue(request, &engine->request_list, link) i915_set_reset_status(request->ctx, false); } @@ -2546,7 +2546,7 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) request = list_last_entry(&engine->request_list, struct drm_i915_gem_request, - list); + link); i915_gem_request_retire_upto(request); } @@ -2609,7 +2609,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) request = list_first_entry(&engine->request_list, struct drm_i915_gem_request, - list); + link); if (!i915_gem_request_completed(request)) break; @@ -2629,7 +2629,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) engine_list[engine->id]); if (!list_empty(&i915_gem_active_peek(&obj->last_read[engine->id], - &obj->base.dev->struct_mutex)->list)) + &obj->base.dev->struct_mutex)->link)) break; i915_gem_object_retire__read(obj, engine->id); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 11c19e7..7802156 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -160,7 +160,7 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) static void i915_gem_request_retire(struct drm_i915_gem_request *request) { trace_i915_gem_request_retire(request); - list_del_init(&request->list); + list_del_init(&request->link); /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position @@ -191,12 +191,12 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) lockdep_assert_held(&req->i915->drm.struct_mutex); - if (list_empty(&req->list)) + if (list_empty(&req->link)) return; do { tmp = list_first_entry(&engine->request_list, - typeof(*tmp), list); + typeof(*tmp), link); i915_gem_request_retire(tmp); } while (tmp != req); @@ -317,7 
+317,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->request_list, - typeof(*req), list); + typeof(*req), link); if (req && i915_gem_request_completed(req)) i915_gem_request_retire(req); @@ -450,7 +450,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, request->emitted_jiffies = jiffies; request->previous_seqno = engine->last_submitted_seqno; smp_store_mb(engine->last_submitted_seqno, request->fence.seqno); - list_add_tail(&request->list, &engine->request_list); + list_add_tail(&request->link, &engine->request_list); /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the @@ -570,7 +570,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, might_sleep(); - if (list_empty(&req->list)) + if (list_empty(&req->link)) return 0; if (i915_gem_request_completed(req)) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 5f8d94c..3e40661 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -105,8 +105,8 @@ struct drm_i915_gem_request { /** Time at which this request was emitted, in jiffies. */ unsigned long emitted_jiffies; - /** global list entry for this request */ - struct list_head list; + /** engine->request_list entry for this request */ + struct list_head link; struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 0d2882a..c19f72e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1168,7 +1168,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, i915_gem_record_active_context(engine, error, ee); count = 0; - list_for_each_entry(request, &engine->request_list, list) + list_for_each_entry(request, &engine->request_list, link) count++; ee->num_requests = count; @@ -1180,7 +1180,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, } count = 0; - list_for_each_entry(request, &engine->request_list, list) { + list_for_each_entry(request, &engine->request_list, link) { struct drm_i915_error_request *erq; if (count >= ee->num_requests) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ecf4278..76ddc7c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2235,7 +2235,7 @@ int intel_engine_idle(struct intel_engine_cs *engine) req = list_entry(engine->request_list.prev, struct drm_i915_gem_request, - list); + link); /* Make sure we do not trigger any retires */ return __i915_wait_request(req, @@ -2284,7 +2284,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) */ GEM_BUG_ON(!req->reserved_space); - list_for_each_entry(target, &engine->request_list, list) { + list_for_each_entry(target, &engine->request_list, link) { unsigned space; /* @@ -2302,7 +2302,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) break; } - if (WARN_ON(&target->list == &engine->request_list)) + if (WARN_ON(&target->link == &engine->request_list)) return -ENOSPC; return i915_wait_request(target); -- cgit v0.10.2 From 21c310f2f968fce8d06d8b001bd3df287189c812 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:34 +0100 Subject: drm/i915: Remove obsolete 
i915_gem_object_flush_active() Since we track requests, and requests are always added to the GPU fully formed, we never have to flush the incomplete request and know that the given request will eventually complete without any further action on our part. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-15-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 49e79f0..54732ff6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2746,35 +2746,6 @@ out_rearm: } /** - * Ensures that an object will eventually get non-busy by flushing any required - * write domains, emitting any outstanding lazy request and retiring and - * completed requests. - * @obj: object to flush - */ -static int -i915_gem_object_flush_active(struct drm_i915_gem_object *obj) -{ - int i; - - if (!obj->active) - return 0; - - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = i915_gem_active_peek(&obj->last_read[i], - &obj->base.dev->struct_mutex); - if (req == NULL) - continue; - - if (i915_gem_request_completed(req)) - i915_gem_object_retire__read(obj, i); - } - - return 0; -} - -/** * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT * @dev: drm device pointer * @data: ioctl data blob @@ -2820,24 +2791,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return -ENOENT; } - /* Need to make sure the object gets inactive eventually. */ - ret = i915_gem_object_flush_active(obj); - if (ret) - goto out; - if (!obj->active) goto out; - /* Do this after OLR check to make sure we make forward progress polling - * on this IOCTL with a timeout == 0 (like busy ioctl) - */ - if (args->timeout_ns == 0) { - ret = -ETIME; - goto out; - } - - i915_gem_object_put(obj); - for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; @@ -2847,6 +2803,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) requests[n++] = req; } +out: + i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); for (i = 0; i < n; i++) { @@ -2857,11 +2815,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) i915_gem_request_put(requests[i]); } return ret; - -out: - i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); - return ret; } static int @@ -4032,13 +3985,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, /* Count all active objects as busy, even if they are currently not used * by the gpu. Users of this interface expect objects to eventually - * become non-busy without any further actions, therefore emit any - * necessary flushes here. + * become non-busy without any further actions. */ - ret = i915_gem_object_flush_active(obj); - if (ret) - goto unref; - args->busy = 0; if (obj->active) { struct drm_i915_gem_request *req; @@ -4056,7 +4004,6 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, args->busy |= req->engine->exec_id; } -unref: i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); -- cgit v0.10.2 From fa545cbf9765914da302beb74d68cfd8f21b3843 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:35 +0100 Subject: drm/i915: Refactor activity tracking for requests With the introduction of requests, we amplified the number of atomic refcounted objects we use and update every execbuffer; from none to several references, and a set of references that need to be changed. 
We also introduced interesting side-effects in the order of retiring requests and objects. Instead of independently tracking the last request for an object, track the active objects for each request. The object will reside in the buffer list of its most recent active request and so we reduce the kref interchange to a list_move. Now retirements are entirely driven by the request, dramatically simplifying activity tracking on the object themselves, and removing the ambiguity between retiring objects and retiring requests. Furthermore with the consolidation of managing the activity tracking centrally, we can look forward to using RCU to enable lockless lookup of the current active requests for an object. In the future, we will be able to query the status or wait upon rendering to an object without even touching the struct_mutex BKL. All told, less code, simpler and faster, and more extensible. v2: Add a typedef for the function pointer for convenience later. v3: Make the noop retirement callback explicit. Allow passing NULL to the init_request_active() which is expanded to a common noop function. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-16-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 6092f0e..dda724f 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -25,7 +25,6 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o i915-y += i915_cmd_parser.o \ i915_gem_batch_pool.o \ i915_gem_context.o \ - i915_gem_debug.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ i915_gem_execbuffer.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 17a206f..6b57d8f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -432,8 +432,6 @@ void intel_link_compute_m_n(int bpp, int nlanes, #define DRIVER_MINOR 6 #define DRIVER_PATCHLEVEL 0 -#define WATCH_LISTS 0 - struct opregion_header; struct opregion_acpi; struct opregion_swsci; @@ -2153,7 +2151,6 @@ struct drm_i915_gem_object { struct drm_mm_node *stolen; struct list_head global_list; - struct list_head engine_list[I915_NUM_ENGINES]; /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; @@ -3463,13 +3460,6 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec obj->tiling_mode != I915_TILING_NONE; } -/* i915_gem_debug.c */ -#if WATCH_LISTS -int i915_verify_lists(struct drm_device *dev); -#else -#define i915_verify_lists(dev) 0 -#endif - /* i915_debugfs.c */ #ifdef CONFIG_DEBUG_FS int i915_debugfs_register(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 54732ff6..a2eaa44 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -43,10 +43,6 @@ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static void -i915_gem_object_retire__write(struct drm_i915_gem_object *obj); -static void -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int engine); static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) @@ -141,7 +137,6 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) if (ret) return ret; - WARN_ON(i915_verify_lists(dev)); return 0; } @@ -1339,23 +1334,6 @@ put_rpm: return ret; } -static void 
-i915_gem_object_retire_request(struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req) -{ - int idx = req->engine->id; - - if (i915_gem_active_peek(&obj->last_read[idx], - &obj->base.dev->struct_mutex) == req) - i915_gem_object_retire__read(obj, idx); - else if (i915_gem_active_peek(&obj->last_write, - &obj->base.dev->struct_mutex) == req) - i915_gem_object_retire__write(obj); - - if (!i915_reset_in_progress(&req->i915->gpu_error)) - i915_gem_request_retire_upto(req); -} - /** * Ensures that all rendering to the object has completed and the object is * safe to unbind from the GTT or access from the CPU. @@ -1382,18 +1360,10 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, } for_each_active(active_mask, idx) { - struct drm_i915_gem_request *request; - - request = i915_gem_active_peek(&active[idx], - &obj->base.dev->struct_mutex); - if (!request) - continue; - - ret = i915_wait_request(request); + ret = i915_gem_active_wait(&active[idx], + &obj->base.dev->struct_mutex); if (ret) return ret; - - i915_gem_object_retire_request(obj, request); } resv = i915_gem_object_get_dmabuf_resv(obj); @@ -1453,11 +1423,8 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, ret = __i915_wait_request(requests[i], true, NULL, rps); mutex_lock(&dev->struct_mutex); - for (i = 0; i < n; i++) { - if (ret == 0) - i915_gem_object_retire_request(obj, requests[i]); + for (i = 0; i < n; i++) i915_gem_request_put(requests[i]); - } return ret; } @@ -2376,40 +2343,31 @@ void i915_vma_move_to_active(struct i915_vma *vma, i915_gem_object_get(obj); obj->active |= intel_engine_flag(engine); - list_move_tail(&obj->engine_list[engine->id], &engine->active_list); i915_gem_active_set(&obj->last_read[engine->id], req); list_move_tail(&vma->vm_link, &vma->vm->active_list); } static void -i915_gem_object_retire__write(struct drm_i915_gem_object *obj) +i915_gem_object_retire__write(struct i915_gem_active *active, + struct drm_i915_gem_request *request) { - GEM_BUG_ON(!i915_gem_active_isset(&obj->last_write)); - GEM_BUG_ON(!(obj->active & - intel_engine_flag(i915_gem_active_get_engine(&obj->last_write, - &obj->base.dev->struct_mutex)))); + struct drm_i915_gem_object *obj = + container_of(active, struct drm_i915_gem_object, last_write); - i915_gem_active_set(&obj->last_write, NULL); intel_fb_obj_flush(obj, true, ORIGIN_CS); } static void -i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int idx) +i915_gem_object_retire__read(struct i915_gem_active *active, + struct drm_i915_gem_request *request) { - struct intel_engine_cs *engine; + int idx = request->engine->id; + struct drm_i915_gem_object *obj = + container_of(active, struct drm_i915_gem_object, last_read[idx]); struct i915_vma *vma; - GEM_BUG_ON(!i915_gem_active_isset(&obj->last_read[idx])); - GEM_BUG_ON(!(obj->active & (1 << idx))); - - list_del_init(&obj->engine_list[idx]); - i915_gem_active_set(&obj->last_read[idx], NULL); - - engine = i915_gem_active_get_engine(&obj->last_write, - &obj->base.dev->struct_mutex); - if (engine && engine->id == idx) - i915_gem_object_retire__write(obj); + GEM_BUG_ON((obj->active & (1 << idx)) == 0); obj->active &= ~(1 << idx); if (obj->active) @@ -2419,15 +2377,13 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int idx) * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) 
*/ - list_move_tail(&obj->global_list, - &to_i915(obj->base.dev)->mm.bound_list); + list_move_tail(&obj->global_list, &request->i915->mm.bound_list); list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!list_empty(&vma->vm_link)) list_move_tail(&vma->vm_link, &vma->vm->inactive_list); } - i915_gem_active_set(&obj->last_fence, NULL); i915_gem_object_put(obj); } @@ -2505,16 +2461,6 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) { struct intel_ring *ring; - while (!list_empty(&engine->active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&engine->active_list, - struct drm_i915_gem_object, - engine_list[engine->id]); - - i915_gem_object_retire__read(obj, engine->id); - } - /* Mark all pending requests as complete so that any concurrent * (lockless) lookup doesn't try and wait upon the request as we * reset it. @@ -2586,8 +2532,6 @@ void i915_gem_reset(struct drm_device *dev) i915_gem_context_reset(dev); i915_gem_restore_fences(dev); - - WARN_ON(i915_verify_lists(dev)); } /** @@ -2597,13 +2541,6 @@ void i915_gem_reset(struct drm_device *dev) void i915_gem_retire_requests_ring(struct intel_engine_cs *engine) { - WARN_ON(i915_verify_lists(engine->dev)); - - /* Retire requests first as we use it above for the early return. - * If we retire requests last, we may use a later seqno and so clear - * the requests lists without clearing the active list, leading to - * confusion. - */ while (!list_empty(&engine->request_list)) { struct drm_i915_gem_request *request; @@ -2616,26 +2553,6 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) i915_gem_request_retire_upto(request); } - - /* Move any buffers on the active list that are no longer referenced - * by the ringbuffer to the flushing/inactive lists as appropriate, - * before we free the context associated with the requests. 
- */ - while (!list_empty(&engine->active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&engine->active_list, - struct drm_i915_gem_object, - engine_list[engine->id]); - - if (!list_empty(&i915_gem_active_peek(&obj->last_read[engine->id], - &obj->base.dev->struct_mutex)->link)) - break; - - i915_gem_object_retire__read(obj, engine->id); - } - - WARN_ON(i915_verify_lists(engine->dev)); } void i915_gem_retire_requests(struct drm_i915_private *dev_priv) @@ -2818,8 +2735,7 @@ out: } static int -__i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *to, +__i915_gem_object_sync(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from) { int ret; @@ -2827,9 +2743,6 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, if (to->engine == from->engine) return 0; - if (i915_gem_request_completed(from)) - return 0; - if (!i915.semaphores) { ret = __i915_wait_request(from, from->i915->mm.interruptible, @@ -2837,8 +2750,6 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, NO_WAITBOOST); if (ret) return ret; - - i915_gem_object_retire_request(obj, from); } else { int idx = intel_engine_sync_index(from->engine, to->engine); if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx]) @@ -2905,7 +2816,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, if (!request) continue; - ret = __i915_gem_object_sync(obj, to, request); + ret = __i915_gem_object_sync(to, request); if (ret) return ret; } @@ -3041,7 +2952,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv) return ret; } - WARN_ON(i915_verify_lists(dev)); return 0; } @@ -4081,7 +3991,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->global_list); for (i = 0; i < I915_NUM_ENGINES; i++) - INIT_LIST_HEAD(&obj->engine_list[i]); + init_request_active(&obj->last_read[i], + i915_gem_object_retire__read); + init_request_active(&obj->last_write, + i915_gem_object_retire__write); + init_request_active(&obj->last_fence, NULL); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4574,7 +4488,6 @@ i915_gem_cleanup_engines(struct drm_device *dev) static void init_engine_lists(struct intel_engine_cs *engine) { - INIT_LIST_HEAD(&engine->active_list); INIT_LIST_HEAD(&engine->request_list); } diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c deleted file mode 100644 index a565164..0000000 --- a/drivers/gpu/drm/i915/i915_gem_debug.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright © 2008 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Keith Packard - * - */ - -#include -#include -#include "i915_drv.h" - -#if WATCH_LISTS -int -i915_verify_lists(struct drm_device *dev) -{ - static int warned; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - int err = 0; - - if (warned) - return 0; - - for_each_engine(engine, dev_priv) { - list_for_each_entry(obj, &engine->active_list, - engine_list[engine->id]) { - if (obj->base.dev != dev || - !atomic_read(&obj->base.refcount.refcount)) { - DRM_ERROR("%s: freed active obj %p\n", - engine->name, obj); - err++; - break; - } else if (!obj->active || - obj->last_read_req[engine->id] == NULL) { - DRM_ERROR("%s: invalid active obj %p\n", - engine->name, obj); - err++; - } else if (obj->base.write_domain) { - DRM_ERROR("%s: invalid write obj %p (w %x)\n", - engine->name, - obj, obj->base.write_domain); - err++; - } - } - } - - return warned = err; -} -#endif /* WATCH_LIST */ diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index a4ec4fe..dbaab9c 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -261,15 +261,8 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) static int i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) { - int ret; - - ret = i915_gem_active_wait(&obj->last_fence, - &obj->base.dev->struct_mutex); - if (ret) - return ret; - - i915_gem_active_set(&obj->last_fence, NULL); - return 0; + return i915_gem_active_retire(&obj->last_fence, + &obj->base.dev->struct_mutex); } /** diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 7802156..cdaaeb6 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -22,6 +22,8 @@ * */ +#include + #include "i915_drv.h" static const char *i915_fence_get_driver_name(struct fence *fence) @@ -157,8 +159,16 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) request->pid = NULL; } +void i915_gem_retire_noop(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + /* Space left intentionally blank */ +} + static void i915_gem_request_retire(struct drm_i915_gem_request *request) { + struct i915_gem_active *active, *next; + trace_i915_gem_request_retire(request); list_del_init(&request->link); @@ -172,6 +182,33 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) */ request->ring->last_retired_head = request->postfix; + /* Walk through the active list, calling retire on each. This allows + * objects to track their GPU activity and mark themselves as idle + * when their *last* active request is completed (updating state + * tracking lists for eviction, active references for GEM, etc). + * + * As the ->retire() may free the node, we decouple it first and + * pass along the auxiliary information (to avoid dereferencing + * the node after the callback). 
+ */ + list_for_each_entry_safe(active, next, &request->active_list, link) { + /* In microbenchmarks or focusing upon time inside the kernel, + * we may spend an inordinate amount of time simply handling + * the retirement of requests and processing their callbacks. + * Of which, this loop itself is particularly hot due to the + * cache misses when jumping around the list of i915_gem_active. + * So we try to keep this loop as streamlined as possible and + * also prefetch the next i915_gem_active to try and hide + * the likely cache miss. + */ + prefetchw(next); + + INIT_LIST_HEAD(&active->link); + active->request = NULL; + + active->retire(active, request); + } + i915_gem_request_remove_from_client(request); if (request->previous_context) { @@ -200,8 +237,6 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) i915_gem_request_retire(tmp); } while (tmp != req); - - WARN_ON(i915_verify_lists(engine->dev)); } static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible) @@ -336,6 +371,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, engine->fence_context, seqno); + INIT_LIST_HEAD(&req->active_list); req->i915 = dev_priv; req->engine = engine; req->ctx = i915_gem_context_get(ctx); @@ -570,9 +606,6 @@ int __i915_wait_request(struct drm_i915_gem_request *req, might_sleep(); - if (list_empty(&req->link)) - return 0; - if (i915_gem_request_completed(req)) return 0; @@ -705,10 +738,13 @@ int i915_wait_request(struct drm_i915_gem_request *req) { int ret; - GEM_BUG_ON(!req); lockdep_assert_held(&req->i915->drm.struct_mutex); + GEM_BUG_ON(list_empty(&req->link)); - ret = __i915_wait_request(req, req->i915->mm.interruptible, NULL, NULL); + ret = __i915_wait_request(req, + req->i915->mm.interruptible, + NULL, + NULL); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 3e40661..6cfae20 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -101,6 +101,7 @@ struct drm_i915_gem_request { * error state dump only). */ struct drm_i915_gem_object *batch_obj; + struct list_head active_list; /** Time at which this request was emitted, in jiffies. */ unsigned long emitted_jiffies; @@ -213,8 +214,12 @@ struct intel_rps_client; int __i915_wait_request(struct drm_i915_gem_request *req, bool interruptible, s64 *timeout, - struct intel_rps_client *rps); -int __must_check i915_wait_request(struct drm_i915_gem_request *req); + struct intel_rps_client *rps) + __attribute__((nonnull(1))); + +int __must_check +i915_wait_request(struct drm_i915_gem_request *req) + __attribute__((nonnull)); static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); @@ -276,10 +281,39 @@ static inline bool i915_spin_request(const struct drm_i915_gem_request *request, * can then perform any action, such as delayed freeing of an active * resource including itself. 
*/ +struct i915_gem_active; + +typedef void (*i915_gem_retire_fn)(struct i915_gem_active *, + struct drm_i915_gem_request *); + struct i915_gem_active { struct drm_i915_gem_request *request; + struct list_head link; + i915_gem_retire_fn retire; }; +void i915_gem_retire_noop(struct i915_gem_active *, + struct drm_i915_gem_request *request); + +/** + * init_request_active - prepares the activity tracker for use + * @active - the active tracker + * @func - a callback when then the tracker is retired (becomes idle), + * can be NULL + * + * init_request_active() prepares the embedded @active struct for use as + * an activity tracker, that is for tracking the last known active request + * associated with it. When the last request becomes idle, when it is retired + * after completion, the optional callback @func is invoked. + */ +static inline void +init_request_active(struct i915_gem_active *active, + i915_gem_retire_fn retire) +{ + INIT_LIST_HEAD(&active->link); + active->retire = retire ?: i915_gem_retire_noop; +} + /** * i915_gem_active_set - updates the tracker to watch the current request * @active - the active tracker @@ -293,7 +327,8 @@ static inline void i915_gem_active_set(struct i915_gem_active *active, struct drm_i915_gem_request *request) { - i915_gem_request_assign(&active->request, request); + list_move(&active->link, &request->active_list); + active->request = request; } static inline struct drm_i915_gem_request * @@ -303,17 +338,23 @@ __i915_gem_active_peek(const struct i915_gem_active *active) } /** - * i915_gem_active_peek - report the request being monitored + * i915_gem_active_peek - report the active request being monitored * @active - the active tracker * - * i915_gem_active_peek() returns the current request being tracked, or NULL. - * It does not obtain a reference on the request for the caller, so the - * caller must hold struct_mutex. + * i915_gem_active_peek() returns the current request being tracked if + * still active, or NULL. It does not obtain a reference on the request + * for the caller, so the caller must hold struct_mutex. */ static inline struct drm_i915_gem_request * i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) { - return active->request; + struct drm_i915_gem_request *request; + + request = active->request; + if (!request || i915_gem_request_completed(request)) + return NULL; + + return request; } /** @@ -326,13 +367,7 @@ i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) static inline struct drm_i915_gem_request * i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) { - struct drm_i915_gem_request *request; - - request = i915_gem_active_peek(active, mutex); - if (!request || i915_gem_request_completed(request)) - return NULL; - - return i915_gem_request_get(request); + return i915_gem_request_get(i915_gem_active_peek(active, mutex)); } /** @@ -361,13 +396,7 @@ static inline bool i915_gem_active_is_idle(const struct i915_gem_active *active, struct mutex *mutex) { - struct drm_i915_gem_request *request; - - request = i915_gem_active_peek(active, mutex); - if (!request || i915_gem_request_completed(request)) - return true; - - return false; + return !i915_gem_active_peek(active, mutex); } /** @@ -377,6 +406,9 @@ i915_gem_active_is_idle(const struct i915_gem_active *active, * i915_gem_active_wait() waits until the request is completed before * returning. Note that it does not guarantee that the request is * retired first, see i915_gem_active_retire(). 
+ * + * i915_gem_active_wait() returns immediately if the active + * request is already complete. */ static inline int __must_check i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) @@ -387,7 +419,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) if (!request) return 0; - return i915_wait_request(request); + return __i915_wait_request(request, true, NULL, NULL); } /** @@ -400,10 +432,25 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) * tracker is idle, the function returns immediately. */ static inline int __must_check -i915_gem_active_retire(const struct i915_gem_active *active, +i915_gem_active_retire(struct i915_gem_active *active, struct mutex *mutex) { - return i915_gem_active_wait(active, mutex); + struct drm_i915_gem_request *request; + int ret; + + request = active->request; + if (!request) + return 0; + + ret = __i915_wait_request(request, true, NULL, NULL); + if (ret) + return ret; + + list_del_init(&active->link); + active->request = NULL; + active->retire(active, request); + + return 0; } /* Convenience functions for peeking at state inside active's request whilst diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 4ec914e..202ad83 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -177,7 +177,6 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine) */ void intel_engine_setup_common(struct intel_engine_cs *engine) { - INIT_LIST_HEAD(&engine->active_list); INIT_LIST_HEAD(&engine->request_list); INIT_LIST_HEAD(&engine->buffers); INIT_LIST_HEAD(&engine->execlist_queue); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 1706241..236e7a2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -293,18 +293,6 @@ struct intel_engine_cs { u32 ctx_desc_template; /** - * List of objects currently involved in rendering from the - * ringbuffer. - * - * Includes buffers having the contents of their GPU caches - * flushed, not necessarily primitives. last_read_req - * represents when the rendering involved will be completed. - * - * A reference is held on the buffer while on this list. - */ - struct list_head active_list; - - /** * List of breadcrumbs associated with GPU requests currently * outstanding. */ -- cgit v0.10.2 From 675d9ad71b0b7a6101500fea3e7b2b0c5bb6a8fd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:36 +0100 Subject: drm/i915: Track requests inside each intel_ring By tracking each request occupying space inside an individual intel_ring, we can greatly simplify the logic of tracking available space and not worry about other timelines. (Each ring is an ordered timeline of committed requests.) Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-17-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index cdaaeb6..a91e79f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -180,6 +180,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) * Note this requires that we are always called in request * completion order. 
*/ + list_del(&request->ring_link); request->ring->last_retired_head = request->postfix; /* Walk through the active list, calling retire on each. This allows @@ -487,6 +488,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, request->previous_seqno = engine->last_submitted_seqno; smp_store_mb(engine->last_submitted_seqno, request->fence.seqno); list_add_tail(&request->link, &engine->request_list); + list_add_tail(&request->ring_link, &ring->request_list); /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 6cfae20..ed16704 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -109,6 +109,9 @@ struct drm_i915_gem_request { /** engine->request_list entry for this request */ struct list_head link; + /** ring->request_list entry for this request */ + struct list_head ring_link; + struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ struct list_head client_list; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 76ddc7c..5b0eac2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2046,6 +2046,8 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) ring->engine = engine; list_add(&ring->link, &engine->buffers); + INIT_LIST_HEAD(&ring->request_list); + ring->size = size; /* Workaround an erratum on the i830 which causes a hang if * the TAIL pointer points to within the last 2 cachelines @@ -2266,7 +2268,6 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) static int wait_for_space(struct drm_i915_gem_request *req, int bytes) { struct intel_ring *ring = req->ring; - struct intel_engine_cs *engine = req->engine; struct drm_i915_gem_request *target; intel_ring_update_space(ring); @@ -2284,17 +2285,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) */ GEM_BUG_ON(!req->reserved_space); - list_for_each_entry(target, &engine->request_list, link) { + list_for_each_entry(target, &ring->request_list, ring_link) { unsigned space; - /* - * The request queue is per-engine, so can contain requests - * from multiple ringbuffers. Here, we must ignore any that - * aren't from the ringbuffer we're considering. - */ - if (target->ring != ring) - continue; - /* Would completion of this request free enough space? 
*/ space = __intel_ring_space(target->postfix, ring->tail, ring->size); @@ -2302,7 +2295,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) break; } - if (WARN_ON(&target->link == &engine->request_list)) + if (WARN_ON(&target->ring_link == &ring->request_list)) return -ENOSPC; return i915_wait_request(target); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 236e7a2..88952bf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -90,6 +90,8 @@ struct intel_ring { struct intel_engine_cs *engine; struct list_head link; + struct list_head request_list; + u32 head; u32 tail; int space; -- cgit v0.10.2 From 0d9bdd886f295b62c254a99da4153e7d287adb12 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:37 +0100 Subject: drm/i915: Convert intel_overlay to request tracking intel_overlay already tracks its last flip request, along with action to take after its completion. Refactor intel_overlay to reuse the common i915_gem_active tracker. v2: Now using i915_gem_retire_fn typedef References: https://bugs.freedesktop.org/show_bug.cgi?id=93730 References: https://bugs.freedesktop.org/show_bug.cgi?id=96851 Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-18-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 651efe4..2c598d6 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -183,8 +183,7 @@ struct intel_overlay { u32 flip_addr; struct drm_i915_gem_object *reg_bo; /* flip handling */ - struct drm_i915_gem_request *last_flip_req; - void (*flip_tail)(struct intel_overlay *); + struct i915_gem_active last_flip; }; static struct overlay_registers __iomem * @@ -210,23 +209,24 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay, io_mapping_unmap(regs); } -static int intel_overlay_do_wait_request(struct intel_overlay *overlay, +static void intel_overlay_submit_request(struct intel_overlay *overlay, struct drm_i915_gem_request *req, - void (*tail)(struct intel_overlay *)) + i915_gem_retire_fn retire) { - int ret; - - WARN_ON(overlay->last_flip_req); - i915_gem_request_assign(&overlay->last_flip_req, req); + GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, + &overlay->i915->drm.struct_mutex)); + overlay->last_flip.retire = retire; + i915_gem_active_set(&overlay->last_flip, req); i915_add_request(req); +} - overlay->flip_tail = tail; - ret = i915_wait_request(overlay->last_flip_req); - if (ret) - return ret; - - i915_gem_request_assign(&overlay->last_flip_req, NULL); - return 0; +static int intel_overlay_do_wait_request(struct intel_overlay *overlay, + struct drm_i915_gem_request *req, + i915_gem_retire_fn retire) +{ + intel_overlay_submit_request(overlay, req, retire); + return i915_gem_active_retire(&overlay->last_flip, + &overlay->i915->drm.struct_mutex); } static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay) @@ -306,25 +306,32 @@ static int intel_overlay_continue(struct intel_overlay *overlay, intel_ring_emit(ring, flip_addr); intel_ring_advance(ring); - WARN_ON(overlay->last_flip_req); - i915_gem_request_assign(&overlay->last_flip_req, req); - i915_add_request(req); + intel_overlay_submit_request(overlay, req, NULL); return 0; } -static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) +static void 
intel_overlay_release_old_vid_tail(struct i915_gem_active *active, + struct drm_i915_gem_request *req) { + struct intel_overlay *overlay = + container_of(active, typeof(*overlay), last_flip); struct drm_i915_gem_object *obj = overlay->old_vid_bo; + i915_gem_track_fb(obj, NULL, + INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); + i915_gem_object_ggtt_unpin(obj); i915_gem_object_put(obj); overlay->old_vid_bo = NULL; } -static void intel_overlay_off_tail(struct intel_overlay *overlay) +static void intel_overlay_off_tail(struct i915_gem_active *active, + struct drm_i915_gem_request *req) { + struct intel_overlay *overlay = + container_of(active, typeof(*overlay), last_flip); struct drm_i915_gem_object *obj = overlay->vid_bo; /* never have the overlay hw on without showing a frame */ @@ -387,27 +394,16 @@ static int intel_overlay_off(struct intel_overlay *overlay) } intel_ring_advance(ring); - return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail); + return intel_overlay_do_wait_request(overlay, req, + intel_overlay_off_tail); } /* recover from an interruption due to a signal * We have to be careful not to repeat work forever an make forward progess. */ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) { - int ret; - - if (overlay->last_flip_req == NULL) - return 0; - - ret = i915_wait_request(overlay->last_flip_req); - if (ret) - return ret; - - if (overlay->flip_tail) - overlay->flip_tail(overlay); - - i915_gem_request_assign(&overlay->last_flip_req, NULL); - return 0; + return i915_gem_active_retire(&overlay->last_flip, + &overlay->i915->drm.struct_mutex); } /* Wait for pending overlay flip and release old frame. @@ -452,13 +448,9 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) intel_overlay_release_old_vid_tail); if (ret) return ret; - } + } else + intel_overlay_release_old_vid_tail(&overlay->last_flip, NULL); - intel_overlay_release_old_vid_tail(overlay); - - - i915_gem_track_fb(overlay->old_vid_bo, NULL, - INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); return 0; } @@ -471,7 +463,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv) intel_overlay_release_old_vid(overlay); - overlay->last_flip_req = NULL; overlay->old_xscale = 0; overlay->old_yscale = 0; overlay->crtc = NULL; @@ -882,12 +873,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay) iowrite32(0, ®s->OCMD); intel_overlay_unmap_regs(overlay, regs); - ret = intel_overlay_off(overlay); - if (ret != 0) - return ret; - - intel_overlay_off_tail(overlay); - return 0; + return intel_overlay_off(overlay); } static int check_overlay_possible_on_crtc(struct intel_overlay *overlay, -- cgit v0.10.2 From 7da844c5c6fcb7a3575dd20701b481fe86918ab0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:38 +0100 Subject: drm/i915: Move the special case wait-request handling to its one caller Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-19-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a91e79f..85ec5ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -731,28 +731,3 @@ complete: return ret; } - -/** - * Waits for a request to be signaled, and cleans up the - * request and object lists appropriately for that event. 
- */ -int i915_wait_request(struct drm_i915_gem_request *req) -{ - int ret; - - lockdep_assert_held(&req->i915->drm.struct_mutex); - GEM_BUG_ON(list_empty(&req->link)); - - ret = __i915_wait_request(req, - req->i915->mm.interruptible, - NULL, - NULL); - if (ret) - return ret; - - /* If the GPU hung, we want to keep the requests to find the guilty. */ - if (!i915_reset_in_progress(&req->i915->gpu_error)) - i915_gem_request_retire_upto(req); - - return 0; -} diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index ed16704..bc19980 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -220,10 +220,6 @@ int __i915_wait_request(struct drm_i915_gem_request *req, struct intel_rps_client *rps) __attribute__((nonnull(1))); -int __must_check -i915_wait_request(struct drm_i915_gem_request *req) - __attribute__((nonnull)); - static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); /** diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5b0eac2..542cf58 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2269,6 +2269,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) { struct intel_ring *ring = req->ring; struct drm_i915_gem_request *target; + int ret; intel_ring_update_space(ring); if (ring->space >= bytes) @@ -2298,7 +2299,18 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) if (WARN_ON(&target->ring_link == &ring->request_list)) return -ENOSPC; - return i915_wait_request(target); + ret = __i915_wait_request(target, true, NULL, NULL); + if (ret) + return ret; + + if (i915_reset_in_progress(&target->i915->gpu_error)) + return -EAGAIN; + + i915_gem_request_retire_upto(target); + + intel_ring_update_space(ring); + GEM_BUG_ON(ring->space < bytes); + return 0; } int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) @@ -2336,10 +2348,6 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) int ret = wait_for_space(req, wait_bytes); if (unlikely(ret)) return ret; - - intel_ring_update_space(ring); - if (unlikely(ring->space < wait_bytes)) - return -EAGAIN; } if (unlikely(need_wrap)) { -- cgit v0.10.2 From 37db14700e995aa7b74e09b2a1bfe786f0db0121 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:39 +0100 Subject: drm/i915: Disable waitboosting for a saturated engine If the user floods the GPU with so many requests that the engine stalls waiting for free space, don't automatically promote the GPU to maximum frequencies. If the GPU really is saturated with work, it will migrate to high clocks by itself, otherwise it is merely a user flooding us with busy-work. 
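For illustration, the policy amounts to which RPS client the waiter passes in (a sketch reusing the wait signature from this series; to_rps_client() is the pre-existing helper used by file-based waiters):

	/* A wait issued on behalf of userspace may boost the GPU for that client */
	ret = __i915_wait_request(req, true, NULL, to_rps_client(file));

	/* The internal stall waiting for ring space must never boost */
	ret = __i915_wait_request(target, true, NULL, NO_WAITBOOST);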
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-20-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 542cf58..4ab6d23 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2299,7 +2299,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) if (WARN_ON(&target->ring_link == &ring->request_list)) return -ENOSPC; - ret = __i915_wait_request(target, true, NULL, NULL); + ret = __i915_wait_request(target, true, NULL, NO_WAITBOOST); if (ret) return ret; -- cgit v0.10.2 From 776f32364d625305041e1760233c52fdb71d2563 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:40 +0100 Subject: drm/i915: s/__i915_wait_request/i915_wait_request/ There is only one wait on request function now, so drop the "expert" indication of leading __. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-21-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a2eaa44..9541217 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1420,7 +1420,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, mutex_unlock(&dev->struct_mutex); ret = 0; for (i = 0; ret == 0 && i < n; i++) - ret = __i915_wait_request(requests[i], true, NULL, rps); + ret = i915_wait_request(requests[i], true, NULL, rps); mutex_lock(&dev->struct_mutex); for (i = 0; i < n; i++) @@ -2726,9 +2726,9 @@ out: for (i = 0; i < n; i++) { if (ret == 0) - ret = __i915_wait_request(requests[i], true, - args->timeout_ns > 0 ? &args->timeout_ns : NULL, - to_rps_client(file)); + ret = i915_wait_request(requests[i], true, + args->timeout_ns > 0 ? &args->timeout_ns : NULL, + to_rps_client(file)); i915_gem_request_put(requests[i]); } return ret; @@ -2744,10 +2744,10 @@ __i915_gem_object_sync(struct drm_i915_gem_request *to, return 0; if (!i915.semaphores) { - ret = __i915_wait_request(from, - from->i915->mm.interruptible, - NULL, - NO_WAITBOOST); + ret = i915_wait_request(from, + from->i915->mm.interruptible, + NULL, + NO_WAITBOOST); if (ret) return ret; } else { @@ -3712,7 +3712,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) if (target == NULL) return 0; - ret = __i915_wait_request(target, true, NULL, NULL); + ret = i915_wait_request(target, true, NULL, NULL); i915_gem_request_put(target); return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 85ec5ca..8549375 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -70,9 +70,9 @@ static signed long i915_fence_wait(struct fence *fence, timeout = NULL; } - ret = __i915_wait_request(to_request(fence), - interruptible, timeout, - NO_WAITBOOST); + ret = i915_wait_request(to_request(fence), + interruptible, timeout, + NO_WAITBOOST); if (ret == -ETIME) return 0; @@ -579,7 +579,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, } /** - * __i915_wait_request - wait until execution of request has finished + * i915_wait_request - wait until execution of request has finished * @req: duh! 
* @interruptible: do an interruptible wait (normally yes) * @timeout: in - how long to wait (NULL forever); out - how much time remaining @@ -595,10 +595,10 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, * Returns 0 if the request was found within the alloted time. Else returns the * errno with remaining time filled in timeout argument. */ -int __i915_wait_request(struct drm_i915_gem_request *req, - bool interruptible, - s64 *timeout, - struct intel_rps_client *rps) +int i915_wait_request(struct drm_i915_gem_request *req, + bool interruptible, + s64 *timeout, + struct intel_rps_client *rps) { int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; DEFINE_WAIT(reset); diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index bc19980..26ca697 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -214,10 +214,10 @@ struct intel_rps_client; #define IS_RPS_CLIENT(p) (!IS_ERR(p)) #define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p)) -int __i915_wait_request(struct drm_i915_gem_request *req, - bool interruptible, - s64 *timeout, - struct intel_rps_client *rps) +int i915_wait_request(struct drm_i915_gem_request *req, + bool interruptible, + s64 *timeout, + struct intel_rps_client *rps) __attribute__((nonnull(1))); static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); @@ -418,7 +418,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) if (!request) return 0; - return __i915_wait_request(request, true, NULL, NULL); + return i915_wait_request(request, true, NULL, NULL); } /** @@ -441,7 +441,7 @@ i915_gem_active_retire(struct i915_gem_active *active, if (!request) return 0; - ret = __i915_wait_request(request, true, NULL, NULL); + ret = i915_wait_request(request, true, NULL, NULL); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index e57521d..651a84b 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -83,7 +83,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj) mutex_unlock(&dev->struct_mutex); for (i = 0; i < n; i++) - __i915_wait_request(requests[i], false, NULL, NULL); + i915_wait_request(requests[i], false, NULL, NULL); mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d54a3ea..da9dcac 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11463,9 +11463,9 @@ static void intel_mmio_flip_work_func(struct work_struct *w) struct reservation_object *resv; if (work->flip_queued_req) - WARN_ON(__i915_wait_request(work->flip_queued_req, - false, NULL, - NO_WAITBOOST)); + WARN_ON(i915_wait_request(work->flip_queued_req, + false, NULL, + NO_WAITBOOST)); /* For framebuffer backed by dmabuf, wait for fence */ resv = i915_gem_object_get_dmabuf_resv(obj); @@ -13508,8 +13508,8 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, if (!intel_plane_state->wait_req) continue; - ret = __i915_wait_request(intel_plane_state->wait_req, - true, NULL, NULL); + ret = i915_wait_request(intel_plane_state->wait_req, + true, NULL, NULL); if (ret) { /* Any hang should be swallowed by the wait */ WARN_ON(ret == -EIO); @@ -13621,8 +13621,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (!intel_plane_state->wait_req) continue; - ret = 
__i915_wait_request(intel_plane_state->wait_req, - true, NULL, NULL); + ret = i915_wait_request(intel_plane_state->wait_req, + true, NULL, NULL); /* EIO should be eaten, and we can't get interrupted in the * worker, and blocking commits have waited already. */ WARN_ON(ret); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4ab6d23..ac2e610 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2240,9 +2240,9 @@ int intel_engine_idle(struct intel_engine_cs *engine) link); /* Make sure we do not trigger any retires */ - return __i915_wait_request(req, - req->i915->mm.interruptible, - NULL, NULL); + return i915_wait_request(req, + req->i915->mm.interruptible, + NULL, NULL); } int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) @@ -2299,7 +2299,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) if (WARN_ON(&target->ring_link == &ring->request_list)) return -ENOSPC; - ret = __i915_wait_request(target, true, NULL, NO_WAITBOOST); + ret = i915_wait_request(target, true, NULL, NO_WAITBOOST); if (ret) return ret; -- cgit v0.10.2 From 909d074c31713dd1c43e8c7665f9e42791c672b8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:41 +0100 Subject: drm/i915: Double check activity before relocations If the object is active and we need to perform a relocation upon it, we need to take the slow relocation path. Before we do, double check the active requests to see if they have completed. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-22-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 5e1fb85..2f7173d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -441,6 +441,20 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj, return 0; } +static bool object_is_idle(struct drm_i915_gem_object *obj) +{ + unsigned long active = obj->active; + int idx; + + for_each_active(active, idx) { + if (!i915_gem_active_is_idle(&obj->last_read[idx], + &obj->base.dev->struct_mutex)) + return false; + } + + return true; +} + static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_vmas *eb, @@ -524,7 +538,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, } /* We can't wait for rendering with pagefaults disabled */ - if (obj->active && pagefault_disabled()) + if (pagefault_disabled() && !object_is_idle(obj)) return -EFAULT; if (use_cpu_reloc(obj)) -- cgit v0.10.2 From 4b8de8e68a2a5e2e7bdd755eacf18b5e9ce1c729 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:42 +0100 Subject: drm/i915: Move request list retirement to i915_gem_request.c As the list retirement is now clean of implementation details, we can move it closer to the request management. 
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-23-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9541217..2afb435 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2534,50 +2534,6 @@ void i915_gem_reset(struct drm_device *dev) i915_gem_restore_fences(dev); } -/** - * This function clears the request list as sequence numbers are passed. - * @engine: engine to retire requests on - */ -void -i915_gem_retire_requests_ring(struct intel_engine_cs *engine) -{ - while (!list_empty(&engine->request_list)) { - struct drm_i915_gem_request *request; - - request = list_first_entry(&engine->request_list, - struct drm_i915_gem_request, - link); - - if (!i915_gem_request_completed(request)) - break; - - i915_gem_request_retire_upto(request); - } -} - -void i915_gem_retire_requests(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (dev_priv->gt.active_engines == 0) - return; - - GEM_BUG_ON(!dev_priv->gt.awake); - - for_each_engine(engine, dev_priv) { - i915_gem_retire_requests_ring(engine); - if (list_empty(&engine->request_list)) - dev_priv->gt.active_engines &= ~intel_engine_flag(engine); - } - - if (dev_priv->gt.active_engines == 0) - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.idle_work, - msecs_to_jiffies(100)); -} - static void i915_gem_retire_work_handler(struct work_struct *work) { diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 8549375..6faa848 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -731,3 +731,38 @@ complete: return ret; } + +void i915_gem_retire_requests_ring(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_request *request, *next; + + list_for_each_entry_safe(request, next, &engine->request_list, link) { + if (!i915_gem_request_completed(request)) + break; + + i915_gem_request_retire(request); + } +} + +void i915_gem_retire_requests(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (dev_priv->gt.active_engines == 0) + return; + + GEM_BUG_ON(!dev_priv->gt.awake); + + for_each_engine(engine, dev_priv) { + i915_gem_retire_requests_ring(engine); + if (list_empty(&engine->request_list)) + dev_priv->gt.active_engines &= ~intel_engine_flag(engine); + } + + if (dev_priv->gt.active_engines == 0) + queue_delayed_work(dev_priv->wq, + &dev_priv->gt.idle_work, + msecs_to_jiffies(100)); +} -- cgit v0.10.2 From 5cf3d28098695f4e0641f164367ebb821185789b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:43 +0100 Subject: drm/i915: i915_vma_move_to_active prep patch This patch is broken out of the next just to remove the code motion from that patch and make it more readable. What we do here is move the i915_vma_move_to_active() to i915_gem_execbuffer.c and put the three stages (read, write, fenced) together so that future modifications to active handling are all located in the same spot. The importance of this is so that we can more simply control the order in which the requests are place in the retirement list (i.e. control the order at which we retire and so control the lifetimes to avoid having to hold onto references). 
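For illustration, a minimal caller sketch (mirroring the execbuffer path in the hunk below, where the flags are the per-object execbuffer flags):

	/* Mark @vma as busy on @req; EXEC_OBJECT_WRITE additionally updates
	 * write-domain and frontbuffer tracking, EXEC_OBJECT_NEEDS_FENCE the
	 * fence register LRU.
	 */
	i915_vma_move_to_active(vma, req, vma->exec_entry->flags);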
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-24-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6b57d8f..a1c4c76 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3153,7 +3153,8 @@ int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int i915_gem_object_sync(struct drm_i915_gem_object *obj, struct drm_i915_gem_request *to); void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req); + struct drm_i915_gem_request *req, + unsigned int flags); int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2afb435..ce57c50 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2330,24 +2330,6 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) return obj->mapping; } -void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req) -{ - struct drm_i915_gem_object *obj = vma->obj; - struct intel_engine_cs *engine; - - engine = i915_gem_request_get_engine(req); - - /* Add a reference if we're newly entering the active list. */ - if (obj->active == 0) - i915_gem_object_get(obj); - obj->active |= intel_engine_flag(engine); - - i915_gem_active_set(&obj->last_read[engine->id], req); - - list_move_tail(&vma->vm_link, &vma->vm->active_list); -} - static void i915_gem_object_retire__write(struct i915_gem_active *active, struct drm_i915_gem_request *request) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 24383f0..823e74c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -816,8 +816,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * MI_SET_CONTEXT instead of when the next seqno has completed. */ if (from != NULL) { - from->engine[RCS].state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; - i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->engine[RCS].state), req); + struct drm_i915_gem_object *obj = from->engine[RCS].state; + /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the * whole damn pipeline, we don't need to explicitly mark the * object dirty. The only exception is that the context must be @@ -825,10 +825,11 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * able to defer doing this until we know the object would be * swapped, but there is no way to do that yet. 
*/ - from->engine[RCS].state->dirty = 1; + obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; + i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0); /* obj is kept alive until the next request by its active ref */ - i915_gem_object_ggtt_unpin(from->engine[RCS].state); + i915_gem_object_ggtt_unpin(obj); i915_gem_context_put(from); } engine->last_context = i915_gem_context_get(to); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2f7173d..8bf20f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1143,43 +1143,64 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } +void i915_vma_move_to_active(struct i915_vma *vma, + struct drm_i915_gem_request *req, + unsigned int flags) +{ + struct drm_i915_gem_object *obj = vma->obj; + const unsigned int idx = req->engine->id; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + + obj->dirty = 1; /* be paranoid */ + + /* Add a reference if we're newly entering the active list. */ + if (obj->active == 0) + i915_gem_object_get(obj); + obj->active |= 1 << idx; + i915_gem_active_set(&obj->last_read[idx], req); + + if (flags & EXEC_OBJECT_WRITE) { + i915_gem_active_set(&obj->last_write, req); + + intel_fb_obj_invalidate(obj, ORIGIN_CS); + + /* update for the implicit flush after a batch */ + obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; + } + + if (flags & EXEC_OBJECT_NEEDS_FENCE) { + i915_gem_active_set(&obj->last_fence, req); + if (flags & __EXEC_OBJECT_HAS_FENCE) { + struct drm_i915_private *dev_priv = req->i915; + + list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, + &dev_priv->mm.fence_list); + } + } + + list_move_tail(&vma->vm_link, &vma->vm->active_list); +} + static void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = i915_gem_request_get_engine(req); struct i915_vma *vma; list_for_each_entry(vma, vmas, exec_list) { - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_object *obj = vma->obj; u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; - obj->dirty = 1; /* be paranoid */ obj->base.write_domain = obj->base.pending_write_domain; - if (obj->base.write_domain == 0) + if (obj->base.write_domain) + vma->exec_entry->flags |= EXEC_OBJECT_WRITE; + else obj->base.pending_read_domains |= obj->base.read_domains; obj->base.read_domains = obj->base.pending_read_domains; - i915_vma_move_to_active(vma, req); - if (obj->base.write_domain) { - i915_gem_active_set(&obj->last_write, req); - - intel_fb_obj_invalidate(obj, ORIGIN_CS); - - /* update for the implicit flush after a batch */ - obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; - } - if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - i915_gem_active_set(&obj->last_fence, req); - if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { - struct drm_i915_private *dev_priv = engine->i915; - list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, - &dev_priv->mm.fence_list); - } - } - + i915_vma_move_to_active(vma, req, vma->exec_entry->flags); trace_i915_gem_object_change_domain(obj, old_read, old_write); } } diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index f85c550..9023667 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -217,7 +217,7 @@ int i915_gem_render_state_init(struct 
drm_i915_gem_request *req) goto err_unpin; } - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req, 0); err_unpin: i915_gem_object_ggtt_unpin(so.obj); err_obj: -- cgit v0.10.2 From b0decaf75bd902a11c932005c88924947ac00b8c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:44 +0100 Subject: drm/i915: Track active vma requests Hook the vma itself into the i915_gem_request_retire() so that we can accurately track when a solitary vma is inactive (as opposed to having to wait for the entire object to be idle). This improves the interaction when using multiple contexts (with full-ppgtt) and eliminates some frequent list walking when retiring objects after a completed request. A side-effect is that we get an active vma reference for free. The consequence of this is shown in the next patch... v2: Update inline names to be consistent with i915_gem_object_get_active() Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-25-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fe3c823..b35e617 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -365,7 +365,7 @@ static int per_file_stats(int id, void *ptr, void *data) continue; } - if (obj->active) /* XXX per-vma statistic */ + if (i915_vma_is_active(vma)) stats->active += vma->node.size; else stats->inactive += vma->node.size; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ce57c50..51660ce 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2347,7 +2347,6 @@ i915_gem_object_retire__read(struct i915_gem_active *active, int idx = request->engine->id; struct drm_i915_gem_object *obj = container_of(active, struct drm_i915_gem_object, last_read[idx]); - struct i915_vma *vma; GEM_BUG_ON((obj->active & (1 << idx)) == 0); @@ -2359,12 +2358,9 @@ i915_gem_object_retire__read(struct i915_gem_active *active, * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) */ - list_move_tail(&obj->global_list, &request->i915->mm.bound_list); - - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!list_empty(&vma->vm_link)) - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - } + if (obj->bind_count) + list_move_tail(&obj->global_list, + &request->i915->mm.bound_list); i915_gem_object_put(obj); } @@ -2797,8 +2793,29 @@ static void __i915_vma_iounmap(struct i915_vma *vma) static int __i915_vma_unbind(struct i915_vma *vma, bool wait) { struct drm_i915_gem_object *obj = vma->obj; + unsigned long active; int ret; + /* First wait upon any activity as retiring the request may + * have side-effects such as unpinning or even unbinding this vma. 
+ */ + active = i915_vma_get_active(vma); + if (active && wait) { + int idx; + + for_each_active(active, idx) { + ret = i915_gem_active_retire(&vma->last_read[idx], + &vma->vm->dev->struct_mutex); + if (ret) + return ret; + } + + GEM_BUG_ON(i915_vma_is_active(vma)); + } + + if (vma->pin_count) + return -EBUSY; + if (list_empty(&vma->obj_link)) return 0; @@ -2807,18 +2824,9 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) return 0; } - if (vma->pin_count) - return -EBUSY; - GEM_BUG_ON(obj->bind_count == 0); GEM_BUG_ON(!obj->pages); - if (wait) { - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; - } - if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { i915_gem_object_finish_gtt(obj); @@ -3201,9 +3209,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t old_write_domain, old_read_domains; struct i915_vma *vma; int ret; @@ -3256,9 +3261,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) /* And bump the LRU for this access */ vma = i915_gem_obj_to_ggtt(obj); - if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) - list_move_tail(&vma->vm_link, - &ggtt->base.inactive_list); + if (vma && + drm_mm_node_allocated(&vma->node) && + !i915_vma_is_active(vma)) + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 8bf20f5..5e3b505 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1154,7 +1154,13 @@ void i915_vma_move_to_active(struct i915_vma *vma, obj->dirty = 1; /* be paranoid */ - /* Add a reference if we're newly entering the active list. */ + /* Add a reference if we're newly entering the active list. + * The order in which we add operations to the retirement queue is + * vital here: mark_active adds to the start of the callback list, + * such that subsequent callbacks are called first. Therefore we + * add the active reference first and queue for it to be dropped + * *last*. 
+ */ if (obj->active == 0) i915_gem_object_get(obj); obj->active |= 1 << idx; @@ -1179,6 +1185,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, } } + i915_vma_set_active(vma, idx); + i915_gem_active_set(&vma->last_read[idx], req); list_move_tail(&vma->vm_link, &vma->vm->active_list); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ad97892..3e5d39d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3327,12 +3327,30 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) i915_ggtt_flush(dev_priv); } +static void +i915_vma_retire(struct i915_gem_active *active, + struct drm_i915_gem_request *rq) +{ + const unsigned int idx = rq->engine->id; + struct i915_vma *vma = + container_of(active, struct i915_vma, last_read[idx]); + + GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); + + i915_vma_clear_active(vma, idx); + if (i915_vma_is_active(vma)) + return; + + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); +} + static struct i915_vma * __i915_gem_vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *ggtt_view) { struct i915_vma *vma; + int i; if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) return ERR_PTR(-EINVAL); @@ -3344,6 +3362,8 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&vma->vm_link); INIT_LIST_HEAD(&vma->obj_link); INIT_LIST_HEAD(&vma->exec_list); + for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) + init_request_active(&vma->last_read[i], i915_vma_retire); vma->vm = vm; vma->obj = obj; vma->is_ggtt = i915_is_ggtt(vm); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index cf8e3fc..bfd3c11 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -36,6 +36,8 @@ #include +#include "i915_gem_request.h" + struct drm_i915_file_private; typedef uint32_t gen6_pte_t; @@ -179,6 +181,9 @@ struct i915_vma { struct i915_address_space *vm; void __iomem *iomap; + unsigned int active; + struct i915_gem_active last_read[I915_NUM_ENGINES]; + /** Flags and address space this VMA is bound to */ #define GLOBAL_BIND (1<<0) #define LOCAL_BIND (1<<1) @@ -222,6 +227,34 @@ struct i915_vma { #define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf }; +static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) +{ + return vma->active; +} + +static inline bool i915_vma_is_active(const struct i915_vma *vma) +{ + return i915_vma_get_active(vma); +} + +static inline void i915_vma_set_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active |= BIT(engine); +} + +static inline void i915_vma_clear_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active &= ~BIT(engine); +} + +static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, + unsigned int engine) +{ + return vma->active & BIT(engine); +} + struct i915_page_dma { struct page *page; union { -- cgit v0.10.2 From b1f788c6acb2f34622dd5f4c3d5210c6e3945e78 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:45 +0100 Subject: drm/i915: Release vma when the handle is closed In order to prevent a leak of the vma on shared objects, we need to hook into the object_close callback to destroy the vma on the object for this file. However, if we destroyed that vma immediately we may cause unexpected application stalls as we try to unbind a busy vma - hence we defer the unbind to when we retire the vma. v2: Keep vma allocated until closed. 
This is useful for a later optimisation, but it is required now in order to handle potential recursion of i915_vma_unbind() by retiring itself. v3: Comments are important. Testcase: igt/gem_ppggtt/flink-and-close-vma-leak Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-26-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 50c5640..8cfc264 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2578,6 +2578,7 @@ static struct drm_driver driver = { .postclose = i915_driver_postclose, .set_busid = drm_pci_set_busid, + .gem_close_object = i915_gem_close_object, .gem_free_object = i915_gem_free_object, .gem_vm_ops = &i915_gem_vm_ops, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a1c4c76..f470ea1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3014,8 +3014,8 @@ struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev, size_t size); struct drm_i915_gem_object *i915_gem_object_create_from_data( struct drm_device *dev, const void *data, size_t size); +void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file); void i915_gem_free_object(struct drm_gem_object *obj); -void i915_gem_vma_destroy(struct i915_vma *vma); /* Flags used by pin/bind&friends. */ #define PIN_MAPPABLE (1<<0) @@ -3048,6 +3048,8 @@ int __must_check i915_vma_unbind(struct i915_vma *vma); * _guarantee_ VMA in question is _not in use_ anywhere. */ int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma); +void i915_vma_close(struct i915_vma *vma); +void i915_vma_destroy(struct i915_vma *vma); int i915_gem_object_unbind(struct drm_i915_gem_object *obj); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 51660ce..5a66ad4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2596,6 +2596,19 @@ out_rearm: } } +void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) +{ + struct drm_i915_gem_object *obj = to_intel_bo(gem); + struct drm_i915_file_private *fpriv = file->driver_priv; + struct i915_vma *vma, *vn; + + mutex_lock(&obj->base.dev->struct_mutex); + list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) + if (vma->vm->file == fpriv) + i915_vma_close(vma); + mutex_unlock(&obj->base.dev->struct_mutex); +} + /** * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT * @dev: drm device pointer @@ -2803,26 +2816,32 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) if (active && wait) { int idx; + /* When a closed VMA is retired, it is unbound - eek. + * In order to prevent it from being recursively closed, + * take a pin on the vma so that the second unbind is + * aborted. 
+ */ + vma->pin_count++; + for_each_active(active, idx) { ret = i915_gem_active_retire(&vma->last_read[idx], &vma->vm->dev->struct_mutex); if (ret) - return ret; + break; } + vma->pin_count--; + if (ret) + return ret; + GEM_BUG_ON(i915_vma_is_active(vma)); } if (vma->pin_count) return -EBUSY; - if (list_empty(&vma->obj_link)) - return 0; - - if (!drm_mm_node_allocated(&vma->node)) { - i915_gem_vma_destroy(vma); - return 0; - } + if (!drm_mm_node_allocated(&vma->node)) + goto destroy; GEM_BUG_ON(obj->bind_count == 0); GEM_BUG_ON(!obj->pages); @@ -2855,7 +2874,6 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) } drm_mm_remove_node(&vma->node); - i915_gem_vma_destroy(vma); /* Since the unbound list is global, only move to that list if * no more VMAs exist. */ @@ -2869,6 +2887,10 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) */ i915_gem_object_unpin_pages(obj); +destroy: + if (unlikely(vma->closed)) + i915_vma_destroy(vma); + return 0; } @@ -3043,7 +3065,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, if (offset & (alignment - 1) || offset + size > end) { ret = -EINVAL; - goto err_free_vma; + goto err_vma; } vma->node.start = offset; vma->node.size = size; @@ -3055,7 +3077,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, ret = drm_mm_reserve_node(&vm->mm, &vma->node); } if (ret) - goto err_free_vma; + goto err_vma; } else { if (flags & PIN_HIGH) { search_flag = DRM_MM_SEARCH_BELOW; @@ -3080,7 +3102,7 @@ search_free: if (ret == 0) goto search_free; - goto err_free_vma; + goto err_vma; } } if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { @@ -3101,8 +3123,7 @@ search_free: err_remove_node: drm_mm_remove_node(&vma->node); -err_free_vma: - i915_gem_vma_destroy(vma); +err_vma: vma = ERR_PTR(ret); err_unpin: i915_gem_object_unpin_pages(obj); @@ -4051,21 +4072,18 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) trace_i915_gem_object_destroy(obj); + /* All file-owned VMA should have been released by this point through + * i915_gem_close_object(), or earlier by i915_gem_context_close(). + * However, the object may also be bound into the global GTT (e.g. + * older GPUs without per-process support, or for direct access through + * the GTT either for the user or for scanout). Those VMA still need to + * unbound now. 
+ */ list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { - int ret; - + GEM_BUG_ON(!vma->is_ggtt); + GEM_BUG_ON(i915_vma_is_active(vma)); vma->pin_count = 0; - ret = __i915_vma_unbind_no_wait(vma); - if (WARN_ON(ret == -ERESTARTSYS)) { - bool was_interruptible; - - was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; - - WARN_ON(i915_vma_unbind(vma)); - - dev_priv->mm.interruptible = was_interruptible; - } + i915_vma_close(vma); } GEM_BUG_ON(obj->bind_count); @@ -4129,22 +4147,6 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, return NULL; } -void i915_gem_vma_destroy(struct i915_vma *vma) -{ - WARN_ON(vma->node.allocated); - - /* Keep the vma as a placeholder in the execbuffer reservation lists */ - if (!list_empty(&vma->exec_list)) - return; - - if (!vma->is_ggtt) - i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); - - list_del(&vma->obj_link); - - kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); -} - static void i915_gem_stop_engines(struct drm_device *dev) { diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 81f7b43..3437ced 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -182,8 +182,8 @@ found: struct i915_vma, exec_list); if (drm_mm_scan_remove_block(&vma->node)) { + vma->pin_count++; list_move(&vma->exec_list, &eviction_list); - i915_gem_object_get(vma->obj); continue; } list_del_init(&vma->exec_list); @@ -191,18 +191,14 @@ found: /* Unbinding will emit any required flushes */ while (!list_empty(&eviction_list)) { - struct drm_i915_gem_object *obj; - vma = list_first_entry(&eviction_list, struct i915_vma, exec_list); - obj = vma->obj; list_del_init(&vma->exec_list); + vma->pin_count--; if (ret == 0) ret = i915_vma_unbind(vma); - - i915_gem_object_put(obj); } return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3e5d39d..3e9d735 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3342,6 +3342,31 @@ i915_vma_retire(struct i915_gem_active *active, return; list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + if (unlikely(vma->closed && !vma->pin_count)) + WARN_ON(i915_vma_unbind(vma)); +} + +void i915_vma_destroy(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->node.allocated); + GEM_BUG_ON(i915_vma_is_active(vma)); + GEM_BUG_ON(!vma->closed); + + list_del(&vma->vm_link); + if (!vma->is_ggtt) + i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); + + kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); +} + +void i915_vma_close(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->closed); + vma->closed = true; + + list_del_init(&vma->obj_link); + if (!i915_vma_is_active(vma) && !vma->pin_count) + WARN_ON(__i915_vma_unbind_no_wait(vma)); } static struct i915_vma * diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index bfd3c11..deb9dbc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -189,6 +189,7 @@ struct i915_vma { #define LOCAL_BIND (1<<1) unsigned int bound : 4; bool is_ggtt : 1; + bool closed : 1; /** * Support different GGTT views into the same object. -- cgit v0.10.2 From 50e046b6a0ac42fdab4d3708224da8a0ae4997df Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:46 +0100 Subject: drm/i915: Mark the context and address space as closed When the user closes the context mark it and the dependent address space as closed. 
As we use an asynchronous destruct method, this has two purposes. First it allows us to flag the closed context and detect internal errors if we to create any new objects for it (as it is removed from the user's namespace, these should be internal bugs only). And secondly, it allows us to immediately reap stale vma. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-27-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f470ea1..ce472c9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -907,6 +907,7 @@ struct i915_gem_context { struct list_head link; u8 remap_slice; + bool closed:1; }; enum fb_op_origin { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5a66ad4..85a06dc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2857,12 +2857,15 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) __i915_vma_iounmap(vma); } - trace_i915_vma_unbind(vma); - - vma->vm->unbind_vma(vma); + if (likely(!vma->vm->closed)) { + trace_i915_vma_unbind(vma); + vma->vm->unbind_vma(vma); + } vma->bound = 0; - list_del_init(&vma->vm_link); + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + if (vma->is_ggtt) { if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { obj->map_and_fenceable = false; @@ -2873,8 +2876,6 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) vma->ggtt_view.pages = NULL; } - drm_mm_remove_node(&vma->node); - /* Since the unbound list is global, only move to that list if * no more VMAs exist. */ if (--obj->bind_count == 0) @@ -3116,7 +3117,7 @@ search_free: goto err_remove_node; list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); - list_add_tail(&vma->vm_link, &vm->inactive_list); + list_move_tail(&vma->vm_link, &vm->inactive_list); obj->bind_count++; return vma; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 823e74c..a4ee623 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -156,6 +156,7 @@ void i915_gem_context_free(struct kref *ctx_ref) lockdep_assert_held(&ctx->i915->drm.struct_mutex); trace_i915_context_free(ctx); + GEM_BUG_ON(!ctx->closed); /* * This context is going away and we need to remove all VMAs still @@ -224,6 +225,37 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) return obj; } +static void i915_ppgtt_close(struct i915_address_space *vm) +{ + struct list_head *phases[] = { + &vm->active_list, + &vm->inactive_list, + &vm->unbound_list, + NULL, + }, **phase; + + GEM_BUG_ON(vm->closed); + vm->closed = true; + + for (phase = phases; *phase; phase++) { + struct i915_vma *vma, *vn; + + list_for_each_entry_safe(vma, vn, *phase, vm_link) + if (!vma->closed) + i915_vma_close(vma); + } +} + +static void context_close(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(ctx->closed); + ctx->closed = true; + if (ctx->ppgtt) + i915_ppgtt_close(&ctx->ppgtt->base); + ctx->file_priv = ERR_PTR(-EBADF); + i915_gem_context_put(ctx); +} + static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) { int ret; @@ -305,7 +337,7 @@ __create_hw_context(struct drm_device *dev, return ctx; err_out: - i915_gem_context_put(ctx); + context_close(ctx); return ERR_PTR(ret); } @@ -334,7 +366,7 @@ i915_gem_create_context(struct drm_device *dev, 
DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); idr_remove(&file_priv->context_idr, ctx->user_handle); - i915_gem_context_put(ctx); + context_close(ctx); return ERR_CAST(ppgtt); } @@ -505,7 +537,7 @@ void i915_gem_context_fini(struct drm_device *dev) lockdep_assert_held(&dev->struct_mutex); - i915_gem_context_put(dctx); + context_close(dctx); dev_priv->kernel_context = NULL; ida_destroy(&dev_priv->context_hw_ida); @@ -515,8 +547,7 @@ static int context_idr_cleanup(int id, void *p, void *data) { struct i915_gem_context *ctx = p; - ctx->file_priv = ERR_PTR(-EBADF); - i915_gem_context_put(ctx); + context_close(ctx); return 0; } @@ -1014,7 +1045,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, } idr_remove(&file_priv->context_idr, ctx->user_handle); - i915_gem_context_put(ctx); + context_close(ctx); mutex_unlock(&dev->struct_mutex); DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3e9d735..d42463c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2121,6 +2121,7 @@ static void i915_address_space_init(struct i915_address_space *vm, drm_mm_init(&vm->mm, vm->start, vm->total); INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); + INIT_LIST_HEAD(&vm->unbound_list); list_add_tail(&vm->global_link, &dev_priv->vm_list); } @@ -2213,9 +2214,10 @@ void i915_ppgtt_release(struct kref *kref) trace_i915_ppgtt_release(&ppgtt->base); - /* vmas should already be unbound */ + /* vmas should already be unbound and destroyed */ WARN_ON(!list_empty(&ppgtt->base.active_list)); WARN_ON(!list_empty(&ppgtt->base.inactive_list)); + WARN_ON(!list_empty(&ppgtt->base.unbound_list)); list_del(&ppgtt->base.global_link); drm_mm_takedown(&ppgtt->base.mm); @@ -3377,6 +3379,8 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj, struct i915_vma *vma; int i; + GEM_BUG_ON(vm->closed); + if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) return ERR_PTR(-EINVAL); @@ -3384,11 +3388,11 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj, if (vma == NULL) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&vma->vm_link); INIT_LIST_HEAD(&vma->obj_link); INIT_LIST_HEAD(&vma->exec_list); for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) init_request_active(&vma->last_read[i], i915_vma_retire); + list_add(&vma->vm_link, &vm->unbound_list); vma->vm = vm; vma->obj = obj; vma->is_ggtt = i915_is_ggtt(vm); @@ -3429,6 +3433,7 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, if (!vma) vma = __i915_gem_vma_create(obj, &ggtt->base, view); + GEM_BUG_ON(vma->closed); return vma; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index deb9dbc..f6cc3fe 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -319,6 +319,8 @@ struct i915_address_space { u64 start; /* Start offset always 0 for dri2 */ u64 total; /* size addr space maps (ex. 2GB for ggtt) */ + bool closed; + struct i915_page_scratch *scratch_page; struct i915_page_table *scratch_pt; struct i915_page_directory *scratch_pd; @@ -347,6 +349,13 @@ struct i915_address_space { */ struct list_head inactive_list; + /** + * List of vma that have been unbound. + * + * A reference is not held on the buffer while on this list. 
+ */ + struct list_head unbound_list; + /* FIXME: Need a more generic return type */ gen6_pte_t (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 2c321c8..bc91ffe 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -707,7 +707,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, vma->bound |= GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); - list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); + list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); obj->bind_count++; list_add_tail(&obj->global_list, &dev_priv->mm.bound_list); -- cgit v0.10.2 From df0e9a287da83f3fd17b47d5f0682f48c722aee7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 07:52:47 +0100 Subject: Revert "drm/i915: Clean up associated VMAs on context destruction" This reverts commit e9f24d5fb7cf3628b195b18ff3ac4e37937ceeae. The patch was only a stop-gap measure that fixed half the problem - the leak of the fbcon when restarting X. A complete solution required releasing the VMA when the object itself was closed rather than rely on file/process exit. The previous patches add the VMA tracking necessary to do close them along with the object, context or file, and so the time has come to remove the partial fix. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470293567-10811-28-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ce472c9..66b98fa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3044,11 +3044,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); -/* - * BEWARE: Do not use the function below unless you can _absolutely_ - * _guarantee_ VMA in question is _not in use_ anywhere. - */ -int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); void i915_vma_destroy(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 85a06dc..86ff14c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2803,7 +2803,7 @@ static void __i915_vma_iounmap(struct i915_vma *vma) vma->iomap = NULL; } -static int __i915_vma_unbind(struct i915_vma *vma, bool wait) +int i915_vma_unbind(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; unsigned long active; @@ -2813,7 +2813,7 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait) * have side-effects such as unpinning or even unbinding this vma. */ active = i915_vma_get_active(vma); - if (active && wait) { + if (active) { int idx; /* When a closed VMA is retired, it is unbound - eek. 
@@ -2895,16 +2895,6 @@ destroy: return 0; } -int i915_vma_unbind(struct i915_vma *vma) -{ - return __i915_vma_unbind(vma, true); -} - -int __i915_vma_unbind_no_wait(struct i915_vma *vma) -{ - return __i915_vma_unbind(vma, false); -} - int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a4ee623..eff6d39 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -134,21 +134,6 @@ static int get_context_size(struct drm_i915_private *dev_priv) return ret; } -static void i915_gem_context_clean(struct i915_gem_context *ctx) -{ - struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; - struct i915_vma *vma, *next; - - if (!ppgtt) - return; - - list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list, - vm_link) { - if (WARN_ON(__i915_vma_unbind_no_wait(vma))) - break; - } -} - void i915_gem_context_free(struct kref *ctx_ref) { struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); @@ -158,13 +143,6 @@ void i915_gem_context_free(struct kref *ctx_ref) trace_i915_context_free(ctx); GEM_BUG_ON(!ctx->closed); - /* - * This context is going away and we need to remove all VMAs still - * around. This is to handle imported shared objects for which - * destructor did not run when their handles were closed. - */ - i915_gem_context_clean(ctx); - i915_ppgtt_put(ctx->ppgtt); for (i = 0; i < I915_NUM_ENGINES; i++) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d42463c..685ca2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3368,7 +3368,7 @@ void i915_vma_close(struct i915_vma *vma) list_del_init(&vma->obj_link); if (!i915_vma_is_active(vma) && !vma->pin_count) - WARN_ON(__i915_vma_unbind_no_wait(vma)); + WARN_ON(i915_vma_unbind(vma)); } static struct i915_vma * -- cgit v0.10.2 From 5ac9793bf9f43cb4d8ecdcc521be1ba87057841a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jul 2016 19:11:17 +0100 Subject: drm/i915: Fix use of engine->index for register offset Since commit de1add360522 ("drm/i915: Decouple execbuf uAPI from internal implementation") the index of the engine (its engine->id) in the internal list no longer matches the hardware id. However, in a couple of locations we missed fixing up the difference. In this case, RING_FAULT_REG() refers to engine->id which is now not what the register offset actually should be. Fortunately, in both case we should be more or less looping over 0..I915_NUM_ENGINES. 
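As a rough illustration of why the index matters, here is a tiny standalone sketch (the engine ordering, the FAULT_* names and main() are invented for the example; only the 0x4094 base and 0x100 stride come from the RING_FAULT_REG() definition in the hunk below):

    /* Toy model: per-engine fault registers sit at a fixed stride from a
     * base and are indexed by the hardware id.  Indexing by the driver's
     * internal list position hits the wrong register once the two
     * orderings diverge. */
    #include <stdio.h>

    struct engine { int id; int hw_id; const char *name; };

    #define FAULT_BASE   0x4094u
    #define FAULT_STRIDE 0x100u
    #define FAULT_REG(e) (FAULT_BASE + FAULT_STRIDE * (e)->hw_id)

    int main(void)
    {
        /* hypothetical ordering where list index != hardware id */
        struct engine engines[] = {
            { 0, 0, "rcs" }, { 1, 2, "vcs" }, { 2, 1, "bcs" },
        };
        unsigned int i;

        for (i = 0; i < sizeof(engines) / sizeof(engines[0]); i++)
            printf("%s: correct %#x, by-index %#x\n", engines[i].name,
                   FAULT_REG(&engines[i]),
                   FAULT_BASE + FAULT_STRIDE * engines[i].id);
        return 0;
    }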
Fixes: de1add360522 ("drm/i915: Decouple execbuf uAPI from internal...") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1469643077-2523-2-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2f93d4a..f38a5e2 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1648,7 +1648,7 @@ enum skl_disp_power_wells { #define ARB_MODE_BWGTLB_DISABLE (1<<9) #define ARB_MODE_SWIZZLE_BDW (1<<1) #define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) -#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100*(engine)->id) +#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100*(engine)->hw_id) #define RING_FAULT_GTTSEL_MASK (1<<11) #define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) #define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) -- cgit v0.10.2 From 36dbc4d76918d7557b686f807106dcc799174b12 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 08:43:53 +0100 Subject: drm/i915/fbc: FBC causes display flicker when VT-d is enabled on Skylake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Erratum SKL075: Display Flicker May Occur When Both VT-d And FBC Are Enabled "Display flickering may occur when both FBC (Frame Buffer Compression) and VT - d (Intel® Virtualization Technology for Directed I/O) are enabled and in use by the display controller." Ville found the w/a name in the database: WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt and also dug out that it affects Broxton. v2: Log when the quirk is applied. v3: Ensure i915.enable_fbc is false when !HAS_FBC() v4: Fix function name after rebase v5: Add Broxton to the workaround Note for backporting to stable, we need to add #define mkwrite_device_info(ptr) \ ((struct intel_device_info *)INTEL_INFO(ptr)) Signed-off-by: Chris Wilson Cc: Paulo Zanoni Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/1470296633-20388-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 8147eb9..d4be076 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1229,12 +1229,29 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) if (i915.enable_fbc >= 0) return !!i915.enable_fbc; + if (!HAS_FBC(dev_priv)) + return 0; + if (IS_BROADWELL(dev_priv)) return 1; return 0; } +static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv) +{ +#ifdef CONFIG_INTEL_IOMMU + /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ + if (intel_iommu_gfx_mapped && + (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) { + DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n"); + return true; + } +#endif + + return false; +} + /** * intel_fbc_init - Initialize FBC * @dev_priv: the i915 device @@ -1252,6 +1269,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) fbc->active = false; fbc->work.scheduled = false; + if (need_fbc_vtd_wa(dev_priv)) + mkwrite_device_info(dev_priv)->has_fbc = false; + i915.enable_fbc = intel_sanitize_fbc_option(dev_priv); DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc); -- cgit v0.10.2 From 1dd5b6f2020389e75bb3d269c038497f065e68c9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 09:09:53 +0100 Subject: drm/i915: Add missing rpm wakelock to GGTT 
pread Joonas spotted a discrepancy between the pwrite and pread ioctls, in that pwrite takes the rpm wakelock around its GGTT access, The wakelock is required in order for the GTT to function. In disregard for the current convention, we take the rpm wakelock around the access itself rather than around the struct_mutex as the nesting is not strictly required and such ordering will one day be fixed by explicitly noting the barrier dependencies between the GGTT and rpm. Fixes: b50a53715f09 ("drm/i915: Support for pread/pwrite ...") Reported-by: Joonas Lahtinen Signed-off-by: Chris Wilson Cc: Ankitprasad Sharma Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: drm-intel-fixes@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/1470298193-21765-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 86ff14c..03bad1b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -887,9 +887,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, ret = i915_gem_shmem_pread(dev, obj, args, file); /* pread for non shmem backed objects */ - if (ret == -EFAULT || ret == -ENODEV) + if (ret == -EFAULT || ret == -ENODEV) { + intel_runtime_pm_get(to_i915(dev)); ret = i915_gem_gtt_pread(dev, obj, args->size, args->offset, args->data_ptr); + intel_runtime_pm_put(to_i915(dev)); + } out: i915_gem_object_put(obj); -- cgit v0.10.2 From fe5a66f91c88202fcbd5ccd2637f4dff35ed5cad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 29 Jul 2016 16:52:39 +0300 Subject: drm/i915: Read PSR caps/intermediate freqs/etc. only once on eDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we re-read a bunch of static eDP panel caps from the DPCD over and over again. Let's do it only once to save some time and effort. 
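The change boils down to the usual read-once caching of static sink capabilities; a minimal standalone sketch of that pattern (the struct and function names are invented here, and aux_read() merely stands in for drm_dp_dpcd_read()):

    #include <stdbool.h>

    /* Stands in for the static DPCD blocks (PSR caps, eDP rates, ...). */
    struct sink_caps {
        unsigned char dpcd[16];
        bool cached;                      /* set once at init, reused later */
    };

    /* Pretend AUX transfer that always succeeds. */
    static bool aux_read(unsigned char *buf, unsigned int len)
    {
        for (unsigned int i = 0; i < len; i++)
            buf[i] = 0x11;
        return true;
    }

    static bool sink_caps_get(struct sink_caps *caps)
    {
        if (caps->cached)                 /* later detect cycles: no AUX traffic */
            return true;
        if (!aux_read(caps->dpcd, sizeof(caps->dpcd)))
            return false;                 /* AUX transfer failed */
        caps->cached = true;
        return true;
    }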
v2: Make thing less confusing with intel_edp_init_dpcd() (Chris) Move no_aux_handshake setup in there as well v3: Move tps3/rate printout to intel_dp_long_pulse() so that we'll still get them on eDP as well Cc: Chris Wilson Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson (v1) Link: http://patchwork.freedesktop.org/patch/msgid/1469800359-7087-1-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 001f74f..0a9ade9 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3395,84 +3395,67 @@ intel_dp_link_down(struct intel_dp *intel_dp) } static bool -intel_dp_get_dpcd(struct intel_dp *intel_dp) +intel_dp_read_dpcd(struct intel_dp *intel_dp) { - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd, sizeof(intel_dp->dpcd)) < 0) return false; /* aux transfer failed */ DRM_DEBUG_KMS("DPCD: %*ph\n", (int) sizeof(intel_dp->dpcd), intel_dp->dpcd); - if (intel_dp->dpcd[DP_DPCD_REV] == 0) - return false; /* DPCD not present */ + return intel_dp->dpcd[DP_DPCD_REV] != 0; +} - if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, - &intel_dp->sink_count, 1) < 0) - return false; +static bool +intel_edp_init_dpcd(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = + to_i915(dp_to_dig_port(intel_dp)->base.base.dev); - /* - * Sink count can change between short pulse hpd hence - * a member variable in intel_dp will track any changes - * between short pulse interrupts. - */ - intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count); + /* this function is meant to be called only once */ + WARN_ON(intel_dp->dpcd[DP_DPCD_REV] != 0); - /* - * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that - * a dongle is present but no display. Unless we require to know - * if a dongle is present or not, we don't need to update - * downstream port information. So, an early return here saves - * time from performing other operations which are not required. - */ - if (!is_edp(intel_dp) && !intel_dp->sink_count) + if (!intel_dp_read_dpcd(intel_dp)) return false; - /* Check if the panel supports PSR */ - memset(intel_dp->psr_dpcd, 0, sizeof(intel_dp->psr_dpcd)); - if (is_edp(intel_dp)) { - drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, - intel_dp->psr_dpcd, - sizeof(intel_dp->psr_dpcd)); - if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { - dev_priv->psr.sink_support = true; - DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); - } - - if (INTEL_INFO(dev)->gen >= 9 && - (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { - uint8_t frame_sync_cap; - - dev_priv->psr.sink_support = true; - drm_dp_dpcd_read(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, - &frame_sync_cap, 1); - dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; - /* PSR2 needs frame sync as well */ - dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; - DRM_DEBUG_KMS("PSR2 %s on sink", - dev_priv->psr.psr2_support ? 
"supported" : "not supported"); - } - - /* Read the eDP Display control capabilities registers */ - memset(intel_dp->edp_dpcd, 0, sizeof(intel_dp->edp_dpcd)); - if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) && - (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, - intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == - sizeof(intel_dp->edp_dpcd))) - DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), - intel_dp->edp_dpcd); - } + if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) + dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] & + DP_NO_AUX_HANDSHAKE_LINK_TRAINING; - DRM_DEBUG_KMS("Display Port TPS3 support: source %s, sink %s\n", - yesno(intel_dp_source_supports_hbr2(intel_dp)), - yesno(drm_dp_tps3_supported(intel_dp->dpcd))); + /* Check if the panel supports PSR */ + drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, + intel_dp->psr_dpcd, + sizeof(intel_dp->psr_dpcd)); + if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { + dev_priv->psr.sink_support = true; + DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); + } + + if (INTEL_GEN(dev_priv) >= 9 && + (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { + uint8_t frame_sync_cap; + + dev_priv->psr.sink_support = true; + drm_dp_dpcd_read(&intel_dp->aux, + DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, + &frame_sync_cap, 1); + dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; + /* PSR2 needs frame sync as well */ + dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; + DRM_DEBUG_KMS("PSR2 %s on sink", + dev_priv->psr.psr2_support ? "supported" : "not supported"); + } + + /* Read the eDP Display control capabilities registers */ + if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) && + drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, + intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd) == + sizeof(intel_dp->edp_dpcd))) + DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), + intel_dp->edp_dpcd); /* Intermediate frequency support */ - if (is_edp(intel_dp) && (intel_dp->edp_dpcd[0] >= 0x03)) { /* eDp v1.4 or higher */ + if (intel_dp->edp_dpcd[0] >= 0x03) { /* eDp v1.4 or higher */ __le16 sink_rates[DP_MAX_SUPPORTED_RATES]; int i; @@ -3491,7 +3474,36 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) intel_dp->num_sink_rates = i; } - intel_dp_print_rates(intel_dp); + return true; +} + + +static bool +intel_dp_get_dpcd(struct intel_dp *intel_dp) +{ + if (!intel_dp_read_dpcd(intel_dp)) + return false; + + if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, + &intel_dp->sink_count, 1) < 0) + return false; + + /* + * Sink count can change between short pulse hpd hence + * a member variable in intel_dp will track any changes + * between short pulse interrupts. + */ + intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count); + + /* + * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that + * a dongle is present but no display. Unless we require to know + * if a dongle is present or not, we don't need to update + * downstream port information. So, an early return here saves + * time from performing other operations which are not required. 
+ */ + if (!is_edp(intel_dp) && !intel_dp->sink_count) + return false; if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT)) @@ -4252,6 +4264,12 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) if (intel_encoder->type != INTEL_OUTPUT_EDP) intel_encoder->type = INTEL_OUTPUT_DP; + DRM_DEBUG_KMS("Display Port TPS3 support: source %s, sink %s\n", + yesno(intel_dp_source_supports_hbr2(intel_dp)), + yesno(drm_dp_tps3_supported(intel_dp->dpcd))); + + intel_dp_print_rates(intel_dp); + intel_dp_probe_oui(intel_dp); ret = intel_dp_probe_mst(intel_dp); @@ -5413,14 +5431,9 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, pps_unlock(intel_dp); /* Cache DPCD and EDID for edp. */ - has_dpcd = intel_dp_get_dpcd(intel_dp); + has_dpcd = intel_edp_init_dpcd(intel_dp); - if (has_dpcd) { - if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) - dev_priv->no_aux_handshake = - intel_dp->dpcd[DP_MAX_DOWNSPREAD] & - DP_NO_AUX_HANDSHAKE_LINK_TRAINING; - } else { + if (!has_dpcd) { /* if this fails, presume the device is a ghost */ DRM_INFO("failed to retrieve link info, disabling eDP\n"); goto out_vdd_off; -- cgit v0.10.2 From 64ee2fd25cc5ca86b18b153ac0f310964ca9fe0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 28 Jul 2016 17:50:39 +0300 Subject: drm/i915: Avoid mixing up SST and MST in DDI setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MST vs. SST selection should depend purely on the choice of the connector/encoder. So don't try to determine the correct DDI mode based on the intel_dp->is_mst, which simply tells us whether the sink is in MST mode or not. Instead derive the information from the encoder type. Since the link training code deals in non-fake encoders, we'll also need to keep a second copy of that information around, which we'll now designate as 'link_mst'. 
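Reduced to its essence, the mode selection becomes a pure function of the encoder type; a simplified sketch of that mapping (the enum and function names here are invented for illustration, the real change is the intel_ddi_enable_transcoder_func() hunk below):

    enum output_type { OUT_HDMI, OUT_ANALOG, OUT_DP, OUT_EDP, OUT_DP_MST };
    enum ddi_mode    { DDI_HDMI, DDI_FDI, DDI_DP_SST, DDI_DP_MST };

    /* The transcoder mode follows the encoder driving the pipe, not the
     * sink's MST capability bit. */
    static enum ddi_mode ddi_mode_for(enum output_type type)
    {
        switch (type) {
        case OUT_HDMI:   return DDI_HDMI;
        case OUT_ANALOG: return DDI_FDI;
        case OUT_DP:
        case OUT_EDP:    return DDI_DP_SST;   /* SST encoder => SST mode */
        case OUT_DP_MST: return DDI_DP_MST;   /* MST fake encoder => MST mode */
        }
        return DDI_DP_SST;                    /* not reached */
    }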
Cc: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1469717448-4297-4-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index fc2ef2d..0400825 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1126,7 +1126,6 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - struct drm_encoder *encoder = &intel_encoder->base; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe = intel_crtc->pipe; @@ -1192,29 +1191,15 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) temp |= TRANS_DDI_MODE_SELECT_HDMI; else temp |= TRANS_DDI_MODE_SELECT_DVI; - } else if (type == INTEL_OUTPUT_ANALOG) { temp |= TRANS_DDI_MODE_SELECT_FDI; temp |= (intel_crtc->config->fdi_lanes - 1) << 1; - } else if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - - if (intel_dp->is_mst) { - temp |= TRANS_DDI_MODE_SELECT_DP_MST; - } else - temp |= TRANS_DDI_MODE_SELECT_DP_SST; - + temp |= TRANS_DDI_MODE_SELECT_DP_SST; temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); } else if (type == INTEL_OUTPUT_DP_MST) { - struct intel_dp *intel_dp = &enc_to_mst(encoder)->primary->dp; - - if (intel_dp->is_mst) { - temp |= TRANS_DDI_MODE_SELECT_DP_MST; - } else - temp |= TRANS_DDI_MODE_SELECT_DP_SST; - + temp |= TRANS_DDI_MODE_SELECT_DP_MST; temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); } else { WARN(1, "Invalid encoder type %d for pipe %c\n", @@ -2139,7 +2124,7 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) val = DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE; - if (intel_dp->is_mst) + if (intel_dp->link_mst) val |= DP_TP_CTL_MODE_MST; else { val |= DP_TP_CTL_MODE_SST; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0a9ade9..b245870 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1651,6 +1651,7 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp, { intel_dp->link_rate = pipe_config->port_clock; intel_dp->lane_count = pipe_config->lane_count; + intel_dp->link_mst = intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST); } static void intel_dp_prepare(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 50cdc89..55af7f0 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -852,6 +852,7 @@ struct intel_dp { int link_rate; uint8_t lane_count; uint8_t sink_count; + bool link_mst; bool has_audio; bool detect_done; enum hdmi_force_audio force_audio; -- cgit v0.10.2 From 477321e0130c58f7fc3e41753ad95e934aaa7abc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 28 Jul 2016 17:50:40 +0300 Subject: drm/i915: Reject mixing MST and SST/HDMI on the same digital port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can't mix MST with SST/HDMI on the same physical port, so we'll need to reject such configurations in check_digital_port_conflicts(). Nothing else will prevent this as MST has its fake encoders and its own connectors so the cloning checks won't catch this. 
The same digital port can be used multiple times, but only if all the encoders involved are MST encoders, so we only want to check MST vs. SST/HDMI, not MST vs. MST. And SST/HDMI vs. SST/HDMI we already check. Cc: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1469717448-4297-5-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index da9dcac..483ddfe 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12300,6 +12300,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) struct drm_device *dev = state->dev; struct drm_connector *connector; unsigned int used_ports = 0; + unsigned int used_mst_ports = 0; /* * Walk the connector list instead of the encoder @@ -12336,11 +12337,20 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) return false; used_ports |= port_mask; + break; + case INTEL_OUTPUT_DP_MST: + used_mst_ports |= + 1 << enc_to_mst(&encoder->base)->primary->port; + break; default: break; } } + /* can't mix MST and SST/HDMI on the same port */ + if (used_ports & used_mst_ports) + return false; + return true; } -- cgit v0.10.2 From f64425a82bdb5c3d7e09ba765716da88a9b00eec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 28 Jul 2016 17:50:41 +0300 Subject: drm/i915: Track active streams also for DP SST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit s/active_mst_links/active_streams/ and use it also for SST. We can then use this information in the hpd handling to see if the link is active or not, and thus whether we may need to retrain. 
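A stripped-down model of the shared counter and of how hotplug handling can use it (the types and helpers below are invented for the sketch; the real counter lives in struct intel_dp, as the diff below shows):

    #include <assert.h>
    #include <stdbool.h>

    struct dp_port {
        int active_streams;        /* SST: 0 or 1; MST: one per stream */
        bool channel_eq_ok;        /* stands in for the DPCD link-status check */
    };

    static void stream_enable(struct dp_port *p)
    {
        p->active_streams++;
    }

    static void stream_disable(struct dp_port *p)
    {
        assert(p->active_streams > 0);
        p->active_streams--;
    }

    /* Only a link that is actually carrying streams can need retraining. */
    static bool link_needs_retrain(const struct dp_port *p)
    {
        return p->active_streams > 0 && !p->channel_eq_ok;
    }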
Cc: Ander Conselvan de Oliveira Cc: Jim Bride Cc: Manasi D Navare Cc: Durgadoss R Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1469717448-4297-6-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 0400825..ac8700b 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1641,6 +1641,9 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) intel_ddi_init_dp_buf_reg(intel_encoder); + WARN_ON(intel_dp->active_streams != 0); + intel_dp->active_streams++; + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); if (port != PORT_A || INTEL_INFO(dev_priv)->gen >= 9) @@ -1767,6 +1770,13 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder) intel_psr_disable(intel_dp); intel_edp_backlight_off(intel_dp); } + + if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + intel_dp->active_streams--; + WARN_ON(intel_dp->active_streams != 0); + } } bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index b245870..38f2033 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2685,6 +2685,9 @@ static void intel_enable_dp(struct intel_encoder *encoder) lane_mask); } + WARN_ON(intel_dp->active_streams != 0); + intel_dp->active_streams++; + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); intel_dp_stop_link_train(intel_dp); @@ -3344,6 +3347,9 @@ intel_dp_link_down(struct intel_dp *intel_dp) DRM_DEBUG_KMS("\n"); + intel_dp->active_streams--; + WARN_ON(intel_dp->active_streams != 0); + if ((IS_GEN7(dev) && port == PORT_A) || (HAS_PCH_CPT(dev) && port != PORT_A)) { DP &= ~DP_LINK_TRAIN_MASK_CPT; @@ -3833,7 +3839,7 @@ go_again: if (bret == true) { /* check link status - esi[10] = 0x200c */ - if (intel_dp->active_mst_links && + if (intel_dp->active_streams && !drm_dp_channel_eq_ok(&esi[10], intel_dp->lane_count)) { DRM_DEBUG_KMS("channel EQ not ok, retraining\n"); intel_dp_start_link_train(intel_dp); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 629337d..0beca91 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -99,7 +99,7 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder) struct intel_dp *intel_dp = &intel_dig_port->dp; int ret; - DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); + DRM_DEBUG_KMS("%d\n", intel_dp->active_streams); drm_dp_mst_reset_vcpi_slots(&intel_dp->mst_mgr, intel_mst->connector->port); @@ -115,7 +115,7 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder) struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; - DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); + DRM_DEBUG_KMS("%d\n", intel_dp->active_streams); /* this can fail */ drm_dp_check_act_status(&intel_dp->mst_mgr); @@ -124,10 +124,10 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder) drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, intel_mst->connector->port); - intel_dp->active_mst_links--; + intel_dp->active_streams--; intel_mst->connector = NULL; - if (intel_dp->active_mst_links == 0) { + if (intel_dp->active_streams == 0) { intel_dig_port->base.post_disable(&intel_dig_port->base); 
intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); } @@ -165,11 +165,11 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder) */ found->encoder = encoder; - DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); + DRM_DEBUG_KMS("%d\n", intel_dp->active_streams); intel_mst->connector = found; - if (intel_dp->active_mst_links == 0) { + if (intel_dp->active_streams == 0) { intel_ddi_clk_select(&intel_dig_port->base, intel_crtc->config); intel_prepare_dp_ddi_buffers(&intel_dig_port->base); @@ -193,7 +193,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder) } - intel_dp->active_mst_links++; + intel_dp->active_streams++; temp = I915_READ(DP_TP_STATUS(port)); I915_WRITE(DP_TP_STATUS(port), temp); @@ -210,7 +210,7 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder) enum port port = intel_dig_port->port; int ret; - DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); + DRM_DEBUG_KMS("%d\n", intel_dp->active_streams); if (intel_wait_for_register(dev_priv, DP_TP_STATUS(port), diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 55af7f0..b846623 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -894,7 +894,7 @@ struct intel_dp { bool can_mst; /* this port supports mst */ bool is_mst; - int active_mst_links; + int active_streams; /* number of active streams (for SST and MST both) */ /* connector directly attached - won't be use for modeset in mst world */ struct intel_connector *attached_connector; -- cgit v0.10.2 From c4e3170a0cdeec5ba9749a49a757b7ea722829d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 29 Jul 2016 16:51:16 +0300 Subject: drm/i915: Allow MST sinks to work even if drm_probe_ddc() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With HSW + Dell UP2414Q (at least) drm_probe_ddc() occasionally fails, and then we'll assume that the entire display has been disconnected. We don't need the EDID from the main link, so we can simply check if the sink is MST capable, and if so treat is as connected. 
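The resulting detect order can be sketched as follows (simplified, with invented names; the real flow is the intel_dp_detect_dpcd() hunk below):

    #include <stdbool.h>

    enum status { DISCONNECTED, CONNECTED };

    struct sink_state {
        bool has_downstream_port;  /* sink reports a usable SINK_COUNT */
        bool sink_present;         /* SINK_COUNT != 0 */
        bool mst_capable;          /* DP_MSTM_CAP set in the DPCD */
        bool ddc_ok;               /* would drm_probe_ddc() succeed? */
    };

    static enum status detect(const struct sink_state *s)
    {
        if (s->has_downstream_port)
            return s->sink_present ? CONNECTED : DISCONNECTED;
        if (s->mst_capable)        /* new: MST does not need the main-link EDID */
            return CONNECTED;
        return s->ddc_ok ? CONNECTED : DISCONNECTED;  /* last resort: poke DDC */
    }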
v2: Skip drm_probe_ddc() entirely for MST (Daniel) Cc: Ander Conselvan de Oliveira Cc: Jim Bride Cc: Manasi D Navare Cc: Durgadoss R Signed-off-by: Ville Syrjälä Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1469800276-6979-1-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 38f2033..a0c99c0 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3545,7 +3545,7 @@ intel_dp_probe_oui(struct intel_dp *intel_dp) } static bool -intel_dp_probe_mst(struct intel_dp *intel_dp) +intel_dp_can_mst(struct intel_dp *intel_dp) { u8 buf[1]; @@ -3558,18 +3558,30 @@ intel_dp_probe_mst(struct intel_dp *intel_dp) if (intel_dp->dpcd[DP_DPCD_REV] < 0x12) return false; - if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1)) { - if (buf[0] & DP_MST_CAP) { - DRM_DEBUG_KMS("Sink is MST capable\n"); - intel_dp->is_mst = true; - } else { - DRM_DEBUG_KMS("Sink is not MST capable\n"); - intel_dp->is_mst = false; - } - } + if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1) != 1) + return false; - drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); - return intel_dp->is_mst; + return buf[0] & DP_MST_CAP; +} + +static void +intel_dp_configure_mst(struct intel_dp *intel_dp) +{ + if (!i915.enable_dp_mst) + return; + + if (!intel_dp->can_mst) + return; + + intel_dp->is_mst = intel_dp_can_mst(intel_dp); + + if (intel_dp->is_mst) + DRM_DEBUG_KMS("Sink is MST capable\n"); + else + DRM_DEBUG_KMS("Sink is not MST capable\n"); + + drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, + intel_dp->is_mst); } static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) @@ -3999,6 +4011,9 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) connector_status_connected : connector_status_disconnected; } + if (intel_dp_can_mst(intel_dp)) + return connector_status_connected; + /* If no HPD, poke DDC gently */ if (drm_probe_ddc(&intel_dp->aux.ddc)) return connector_status_connected; @@ -4236,7 +4251,6 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) struct drm_device *dev = connector->dev; enum drm_connector_status status; enum intel_display_power_domain power_domain; - bool ret; u8 sink_irq_vector; power_domain = intel_display_port_aux_power_domain(intel_encoder); @@ -4279,8 +4293,9 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) intel_dp_probe_oui(intel_dp); - ret = intel_dp_probe_mst(intel_dp); - if (ret) { + intel_dp_configure_mst(intel_dp); + + if (intel_dp->is_mst) { /* * If we are in MST mode then this connector * won't appear connected or have anything -- cgit v0.10.2 From 1354f734c5f3e9f08f97d22aff97612ee7ec9d3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 28 Jul 2016 17:50:45 +0300 Subject: drm/i915: Remove useless rate_to_index() usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to iterate the rates array in intel_dp_max_link_rate(). We know the max rate will be the last entry, and we already know the size. 
Cc: Ander Conselvan de Oliveira Cc: Jim Bride Cc: Manasi D Navare Cc: Durgadoss R Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1469717448-4297-10-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index a0c99c0..65943ae 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1447,7 +1447,7 @@ intel_dp_max_link_rate(struct intel_dp *intel_dp) if (WARN_ON(len <= 0)) return 162000; - return rates[rate_to_index(0, rates) - 1]; + return rates[len - 1]; } int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) -- cgit v0.10.2 From 65fbb4e79978c88e817f22094308dcf1a3442bee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 28 Jul 2016 17:50:47 +0300 Subject: drm/i915: Don't try to ack sink irqs when there are none MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My ASUS PB278 at least doesn't seem to appreciate when you try to ack sink irqs when there are none. Results in this sort of dmesg spam [drm:drm_dp_dpcd_access] too many retries, giving up Let's skip the ack if there are no pending irqs. I have no clue why we do this in two places. One of them likely should just go away. Oh, and MST has its own sink irq handler too... Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1469717448-4297-12-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 65943ae..53d97f6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3940,7 +3940,7 @@ static bool intel_dp_short_pulse(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); - u8 sink_irq_vector; + u8 sink_irq_vector = 0; u8 old_sink_count = intel_dp->sink_count; bool ret; @@ -3967,7 +3967,8 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) /* Try to read the source of the interrupt */ if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 && - intel_dp_get_sink_irq(intel_dp, &sink_irq_vector)) { + intel_dp_get_sink_irq(intel_dp, &sink_irq_vector) && + sink_irq_vector != 0) { /* Clear interrupt source */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, @@ -4251,7 +4252,7 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) struct drm_device *dev = connector->dev; enum drm_connector_status status; enum intel_display_power_domain power_domain; - u8 sink_irq_vector; + u8 sink_irq_vector = 0; power_domain = intel_display_port_aux_power_domain(intel_encoder); intel_display_power_get(to_i915(dev), power_domain); @@ -4330,7 +4331,8 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) /* Try to read the source of the interrupt */ if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 && - intel_dp_get_sink_irq(intel_dp, &sink_irq_vector)) { + intel_dp_get_sink_irq(intel_dp, &sink_irq_vector) && + sink_irq_vector != 0) { /* Clear interrupt source */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, -- cgit v0.10.2 From d838a110f0b310d408ebe6b5a97e36ec27555ebf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Aug 2016 17:09:00 +0100 Subject: drm/i915: Acquire audio powerwell for HD-Audio registers On Haswell/Broadwell, the HD-Audio block is inside the HDMI/display power well and so the sna-hda audio codec acquires the display power well while it is operational. 
However, Skylake separates the powerwells again, but yet we still need the audio powerwell to setup the registers. (But then the hardware uses those registers even while powered off???) Acquiring the powerwell around setting the chicken bits when setting up the audio channel does at least silence the WARNs from touching our registers whilst unpowered. We silence our own test cases, but maybe there is a latent bug in using the audio channel? v2: Grab both rpm wakelock and audio wakelock Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96214 Fixes: 03b135cebc47 "ALSA: hda - remove dependency on i915 power well for SKL") Signed-off-by: Chris Wilson Cc: Libin Yang Cc: Takashi Iwai Cc: Marius Vlad Tested-by: Hans de Goede Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/1470240540-29004-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 6700a7b..d32f586 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -600,6 +600,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev, if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) return; + i915_audio_component_get_power(dev); + /* * Enable/disable generating the codec wake signal, overriding the * internal logic to generate the codec wake to controller. @@ -615,6 +617,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev, I915_WRITE(HSW_AUD_CHICKENBIT, tmp); usleep_range(1000, 1500); } + + i915_audio_component_put_power(dev); } /* Get CDCLK in kHz */ @@ -648,6 +652,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev, !IS_HASWELL(dev_priv)) return 0; + i915_audio_component_get_power(dev); mutex_lock(&dev_priv->av_mutex); /* 1. get the pipe */ intel_encoder = dev_priv->dig_port_map[port]; @@ -698,6 +703,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev, unlock: mutex_unlock(&dev_priv->av_mutex); + i915_audio_component_put_power(dev); return err; } -- cgit v0.10.2 From 9332f3b1b99a7cb738755b138988838d33ba6748 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:17 +0100 Subject: drm/i915: Combine loops within i915_gem_evict_something Slight micro-optimise to produce combine loops so that gcc is able to optimise the inner-loops concisely. Since we are reviewing the loops, we can update the comments to describe the current state of affairs, in particular the distinction between evicting from the global GTT (which may contain untracked items and transient global pins) and the per-process GTT. 
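The combining itself is the familiar NULL-terminated array-of-phases idiom; a tiny self-contained illustration with toy lists (nothing i915-specific about the names):

    #include <stdio.h>

    struct node { int id; struct node *next; };

    int main(void)
    {
        struct node i1 = { 2, NULL }, i0 = { 1, &i1 };   /* "inactive" list */
        struct node a0 = { 3, NULL };                    /* "active" list   */
        struct node *phases[] = { &i0, &a0, NULL }, **phase;

        phase = phases;
        do {                         /* one loop body shared by both phases */
            for (struct node *n = *phase; n; n = n->next)
                printf("visit %d\n", n->id);
        } while (*++phase);
        return 0;
    }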
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 3437ced..016be73 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -34,6 +34,19 @@ #include "i915_trace.h" static bool +gpu_is_idle(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + + for_each_engine(engine, dev_priv) { + if (!list_empty(&engine->request_list)) + return false; + } + + return true; +} + +static bool mark_free(struct i915_vma *vma, struct list_head *unwind) { if (vma->pin_count) @@ -76,37 +89,31 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, unsigned long start, unsigned long end, unsigned flags) { - struct list_head eviction_list, unwind_list; - struct i915_vma *vma; - int ret = 0; - int pass = 0; + struct drm_i915_private *dev_priv = to_i915(dev); + struct list_head eviction_list; + struct list_head *phases[] = { + &vm->inactive_list, + &vm->active_list, + NULL, + }, **phase; + struct i915_vma *vma, *next; + int ret; trace_i915_gem_evict(dev, min_size, alignment, flags); /* * The goal is to evict objects and amalgamate space in LRU order. * The oldest idle objects reside on the inactive list, which is in - * retirement order. The next objects to retire are those on the (per - * ring) active list that do not have an outstanding flush. Once the - * hardware reports completion (the seqno is updated after the - * batchbuffer has been finished) the clean buffer objects would - * be retired to the inactive list. Any dirty objects would be added - * to the tail of the flushing list. So after processing the clean - * active objects we need to emit a MI_FLUSH to retire the flushing - * list, hence the retirement order of the flushing list is in - * advance of the dirty objects on the active lists. + * retirement order. The next objects to retire are those in flight, + * on the active list, again in retirement order. * * The retirement sequence is thus: * 1. Inactive objects (already retired) - * 2. Clean active objects - * 3. Flushing list - * 4. Dirty active objects. + * 2. Active objects (will stall on unbinding) * * On each list, the oldest objects lie at the HEAD with the freshest * object on the TAIL. */ - - INIT_LIST_HEAD(&unwind_list); if (start != 0 || end != vm->total) { drm_mm_init_scan_with_range(&vm->mm, min_size, alignment, cache_level, @@ -114,79 +121,71 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, } else drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level); -search_again: - /* First see if there is a large enough contiguous idle region... */ - list_for_each_entry(vma, &vm->inactive_list, vm_link) { - if (mark_free(vma, &unwind_list)) - goto found; - } - if (flags & PIN_NONBLOCK) - goto none; + phases[1] = NULL; - /* Now merge in the soon-to-be-expired objects... */ - list_for_each_entry(vma, &vm->active_list, vm_link) { - if (mark_free(vma, &unwind_list)) - goto found; - } +search_again: + INIT_LIST_HEAD(&eviction_list); + phase = phases; + do { + list_for_each_entry(vma, *phase, vm_link) + if (mark_free(vma, &eviction_list)) + goto found; + } while (*++phase); -none: /* Nothing found, clean up and bail out! 
*/ - while (!list_empty(&unwind_list)) { - vma = list_first_entry(&unwind_list, - struct i915_vma, - exec_list); + list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { ret = drm_mm_scan_remove_block(&vma->node); BUG_ON(ret); - list_del_init(&vma->exec_list); + INIT_LIST_HEAD(&vma->exec_list); } /* Can we unpin some objects such as idle hw contents, - * or pending flips? + * or pending flips? But since only the GGTT has global entries + * such as scanouts, rinbuffers and contexts, we can skip the + * purge when inspecting per-process local address spaces. */ - if (flags & PIN_NONBLOCK) + if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK) return -ENOSPC; - /* Only idle the GPU and repeat the search once */ - if (pass++ == 0) { - struct drm_i915_private *dev_priv = to_i915(dev); - - if (i915_is_ggtt(vm)) { - ret = i915_gem_switch_to_kernel_context(dev_priv); - if (ret) - return ret; - } - - ret = i915_gem_wait_for_idle(dev_priv); - if (ret) - return ret; - - i915_gem_retire_requests(dev_priv); - goto search_again; + if (gpu_is_idle(dev_priv)) { + /* If we still have pending pageflip completions, drop + * back to userspace to give our workqueues time to + * acquire our locks and unpin the old scanouts. + */ + return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC; } - /* If we still have pending pageflip completions, drop - * back to userspace to give our workqueues time to - * acquire our locks and unpin the old scanouts. + /* Not everything in the GGTT is tracked via vma (otherwise we + * could evict as required with minimal stalling) so we are forced + * to idle the GPU and explicitly retire outstanding requests in + * the hopes that we can then remove contexts and the like only + * bound by their active reference. */ - return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC; + ret = i915_gem_switch_to_kernel_context(dev_priv); + if (ret) + return ret; + + ret = i915_gem_wait_for_idle(dev_priv); + if (ret) + return ret; + + i915_gem_retire_requests(dev_priv); + goto search_again; found: /* drm_mm doesn't allow any other other operations while - * scanning, therefore store to be evicted objects on a - * temporary list. */ - INIT_LIST_HEAD(&eviction_list); - while (!list_empty(&unwind_list)) { - vma = list_first_entry(&unwind_list, - struct i915_vma, - exec_list); - if (drm_mm_scan_remove_block(&vma->node)) { + * scanning, therefore store to-be-evicted objects on a + * temporary list and take a reference for all before + * calling unbind (which may remove the active reference + * of any of our objects, thus corrupting the list). + */ + list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { + if (drm_mm_scan_remove_block(&vma->node)) vma->pin_count++; - list_move(&vma->exec_list, &eviction_list); - continue; - } - list_del_init(&vma->exec_list); + else + list_del_init(&vma->exec_list); } /* Unbinding will emit any required flushes */ @@ -200,7 +199,6 @@ found: if (ret == 0) ret = i915_vma_unbind(vma); } - return ret; } @@ -279,7 +277,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) return ret; i915_gem_retire_requests(dev_priv); - WARN_ON(!list_empty(&vm->active_list)); } -- cgit v0.10.2 From e522ac2324f384e1fafd1a4ae6ebf38095dc6695 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:18 +0100 Subject: drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something() Eviction is VM local, so we can ignore the significance of the drm_device in the caller, and leave it to i915_gem_evict_something() to manage itself. 
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 66b98fa..fbda38f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3398,8 +3398,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); /* i915_gem_evict.c */ -int __must_check i915_gem_evict_something(struct drm_device *dev, - struct i915_address_space *vm, +int __must_check i915_gem_evict_something(struct i915_address_space *vm, int min_size, unsigned alignment, unsigned cache_level, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 03bad1b..2868a3a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3089,7 +3089,7 @@ search_free: search_flag, alloc_flag); if (ret) { - ret = i915_gem_evict_something(dev, vm, size, alignment, + ret = i915_gem_evict_something(vm, size, alignment, obj->cache_level, start, end, flags); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 016be73..4bce72f 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -61,7 +61,6 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) /** * i915_gem_evict_something - Evict vmas to make room for binding a new one - * @dev: drm_device * @vm: address space to evict from * @min_size: size of the desired free space * @alignment: alignment constraint of the desired free space @@ -84,12 +83,12 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) * memory in e.g. the shrinker. */ int -i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, +i915_gem_evict_something(struct i915_address_space *vm, int min_size, unsigned alignment, unsigned cache_level, unsigned long start, unsigned long end, unsigned flags) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(vm->dev); struct list_head eviction_list; struct list_head *phases[] = { &vm->inactive_list, @@ -99,7 +98,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, struct i915_vma *vma, *next; int ret; - trace_i915_gem_evict(dev, min_size, alignment, flags); + trace_i915_gem_evict(vm, min_size, alignment, flags); /* * The goal is to evict objects and amalgamate space in LRU order. @@ -154,7 +153,7 @@ search_again: * back to userspace to give our workqueues time to * acquire our locks and unpin the old scanouts. */ - return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC; + return intel_has_pending_fb_unpin(vm->dev) ? 
-EAGAIN : -ENOSPC; } /* Not everything in the GGTT is tracked via vma (otherwise we diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 685ca2a..f1f14e5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2012,7 +2012,7 @@ alloc: 0, ggtt->base.total, DRM_MM_TOPDOWN); if (ret == -ENOSPC && !retried) { - ret = i915_gem_evict_something(dev, &ggtt->base, + ret = i915_gem_evict_something(&ggtt->base, GEN6_PD_SIZE, GEN6_PD_ALIGN, I915_CACHE_NONE, 0, ggtt->base.total, diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 9e43c0a..1787980 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -394,25 +394,27 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy, ); TRACE_EVENT(i915_gem_evict, - TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags), - TP_ARGS(dev, size, align, flags), + TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags), + TP_ARGS(vm, size, align, flags), TP_STRUCT__entry( __field(u32, dev) + __field(struct i915_address_space *, vm) __field(u32, size) __field(u32, align) - __field(unsigned, flags) + __field(unsigned int, flags) ), TP_fast_assign( - __entry->dev = dev->primary->index; + __entry->dev = vm->dev->primary->index; + __entry->vm = vm; __entry->size = size; __entry->align = align; __entry->flags = flags; ), - TP_printk("dev=%d, size=%d, align=%d %s", - __entry->dev, __entry->size, __entry->align, + TP_printk("dev=%d, vm=%p, size=%d, align=%d %s", + __entry->dev, __entry->vm, __entry->size, __entry->align, __entry->flags & PIN_MAPPABLE ? ", mappable" : "") ); -- cgit v0.10.2 From 115003e9ff0454687af35b9cb16ba970bf28dc61 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:19 +0100 Subject: drm/i915: Double check the active status on the batch pool We should not rely on obj->active being uptodate unless we manually flush it. Instead, we can verify that the next available batch object is idle by looking at its last active request (and checking it for completion). 
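Conceptually the idle test becomes a seqno comparison against the buffer's last request rather than a cached flag; a rough standalone sketch (types and helpers invented for the example, using a wrap-safe comparison in the spirit of the driver's seqno checks):

    #include <stdbool.h>

    struct request { unsigned int seqno; };

    struct buffer {
        struct request *last_read;     /* last request that used this buffer */
    };

    /* Completed if there is no outstanding request, or the hardware seqno
     * has advanced past it. */
    static bool request_completed(const struct request *rq, unsigned int hw_seqno)
    {
        return !rq || (int)(hw_seqno - rq->seqno) >= 0;
    }

    static bool buffer_is_idle(const struct buffer *buf, unsigned int hw_seqno)
    {
        return request_completed(buf->last_read, hw_seqno);
    }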
v2: remove the struct drm_device forward declaration added in the process of removing its necessity Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-3-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 825981b5..ed98959 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -41,15 +41,15 @@ /** * i915_gem_batch_pool_init() - initialize a batch buffer pool - * @dev: the drm device + * @engine: the associated request submission engine * @pool: the batch buffer pool */ -void i915_gem_batch_pool_init(struct drm_device *dev, +void i915_gem_batch_pool_init(struct intel_engine_cs *engine, struct i915_gem_batch_pool *pool) { int n; - pool->dev = dev; + pool->engine = engine; for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) INIT_LIST_HEAD(&pool->cache_list[n]); @@ -65,7 +65,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool) { int n; - WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); + lockdep_assert_held(&pool->engine->i915->drm.struct_mutex); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { struct drm_i915_gem_object *obj, *next; @@ -101,7 +101,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, struct list_head *list; int n; - WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); + lockdep_assert_held(&pool->engine->i915->drm.struct_mutex); /* Compute a power-of-two bucket, but throw everything greater than * 16KiB into the same bucket: i.e. the the buckets hold objects of @@ -114,7 +114,8 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, list_for_each_entry_safe(tmp, next, list, batch_pool_link) { /* The batches are strictly LRU ordered */ - if (tmp->active) + if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id], + &tmp->base.dev->struct_mutex)) break; /* While we're looping, do some clean up */ @@ -133,7 +134,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, if (obj == NULL) { int ret; - obj = i915_gem_object_create(pool->dev, size); + obj = i915_gem_object_create(&pool->engine->i915->drm, size); if (IS_ERR(obj)) return obj; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h index 848e907..10d5ac4 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h @@ -27,13 +27,15 @@ #include "i915_drv.h" +struct intel_engine_cs; + struct i915_gem_batch_pool { - struct drm_device *dev; + struct intel_engine_cs *engine; struct list_head cache_list[4]; }; /* i915_gem_batch_pool.c */ -void i915_gem_batch_pool_init(struct drm_device *dev, +void i915_gem_batch_pool_init(struct intel_engine_cs *engine, struct i915_gem_batch_pool *pool); void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool); struct drm_i915_gem_object* diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 202ad83..f495969 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -185,7 +185,7 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) engine->fence_context = fence_context_alloc(1); intel_engine_init_hangcheck(engine); - i915_gem_batch_pool_init(&engine->i915->drm, &engine->batch_pool); + i915_gem_batch_pool_init(engine, &engine->batch_pool); } /** -- cgit v0.10.2 From 0340d9fd0f74309f435d5324b885c9ca1967262c Mon Sep 17 00:00:00 2001 From: 
Chris Wilson Date: Thu, 4 Aug 2016 16:32:20 +0100 Subject: drm/i915: Remove request retirement before each batch This reimplements the denial-of-service protection against igt from commit 227f782e4667 ("drm/i915: Retire requests before creating a new one") and transfers the stall from before each batch into get_pages(). The issue is that the stall is increasing latency between batches which is detrimental in some cases (especially coupled with execlists) to keeping the GPU well fed. Also we have made the observation that retiring requests can of itself free objects (and requests) and therefore makes a good first step when shrinking. v2: Recycle objects prior to i915_gem_object_get_pages() v3: Remove the reference to the ring from i915_gem_requests_ring() as it operates on an intel_engine_cs. v4: Since commit 9b5f4e5ed6fd ("drm/i915: Retire oldest completed request before allocating next") we no longer need the safeguard to retire requests before get_pages(). We no longer see the huge latencies when hitting the shrinker between allocations. Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-4-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fbda38f..2de3d16 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3169,7 +3169,6 @@ struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine); void i915_gem_retire_requests(struct drm_i915_private *dev_priv); -void i915_gem_retire_requests_ring(struct intel_engine_cs *engine); static inline u32 i915_reset_counter(struct i915_gpu_error *error) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 5e3b505..0593ea3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -781,8 +781,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4; int retry; - i915_gem_retire_requests_ring(engine); - vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; INIT_LIST_HEAD(&ordered_vmas); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 6faa848..773b942 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -732,7 +732,7 @@ complete: return ret; } -void i915_gem_retire_requests_ring(struct intel_engine_cs *engine) +static void engine_retire_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request, *next; @@ -756,7 +756,7 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv) GEM_BUG_ON(!dev_priv->gt.awake); for_each_engine(engine, dev_priv) { - i915_gem_retire_requests_ring(engine); + engine_retire_requests(engine); if (list_empty(&engine->request_list)) dev_priv->gt.active_engines &= ~intel_engine_flag(engine); } -- cgit v0.10.2 From e655bc35fdfdaae540136b524b574d2fb3ea9998 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:21 +0100 Subject: drm/i915: Remove i915_gem_execbuffer_retire_commands() Move the single line to the callsite as the name is now misleading, and the purpose is solely to add the request to the execution queue. Here, we can see that if we failed to dispatch the batch from the request, we can forgo flushing the GPU when closing the request. 
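The replacement is the single call at the request-completion label (taken from the hunk below); the final argument now reports whether the batch was actually submitted, so a failed dispatch skips the flush when the request is closed:

    err_request:
        /* Only ask for a flush if execbuf_submit() succeeded */
        __i915_add_request(params->request, params->batch_obj, ret == 0);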
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-5-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0593ea3..63984c4 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1211,13 +1211,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, } } -static void -i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) -{ - /* Add a breadcrumb for the completion of the batch buffer */ - __i915_add_request(params->request, params->batch_obj, true); -} - static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { @@ -1692,7 +1685,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ret = execbuf_submit(params, args, &eb->vmas); err_request: - i915_gem_execbuffer_retire_commands(params); + __i915_add_request(params->request, params->batch_obj, ret == 0); err_batch_unpin: /* -- cgit v0.10.2 From 2ffffd0f85ab90f38569c39ef0455824511e80e2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:22 +0100 Subject: drm/i915: Fix up vma alignment to be u64 This is not the full fix, as we are required to percolate the u64 nature down through the drm_mm stack, but this is required now to prevent explosions due to mismatch between execbuf (eb_vma_misplaced) and vma binding (i915_vma_misplaced) - and reduces the risk of spurious changes as we adjust the vma interface in the next patches. v2: long long casts not required for u64 printk (%llx) Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-6-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2de3d16..74a3135 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3032,13 +3032,13 @@ void i915_gem_free_object(struct drm_gem_object *obj); int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, - uint32_t alignment, - uint64_t flags); + u64 alignment, + u64 flags); int __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, - uint32_t alignment, - uint64_t flags); + u64 alignment, + u64 flags); int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); @@ -3398,11 +3398,9 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, - int min_size, - unsigned alignment, + u64 min_size, u64 alignment, unsigned cache_level, - unsigned long start, - unsigned long end, + u64 start, u64 end, unsigned flags); int __must_check i915_gem_evict_for_vma(struct i915_vma *target); int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2868a3a..d8e1505 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2963,8 +2963,8 @@ static struct i915_vma * i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *ggtt_view, - unsigned alignment, - uint64_t flags) + u64 alignment, + u64 flags) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -3023,7 
+3023,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, alignment = flags & PIN_MAPPABLE ? fence_alignment : unfenced_alignment; if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { - DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", + DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n", ggtt_view ? ggtt_view->type : 0, alignment); return ERR_PTR(-EINVAL); @@ -3678,7 +3678,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) } static bool -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) +i915_vma_misplaced(struct i915_vma *vma, u64 alignment, u64 flags) { struct drm_i915_gem_object *obj = vma->obj; @@ -3727,8 +3727,8 @@ static int i915_gem_object_do_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *ggtt_view, - uint32_t alignment, - uint64_t flags) + u64 alignment, + u64 flags) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; @@ -3757,7 +3757,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, if (i915_vma_misplaced(vma, alignment, flags)) { WARN(vma->pin_count, "bo is already pinned in %s with incorrect alignment:" - " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," + " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," " obj->map_and_fenceable=%d\n", ggtt_view ? "ggtt" : "ppgtt", upper_32_bits(vma->node.start), @@ -3798,8 +3798,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, int i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, - uint32_t alignment, - uint64_t flags) + u64 alignment, + u64 flags) { return i915_gem_object_do_pin(obj, vm, i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, @@ -3809,8 +3809,8 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, int i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, - uint32_t alignment, - uint64_t flags) + u64 alignment, + u64 flags) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4bce72f..ef12ecd 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -84,8 +84,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) */ int i915_gem_evict_something(struct i915_address_space *vm, - int min_size, unsigned alignment, unsigned cache_level, - unsigned long start, unsigned long end, + u64 min_size, u64 alignment, + unsigned cache_level, + u64 start, u64 end, unsigned flags) { struct drm_i915_private *dev_priv = to_i915(vm->dev); -- cgit v0.10.2 From 91b2db6f65fbbb1a6688bcc2e52596b723ea2472 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:23 +0100 Subject: drm/i915: Pad GTT views of exec objects up to user specified size Our GPUs impose certain requirements upon buffers that depend upon how exactly they are used. Typically this is expressed as that they require a larger surface than would be naively computed by pitch * height. Normally such requirements are hidden away in the userspace driver, but when we accept pointers from strangers and later impose extra conditions on them, the original client allocator has no idea about the monstrosities in the GPU and we require the userspace driver to inform the kernel how many padding pages are required beyond the client allocation. 
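From userspace the padding request is expressed per exec object; a minimal sketch (bo_handle is a hypothetical GEM handle, and pad_to_size must be page-aligned or execbuf rejects it with -EINVAL):

    /* Ask the kernel to reserve 64 KiB of GTT space for this object even if
     * its backing store is smaller, to cover hardware surface padding.
     */
    struct drm_i915_gem_exec_object2 exec_obj = {
        .handle      = bo_handle,
        .flags       = EXEC_OBJECT_PAD_TO_SIZE,
        .pad_to_size = 64 * 1024,
    };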
v2: Long time, no see v3: Try an anonymous union for uapi struct compatibility Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-7-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 74a3135..1e13693 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3032,11 +3032,13 @@ void i915_gem_free_object(struct drm_gem_object *obj); int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, + u64 size, u64 alignment, u64 flags); int __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, + u64 size, u64 alignment, u64 flags); @@ -3313,8 +3315,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - return i915_gem_object_pin(obj, &ggtt->base, - alignment, flags | PIN_GLOBAL); + return i915_gem_object_pin(obj, &ggtt->base, 0, alignment, + flags | PIN_GLOBAL); } void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d8e1505..b4af5d1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1692,7 +1692,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } /* Now pin it into the GTT if needed */ - ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); if (ret) goto unlock; @@ -2956,6 +2956,7 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma, * @obj: object to bind * @vm: address space to bind into * @ggtt_view: global gtt view if applicable + * @size: requested size in bytes (can be larger than the VMA) * @alignment: requested alignment * @flags: mask of PIN_* flags to use */ @@ -2963,21 +2964,20 @@ static struct i915_vma * i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *ggtt_view, + u64 size, u64 alignment, u64 flags) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - u32 fence_alignment, unfenced_alignment; - u32 search_flag, alloc_flag; u64 start, end; - u64 size, fence_size; + u32 search_flag, alloc_flag; struct i915_vma *vma; int ret; if (i915_is_ggtt(vm)) { - u32 view_size; + u32 fence_size, fence_alignment, unfenced_alignment; + u64 view_size; if (WARN_ON(!ggtt_view)) return ERR_PTR(-EINVAL); @@ -2995,48 +2995,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, view_size, obj->tiling_mode, false); - size = flags & PIN_MAPPABLE ? fence_size : view_size; + size = max(size, view_size); + if (flags & PIN_MAPPABLE) + size = max_t(u64, size, fence_size); + + if (alignment == 0) + alignment = flags & PIN_MAPPABLE ? fence_alignment : + unfenced_alignment; + if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { + DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n", + ggtt_view ? 
ggtt_view->type : 0, + alignment); + return ERR_PTR(-EINVAL); + } } else { - fence_size = i915_gem_get_gtt_size(dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, - true); - unfenced_alignment = - i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, - false); - size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; + size = max_t(u64, size, obj->base.size); + alignment = 4096; } start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; end = vm->total; if (flags & PIN_MAPPABLE) - end = min_t(u64, end, ggtt->mappable_end); + end = min_t(u64, end, dev_priv->ggtt.mappable_end); if (flags & PIN_ZONE_4G) end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); - if (alignment == 0) - alignment = flags & PIN_MAPPABLE ? fence_alignment : - unfenced_alignment; - if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { - DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n", - ggtt_view ? ggtt_view->type : 0, - alignment); - return ERR_PTR(-EINVAL); - } - /* If binding the object/GGTT view requires more space than the entire * aperture has, reject it early before evicting everything in a vain * attempt to find space. */ if (size > end) { - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", + DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", ggtt_view ? ggtt_view->type : 0, - size, + size, obj->base.size, flags & PIN_MAPPABLE ? "mappable" : "total", end); return ERR_PTR(-E2BIG); @@ -3530,7 +3521,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. */ - ret = i915_gem_object_ggtt_pin(obj, view, alignment, + ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment, view->type == I915_GGTT_VIEW_NORMAL ? PIN_MAPPABLE : 0); if (ret) @@ -3678,12 +3669,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) } static bool -i915_vma_misplaced(struct i915_vma *vma, u64 alignment, u64 flags) +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { struct drm_i915_gem_object *obj = vma->obj; - if (alignment && - vma->node.start & (alignment - 1)) + if (vma->node.size < size) + return true; + + if (alignment && vma->node.start & (alignment - 1)) return true; if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) @@ -3727,6 +3720,7 @@ static int i915_gem_object_do_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *ggtt_view, + u64 size, u64 alignment, u64 flags) { @@ -3754,7 +3748,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) return -EBUSY; - if (i915_vma_misplaced(vma, alignment, flags)) { + if (i915_vma_misplaced(vma, size, alignment, flags)) { WARN(vma->pin_count, "bo is already pinned in %s with incorrect alignment:" " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," @@ -3775,8 +3769,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, bound = vma ? 
vma->bound : 0; if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, - flags); + vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, + size, alignment, flags); if (IS_ERR(vma)) return PTR_ERR(vma); } else { @@ -3798,17 +3792,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, int i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, + u64 size, u64 alignment, u64 flags) { return i915_gem_object_do_pin(obj, vm, i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, - alignment, flags); + size, alignment, flags); } int i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, + u64 size, u64 alignment, u64 flags) { @@ -3819,7 +3815,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, BUG_ON(!view); return i915_gem_object_do_pin(obj, &ggtt->base, view, - alignment, flags | PIN_GLOBAL); + size, alignment, flags | PIN_GLOBAL); } void diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 63984c4..d2e27e7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -682,10 +682,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, flags |= PIN_HIGH; } - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); + ret = i915_gem_object_pin(obj, vma->vm, + entry->pad_to_size, + entry->alignment, + flags); if ((ret == -ENOSPC || ret == -E2BIG) && only_mappable_for_reloc(entry->flags)) ret = i915_gem_object_pin(obj, vma->vm, + entry->pad_to_size, entry->alignment, flags & ~PIN_MAPPABLE); if (ret) @@ -748,6 +752,9 @@ eb_vma_misplaced(struct i915_vma *vma) vma->node.start & (entry->alignment - 1)) return true; + if (vma->node.size < entry->pad_to_size) + return true; + if (entry->flags & EXEC_OBJECT_PINNED && vma->node.start != entry->offset) return true; @@ -1091,6 +1098,14 @@ validate_exec_list(struct drm_device *dev, if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) return -EINVAL; + /* pad_to_size was once a reserved field, so sanitize it */ + if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) { + if (offset_in_page(exec[i].pad_to_size)) + return -EINVAL; + } else { + exec[i].pad_to_size = 0; + } + /* First check for malicious input causing overflow in * the worst case where we need to allocate the entire * relocation tree as a single array. diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 33ce5ff..0f29273 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -727,11 +727,15 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) #define EXEC_OBJECT_PINNED (1<<4) +#define EXEC_OBJECT_PAD_TO_SIZE (1<<5) /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS (-(EXEC_OBJECT_PINNED<<1)) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1) __u64 flags; - __u64 rsvd1; + union { + __u64 rsvd1; + __u64 pad_to_size; + }; __u64 rsvd2; }; -- cgit v0.10.2 From 37508589903f8ab8b9329df0e7647a45d10a2a7c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:24 +0100 Subject: drm/i915: Reduce WARN(i915_gem_valid_gtt_space) to a debug-only check i915_gem_valid_gtt_space() is used after inserting the VMA to double check the list - the location should have been chosen to pass all the restrictions. 
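In before/after form (both lines appear in the hunk below; GEM_BUG_ON is assumed to compile away unless GEM debugging is enabled, in which case it expands to a BUG_ON-style assertion):

    /* Before: runtime check with error unwinding on every insertion */
    if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
        ret = -EINVAL;
        goto err_remove_node;
    }

    /* After: a debug-only sanity check, free in production builds */
    GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));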
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-8-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b4af5d1..b836a33 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3090,10 +3090,7 @@ search_free: goto err_vma; } } - if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { - ret = -EINVAL; - goto err_remove_node; - } + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); trace_i915_vma_bind(vma, flags); ret = i915_vma_bind(vma, obj->cache_level, flags); -- cgit v0.10.2 From 3b16525cc4c1a43e9053cfdc414356eea24bdfad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:25 +0100 Subject: drm/i915: Split insertion/binding of an object into the VM Split the insertion into the address space's range manager and binding of that object into the GTT to simplify the code flow when pinning a VMA. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-9-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b836a33..238d80e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2961,12 +2961,12 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma, * @flags: mask of PIN_* flags to use */ static struct i915_vma * -i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *ggtt_view, - u64 size, - u64 alignment, - u64 flags) +i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *ggtt_view, + u64 size, + u64 alignment, + u64 flags) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -3092,19 +3092,12 @@ search_free: } GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); - trace_i915_vma_bind(vma, flags); - ret = i915_vma_bind(vma, obj->cache_level, flags); - if (ret) - goto err_remove_node; - list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); list_move_tail(&vma->vm_link, &vm->inactive_list); obj->bind_count++; return vma; -err_remove_node: - drm_mm_remove_node(&vma->node); err_vma: vma = ERR_PTR(ret); err_unpin: @@ -3764,24 +3757,26 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, } } - bound = vma ? 
vma->bound : 0; if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, - size, alignment, flags); + vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view, + size, alignment, flags); if (IS_ERR(vma)) return PTR_ERR(vma); - } else { - ret = i915_vma_bind(vma, obj->cache_level, flags); - if (ret) - return ret; } + bound = vma->bound; + ret = i915_vma_bind(vma, obj->cache_level, flags); + if (ret) + return ret; + if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && (bound ^ vma->bound) & GLOBAL_BIND) { __i915_vma_set_map_and_fenceable(vma); WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); } + GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); + vma->pin_count++; return 0; } -- cgit v0.10.2 From 954c4691218d9e3736dec089c0a5546391c8f3df Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:26 +0100 Subject: drm/i915: Convert 4096 alignment request to 0 for drm_mm allocations As we always allocate in chunks of 4096 (that being both the PAGE_SIZE and our own GTT_PAGE_SIZE), we know that all results from the drm_mm are aligned to at least 4096. The drm_mm allocator itself is optimised for alignment == 0, and so by converting alignments of 4096 to 0 we can satisfy our own requirements and still hit the faster path. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-10-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 238d80e..e0e5256 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3072,6 +3072,15 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj, alloc_flag = DRM_MM_CREATE_DEFAULT; } + /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, + * so we know that we always have a minimum alignment of 4096. + * The drm_mm range manager is optimised to return results + * with zero alignment, so where possible use the optimal + * path. + */ + if (alignment <= 4096) + alignment = 0; + search_free: ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, size, alignment, -- cgit v0.10.2 From ad1a7d20a1034ac916b6f73b2e1146920f709eaf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:27 +0100 Subject: drm/i915: Update the GGTT size/alignment query functions In order to be consistent with other address space functions, we want to pass around 64-bit sizes, even though all known global GTT are limited to 4GiB. Similarly, we are trying to be consistent in using the _ggtt_ nomenclature when referring to the special global GTT. v2: Update docs to consistently state "global GTT". 
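The queries themselves are unchanged in substance; for reference, the pre-gen4 size computation (lifted from the hunks below) rounds the object up to a power-of-two fence region:

    /* gen2/gen3 need a power-of-two fence region when tiled */
    u64 ggtt_size = IS_GEN3(dev) ? 1024 * 1024 : 512 * 1024;
    while (ggtt_size < size)
        ggtt_size <<= 1;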
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-11-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1e13693..b6e56ec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3241,11 +3241,9 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int i915_gem_open(struct drm_device *dev, struct drm_file *file); void i915_gem_release(struct drm_device *dev, struct drm_file *file); -uint32_t -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode); -uint32_t -i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, - int tiling_mode, bool fenced); +u64 i915_gem_get_ggtt_size(struct drm_device *dev, u64 size, int tiling_mode); +u64 i915_gem_get_ggtt_alignment(struct drm_device *dev, u64 size, + int tiling_mode, bool fenced); int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e0e5256..92fa400 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1847,46 +1847,57 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) i915_gem_release_mmap(obj); } -uint32_t -i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) +/** + * i915_gem_get_ggtt_size - return required global GTT size for an object + * @dev: drm device + * @size: object size + * @tiling_mode: tiling mode + * + * Return the required global GTT size for an object, taking into account + * potential fence register mapping. + */ +u64 i915_gem_get_ggtt_size(struct drm_device *dev, u64 size, int tiling_mode) { - uint32_t gtt_size; + u64 ggtt_size; - if (INTEL_INFO(dev)->gen >= 4 || + GEM_BUG_ON(size == 0); + + if (INTEL_GEN(dev) >= 4 || tiling_mode == I915_TILING_NONE) return size; /* Previous chips need a power-of-two fence region when tiling */ if (IS_GEN3(dev)) - gtt_size = 1024*1024; + ggtt_size = 1024*1024; else - gtt_size = 512*1024; + ggtt_size = 512*1024; - while (gtt_size < size) - gtt_size <<= 1; + while (ggtt_size < size) + ggtt_size <<= 1; - return gtt_size; + return ggtt_size; } /** - * i915_gem_get_gtt_alignment - return required GTT alignment for an object + * i915_gem_get_ggtt_alignment - return required global GTT alignment * @dev: drm device * @size: object size * @tiling_mode: tiling mode - * @fenced: is fenced alignemned required or not + * @fenced: is fenced alignment required or not * - * Return the required GTT alignment for an object, taking into account + * Return the required global GTT alignment for an object, taking into account * potential fence register mapping. */ -uint32_t -i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, - int tiling_mode, bool fenced) +u64 i915_gem_get_ggtt_alignment(struct drm_device *dev, u64 size, + int tiling_mode, bool fenced) { + GEM_BUG_ON(size == 0); + /* * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ - if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || + if (INTEL_GEN(dev) >= 4 || (!fenced && IS_G33(dev)) || tiling_mode == I915_TILING_NONE) return 4096; @@ -1894,7 +1905,7 @@ i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, * Previous chips need to be aligned to the size of the smallest * fence register that can contain the object. 
*/ - return i915_gem_get_gtt_size(dev, size, tiling_mode); + return i915_gem_get_ggtt_size(dev, size, tiling_mode); } static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) @@ -2984,17 +2995,17 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj, view_size = i915_ggtt_view_size(obj, ggtt_view); - fence_size = i915_gem_get_gtt_size(dev, - view_size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(dev, - view_size, - obj->tiling_mode, - true); - unfenced_alignment = i915_gem_get_gtt_alignment(dev, - view_size, - obj->tiling_mode, - false); + fence_size = i915_gem_get_ggtt_size(dev, + view_size, + obj->tiling_mode); + fence_alignment = i915_gem_get_ggtt_alignment(dev, + view_size, + obj->tiling_mode, + true); + unfenced_alignment = i915_gem_get_ggtt_alignment(dev, + view_size, + obj->tiling_mode, + false); size = max(size, view_size); if (flags & PIN_MAPPABLE) size = max_t(u64, size, fence_size); @@ -3698,13 +3709,13 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) bool mappable, fenceable; u32 fence_size, fence_alignment; - fence_size = i915_gem_get_gtt_size(obj->base.dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, - obj->base.size, - obj->tiling_mode, - true); + fence_size = i915_gem_get_ggtt_size(obj->base.dev, + obj->base.size, + obj->tiling_mode); + fence_alignment = i915_gem_get_ggtt_alignment(obj->base.dev, + obj->base.size, + obj->tiling_mode, + true); fenceable = (vma->node.size == fence_size && (vma->node.start & (fence_alignment - 1)) == 0); diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index fa2eb4a..4e42da6 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -133,7 +133,8 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) return false; } - size = i915_gem_get_gtt_size(obj->base.dev, obj->base.size, tiling_mode); + size = i915_gem_get_ggtt_size(obj->base.dev, + obj->base.size, tiling_mode); if (i915_gem_obj_ggtt_size(obj) != size) return false; -- cgit v0.10.2 From a9f1481f41152d535a92ea63ffde9e2bea341461 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:28 +0100 Subject: drm/i915: Update i915_gem_get_ggtt_size/_alignment to use drm_i915_private For consistency, internal functions should take drm_i915_private rather than drm_device. Now that we are subclassing drm_device, there are no more size wins, but being consistent is its own blessing. 
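The conversion relies on drm_i915_private subclassing drm_device, so the two are interchangeable at no cost; a sketch of the assumed relationship (the exact upstream definition may differ in detail):

    /* drm_i915_private embeds the drm_device, so to_i915() is a container_of() */
    static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
    {
        return container_of(dev, struct drm_i915_private, drm);
    }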
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-12-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b6e56ec..3d73394 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3241,8 +3241,9 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int i915_gem_open(struct drm_device *dev, struct drm_file *file); void i915_gem_release(struct drm_device *dev, struct drm_file *file); -u64 i915_gem_get_ggtt_size(struct drm_device *dev, u64 size, int tiling_mode); -u64 i915_gem_get_ggtt_alignment(struct drm_device *dev, u64 size, +u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u64 size, + int tiling_mode); +u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, int tiling_mode, bool fenced); int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 92fa400..e07c373 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1849,25 +1849,26 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) /** * i915_gem_get_ggtt_size - return required global GTT size for an object - * @dev: drm device + * @dev_priv: i915 device * @size: object size * @tiling_mode: tiling mode * * Return the required global GTT size for an object, taking into account * potential fence register mapping. */ -u64 i915_gem_get_ggtt_size(struct drm_device *dev, u64 size, int tiling_mode) +u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, + u64 size, int tiling_mode) { u64 ggtt_size; GEM_BUG_ON(size == 0); - if (INTEL_GEN(dev) >= 4 || + if (INTEL_GEN(dev_priv) >= 4 || tiling_mode == I915_TILING_NONE) return size; /* Previous chips need a power-of-two fence region when tiling */ - if (IS_GEN3(dev)) + if (IS_GEN3(dev_priv)) ggtt_size = 1024*1024; else ggtt_size = 512*1024; @@ -1880,7 +1881,7 @@ u64 i915_gem_get_ggtt_size(struct drm_device *dev, u64 size, int tiling_mode) /** * i915_gem_get_ggtt_alignment - return required global GTT alignment - * @dev: drm device + * @dev_priv: i915 device * @size: object size * @tiling_mode: tiling mode * @fenced: is fenced alignment required or not @@ -1888,7 +1889,7 @@ u64 i915_gem_get_ggtt_size(struct drm_device *dev, u64 size, int tiling_mode) * Return the required global GTT alignment for an object, taking into account * potential fence register mapping. */ -u64 i915_gem_get_ggtt_alignment(struct drm_device *dev, u64 size, +u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, int tiling_mode, bool fenced) { GEM_BUG_ON(size == 0); @@ -1897,7 +1898,7 @@ u64 i915_gem_get_ggtt_alignment(struct drm_device *dev, u64 size, * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ - if (INTEL_GEN(dev) >= 4 || (!fenced && IS_G33(dev)) || + if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) || tiling_mode == I915_TILING_NONE) return 4096; @@ -1905,7 +1906,7 @@ u64 i915_gem_get_ggtt_alignment(struct drm_device *dev, u64 size, * Previous chips need to be aligned to the size of the smallest * fence register that can contain the object. 
*/ - return i915_gem_get_ggtt_size(dev, size, tiling_mode); + return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode); } static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) @@ -2995,14 +2996,14 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj, view_size = i915_ggtt_view_size(obj, ggtt_view); - fence_size = i915_gem_get_ggtt_size(dev, + fence_size = i915_gem_get_ggtt_size(dev_priv, view_size, obj->tiling_mode); - fence_alignment = i915_gem_get_ggtt_alignment(dev, + fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, view_size, obj->tiling_mode, true); - unfenced_alignment = i915_gem_get_ggtt_alignment(dev, + unfenced_alignment = i915_gem_get_ggtt_alignment(dev_priv, view_size, obj->tiling_mode, false); @@ -3706,13 +3707,14 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); bool mappable, fenceable; u32 fence_size, fence_alignment; - fence_size = i915_gem_get_ggtt_size(obj->base.dev, + fence_size = i915_gem_get_ggtt_size(dev_priv, obj->base.size, obj->tiling_mode); - fence_alignment = i915_gem_get_ggtt_alignment(obj->base.dev, + fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, obj->base.size, obj->tiling_mode, true); @@ -3721,7 +3723,7 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) (vma->node.start & (fence_alignment - 1)) == 0); mappable = (vma->node.start + fence_size <= - to_i915(obj->base.dev)->ggtt.mappable_end); + dev_priv->ggtt.mappable_end); obj->map_and_fenceable = mappable && fenceable; } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 4e42da6..b7f9875 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -117,15 +117,16 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) static bool i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) { + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); u32 size; if (tiling_mode == I915_TILING_NONE) return true; - if (INTEL_INFO(obj->base.dev)->gen >= 4) + if (INTEL_GEN(dev_priv) >= 4) return true; - if (IS_GEN3(obj->base.dev)) { + if (IS_GEN3(dev_priv)) { if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) return false; } else { @@ -133,8 +134,7 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) return false; } - size = i915_gem_get_ggtt_size(obj->base.dev, - obj->base.size, tiling_mode); + size = i915_gem_get_ggtt_size(dev_priv, obj->base.size, tiling_mode); if (i915_gem_obj_ggtt_size(obj) != size) return false; -- cgit v0.10.2 From de18003328d1dcf845c451945461e55bb8801fd6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:29 +0100 Subject: drm/i915: Record allocated vma size Tracking the size of the VMA as allocated allows us to dramatically reduce the complexity of later functions (like inserting the VMA in to the drm_mm range manager). 
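The size is captured once at vma creation; a condensed sketch of the recording done in the __i915_gem_vma_create() hunk below (normal views span the whole object, partial and rotated views are measured in pages):

    vma->size = obj->base.size;
    if (view && view->type == I915_GGTT_VIEW_PARTIAL)
        vma->size = (u64)view->params.partial.size << PAGE_SHIFT;
    else if (view && view->type == I915_GGTT_VIEW_ROTATED)
        vma->size = (u64)intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT;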
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-13-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e07c373..646cd0d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2980,53 +2980,36 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj, u64 alignment, u64 flags) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - u64 start, end; - u32 search_flag, alloc_flag; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; + u64 start, end; + u64 min_alignment; int ret; - if (i915_is_ggtt(vm)) { - u32 fence_size, fence_alignment, unfenced_alignment; - u64 view_size; - - if (WARN_ON(!ggtt_view)) - return ERR_PTR(-EINVAL); - - view_size = i915_ggtt_view_size(obj, ggtt_view); - - fence_size = i915_gem_get_ggtt_size(dev_priv, - view_size, - obj->tiling_mode); - fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, - view_size, - obj->tiling_mode, - true); - unfenced_alignment = i915_gem_get_ggtt_alignment(dev_priv, - view_size, - obj->tiling_mode, - false); - size = max(size, view_size); - if (flags & PIN_MAPPABLE) - size = max_t(u64, size, fence_size); - - if (alignment == 0) - alignment = flags & PIN_MAPPABLE ? fence_alignment : - unfenced_alignment; - if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { - DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n", - ggtt_view ? ggtt_view->type : 0, - alignment); - return ERR_PTR(-EINVAL); - } - } else { - size = max_t(u64, size, obj->base.size); - alignment = 4096; + vma = ggtt_view ? + i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : + i915_gem_obj_lookup_or_create_vma(obj, vm); + if (IS_ERR(vma)) + return vma; + + size = max(size, vma->size); + if (flags & PIN_MAPPABLE) + size = i915_gem_get_ggtt_size(dev_priv, size, obj->tiling_mode); + + min_alignment = + i915_gem_get_ggtt_alignment(dev_priv, size, obj->tiling_mode, + flags & PIN_MAPPABLE); + if (alignment == 0) + alignment = min_alignment; + if (alignment & (min_alignment - 1)) { + DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n", + alignment, min_alignment); + return ERR_PTR(-EINVAL); } start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; - end = vm->total; + + end = vma->vm->total; if (flags & PIN_MAPPABLE) end = min_t(u64, end, dev_priv->ggtt.mappable_end); if (flags & PIN_ZONE_4G) @@ -3037,8 +3020,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj, * attempt to find space. */ if (size > end) { - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", - ggtt_view ? ggtt_view->type : 0, + DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", size, obj->base.size, flags & PIN_MAPPABLE ? "mappable" : "total", end); @@ -3051,31 +3033,27 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj, i915_gem_object_pin_pages(obj); - vma = ggtt_view ? 
i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : - i915_gem_obj_lookup_or_create_vma(obj, vm); - - if (IS_ERR(vma)) - goto err_unpin; - if (flags & PIN_OFFSET_FIXED) { uint64_t offset = flags & PIN_OFFSET_MASK; - - if (offset & (alignment - 1) || offset + size > end) { + if (offset & (alignment - 1) || offset > end - size) { ret = -EINVAL; - goto err_vma; + goto err_unpin; } + vma->node.start = offset; vma->node.size = size; vma->node.color = obj->cache_level; - ret = drm_mm_reserve_node(&vm->mm, &vma->node); + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); if (ret) { ret = i915_gem_evict_for_vma(vma); if (ret == 0) - ret = drm_mm_reserve_node(&vm->mm, &vma->node); + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); + if (ret) + goto err_unpin; } - if (ret) - goto err_vma; } else { + u32 search_flag, alloc_flag; + if (flags & PIN_HIGH) { search_flag = DRM_MM_SEARCH_BELOW; alloc_flag = DRM_MM_CREATE_TOP; @@ -3094,36 +3072,35 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj, alignment = 0; search_free: - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, + ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, + &vma->node, size, alignment, obj->cache_level, start, end, search_flag, alloc_flag); if (ret) { - ret = i915_gem_evict_something(vm, size, alignment, + ret = i915_gem_evict_something(vma->vm, size, alignment, obj->cache_level, start, end, flags); if (ret == 0) goto search_free; - goto err_vma; + goto err_unpin; } } GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); - list_move_tail(&vma->vm_link, &vm->inactive_list); + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); obj->bind_count++; return vma; -err_vma: - vma = ERR_PTR(ret); err_unpin: i915_gem_object_unpin_pages(obj); - return vma; + return ERR_PTR(ret); } bool diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f1f14e5..b8c1e9d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -184,7 +184,7 @@ static void ppgtt_unbind_vma(struct i915_vma *vma) { vma->vm->clear_range(vma->vm, vma->node.start, - vma->obj->base.size, + vma->size, true); } @@ -2695,28 +2695,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, static void ggtt_unbind_vma(struct i915_vma *vma) { - struct drm_device *dev = vma->vm->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj = vma->obj; - const uint64_t size = min_t(uint64_t, - obj->base.size, - vma->node.size); + struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; + const u64 size = min(vma->size, vma->node.size); - if (vma->bound & GLOBAL_BIND) { + if (vma->bound & GLOBAL_BIND) vma->vm->clear_range(vma->vm, - vma->node.start, - size, + vma->node.start, size, true); - } - - if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) { - struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; + if (vma->bound & LOCAL_BIND && appgtt) appgtt->base.clear_range(&appgtt->base, - vma->node.start, - size, + vma->node.start, size, true); - } } void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) @@ -3374,14 +3364,14 @@ void i915_vma_close(struct i915_vma *vma) static struct i915_vma * __i915_gem_vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, - const struct i915_ggtt_view *ggtt_view) + const struct i915_ggtt_view *view) { struct i915_vma *vma; int i; GEM_BUG_ON(vm->closed); - if 
(WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) + if (WARN_ON(i915_is_ggtt(vm) != !!view)) return ERR_PTR(-EINVAL); vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); @@ -3395,12 +3385,22 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj, list_add(&vma->vm_link, &vm->unbound_list); vma->vm = vm; vma->obj = obj; + vma->size = obj->base.size; vma->is_ggtt = i915_is_ggtt(vm); - if (i915_is_ggtt(vm)) - vma->ggtt_view = *ggtt_view; - else + if (i915_is_ggtt(vm)) { + vma->ggtt_view = *view; + if (view->type == I915_GGTT_VIEW_PARTIAL) { + vma->size = view->params.partial.size; + vma->size <<= PAGE_SHIFT; + } else if (view->type == I915_GGTT_VIEW_ROTATED) { + vma->size = + intel_rotation_info_size(&view->params.rotated); + vma->size <<= PAGE_SHIFT; + } + } else { i915_ppgtt_get(i915_vm_to_ppgtt(vm)); + } list_add_tail(&vma->obj_link, &obj->vma_list); @@ -3685,29 +3685,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, return 0; } -/** - * i915_ggtt_view_size - Get the size of a GGTT view. - * @obj: Object the view is of. - * @view: The view in question. - * - * @return The size of the GGTT view in bytes. - */ -size_t -i915_ggtt_view_size(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view) -{ - if (view->type == I915_GGTT_VIEW_NORMAL) { - return obj->base.size; - } else if (view->type == I915_GGTT_VIEW_ROTATED) { - return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT; - } else if (view->type == I915_GGTT_VIEW_PARTIAL) { - return view->params.partial.size << PAGE_SHIFT; - } else { - WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type); - return obj->base.size; - } -} - void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) { void __iomem *ptr; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f6cc3fe..2c0360c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -180,6 +180,7 @@ struct i915_vma { struct drm_i915_gem_object *obj; struct i915_address_space *vm; void __iomem *iomap; + u64 size; unsigned int active; struct i915_gem_active last_read[I915_NUM_ENGINES]; @@ -608,10 +609,6 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a, return true; } -size_t -i915_ggtt_view_size(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view); - /** * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture * @vma: VMA to iomap -- cgit v0.10.2 From 20dfbde463c84d5e0eb32b9f3f84992eb583a698 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:30 +0100 Subject: drm/i915: Wrap vma->pin_count accessors with small inline helpers In the next few patches, the VMA pinning API is overhauled and to reduce the churn we pull out the update to the accessors into a prep patch. 
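The conversions are mechanical; the helpers themselves are added to i915_gem_gtt.h in the hunk below, and callers change along these lines:

    /* vma->pin_count > 0  becomes  i915_vma_is_pinned(vma)
     * vma->pin_count++    becomes  __i915_vma_pin(vma)
     * vma->pin_count--    becomes  __i915_vma_unpin(vma)
     */
    if (i915_vma_is_pinned(vma))
        return -EBUSY;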
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-14-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b35e617..4401a2a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -168,7 +168,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->pin_count > 0) + if (i915_vma_is_pinned(vma)) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 646cd0d..8925591 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -153,10 +153,10 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, pinned = 0; mutex_lock(&dev->struct_mutex); list_for_each_entry(vma, &ggtt->base.active_list, vm_link) - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) pinned += vma->node.size; list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) pinned += vma->node.size; mutex_unlock(&dev->struct_mutex); @@ -2809,7 +2809,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) static void __i915_vma_iounmap(struct i915_vma *vma) { - GEM_BUG_ON(vma->pin_count); + GEM_BUG_ON(i915_vma_is_pinned(vma)); if (vma->iomap == NULL) return; @@ -2836,7 +2836,7 @@ int i915_vma_unbind(struct i915_vma *vma) * take a pin on the vma so that the second unbind is * aborted. */ - vma->pin_count++; + __i915_vma_pin(vma); for_each_active(active, idx) { ret = i915_gem_active_retire(&vma->last_read[idx], @@ -2845,14 +2845,14 @@ int i915_vma_unbind(struct i915_vma *vma) break; } - vma->pin_count--; + __i915_vma_unpin(vma); if (ret) return ret; GEM_BUG_ON(i915_vma_is_active(vma)); } - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) return -EBUSY; if (!drm_mm_node_allocated(&vma->node)) @@ -3297,7 +3297,7 @@ restart: if (!drm_mm_node_allocated(&vma->node)) continue; - if (vma->pin_count) { + if (i915_vma_is_pinned(vma)) { DRM_DEBUG("can not change the cache level of pinned objects\n"); return -EBUSY; } @@ -3734,11 +3734,11 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, i915_gem_obj_to_vma(obj, vm); if (vma) { - if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) + if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) return -EBUSY; if (i915_vma_misplaced(vma, size, alignment, flags)) { - WARN(vma->pin_count, + WARN(i915_vma_is_pinned(vma), "bo is already pinned in %s with incorrect alignment:" " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," " obj->map_and_fenceable=%d\n", @@ -3776,7 +3776,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - vma->pin_count++; + __i915_vma_pin(vma); return 0; } @@ -3815,10 +3815,10 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, { struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); - WARN_ON(vma->pin_count == 0); + WARN_ON(!i915_vma_is_pinned(vma)); WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); - --vma->pin_count; + __i915_vma_unpin(vma); } int @@ -4686,7 +4686,7 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; list_for_each_entry(vma, &obj->vma_list, obj_link) - if 
(vma->pin_count > 0) + if (i915_vma_is_pinned(vma)) return true; return false; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index ef12ecd..7be4258 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -49,7 +49,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv) static bool mark_free(struct i915_vma *vma, struct list_head *unwind) { - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) return false; if (WARN_ON(!list_empty(&vma->exec_list))) @@ -183,7 +183,7 @@ found: */ list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { if (drm_mm_scan_remove_block(&vma->node)) - vma->pin_count++; + __i915_vma_pin(vma); else list_del_init(&vma->exec_list); } @@ -195,7 +195,7 @@ found: exec_list); list_del_init(&vma->exec_list); - vma->pin_count--; + __i915_vma_unpin(vma); if (ret == 0) ret = i915_vma_unbind(vma); } @@ -220,8 +220,8 @@ i915_gem_evict_for_vma(struct i915_vma *target) vma = container_of(node, typeof(*vma), node); - if (vma->pin_count) { - if (!vma->exec_entry || (vma->pin_count > 1)) + if (i915_vma_is_pinned(vma)) { + if (!vma->exec_entry || i915_vma_pin_count(vma) > 1) /* Object is pinned for some other use */ return -EBUSY; @@ -281,7 +281,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) } list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link) - if (vma->pin_count == 0) + if (!i915_vma_is_pinned(vma)) WARN_ON(i915_vma_unbind(vma)); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d2e27e7..82ed80f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -261,7 +261,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) i915_gem_object_unpin_fence(obj); if (entry->flags & __EXEC_OBJECT_HAS_PIN) - vma->pin_count--; + __i915_vma_unpin(vma); entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); } diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index dbaab9c..3b462da 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -431,7 +431,7 @@ i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) WARN_ON(!ggtt_vma || dev_priv->fence_regs[obj->fence_reg].pin_count > - ggtt_vma->pin_count); + i915_vma_pin_count(ggtt_vma)); dev_priv->fence_regs[obj->fence_reg].pin_count++; return true; } else diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b8c1e9d..088b89b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3334,7 +3334,7 @@ i915_vma_retire(struct i915_gem_active *active, return; list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - if (unlikely(vma->closed && !vma->pin_count)) + if (unlikely(vma->closed && !i915_vma_is_pinned(vma))) WARN_ON(i915_vma_unbind(vma)); } @@ -3357,7 +3357,7 @@ void i915_vma_close(struct i915_vma *vma) vma->closed = true; list_del_init(&vma->obj_link); - if (!i915_vma_is_active(vma) && !vma->pin_count) + if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) WARN_ON(i915_vma_unbind(vma)); } @@ -3666,12 +3666,12 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (vma->bound == 0 && vma->vm->allocate_va_range) { /* XXX: i915_vma_pin() will fix this +- hack */ - vma->pin_count++; + __i915_vma_pin(vma); trace_i915_va_alloc(vma); ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, 
vma->node.size); - vma->pin_count--; + __i915_vma_unpin(vma); if (ret) return ret; } @@ -3707,6 +3707,6 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) vma->iomap = ptr; } - vma->pin_count++; + __i915_vma_pin(vma); return ptr; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 2c0360c..eee22fc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -609,6 +609,34 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a, return true; } +static inline int i915_vma_pin_count(const struct i915_vma *vma) +{ + return vma->pin_count; +} + +static inline bool i915_vma_is_pinned(const struct i915_vma *vma) +{ + return i915_vma_pin_count(vma); +} + +static inline void __i915_vma_pin(struct i915_vma *vma) +{ + vma->pin_count++; + GEM_BUG_ON(!i915_vma_is_pinned(vma)); +} + +static inline void __i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + vma->pin_count--; +} + +static inline void i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + __i915_vma_unpin(vma); +} + /** * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture * @vma: VMA to iomap @@ -637,9 +665,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); static inline void i915_vma_unpin_iomap(struct i915_vma *vma) { lockdep_assert_held(&vma->vm->dev->struct_mutex); - GEM_BUG_ON(vma->pin_count == 0); GEM_BUG_ON(vma->iomap == NULL); - vma->pin_count--; + i915_vma_unpin(vma); } #endif diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index c19f72e..d94eb90 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -818,7 +818,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, break; list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->vm == vm && vma->pin_count > 0) + if (vma->vm == vm && i915_vma_is_pinned(vma)) capture_bo(err++, vma); } @@ -1230,7 +1230,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->vm == vm && vma->pin_count > 0) + if (vma->vm == vm && i915_vma_is_pinned(vma)) i++; } error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx]; -- cgit v0.10.2 From 59bfa1248e22d65e6273eec6f8043c8e4450c2ba Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:31 +0100 Subject: drm/i915: Start passing around i915_vma from execbuffer During execbuffer we look up the i915_vma in order to reserve them in the VM. However, we then do a double lookup of the vma in order to then pin them, all because we lack the necessary interfaces to operate on i915_vma - so introduce i915_vma_pin()! v2: Tidy parameter lists to remove one level of redirection in the hot path. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-15-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3d73394..cda8238 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3018,23 +3018,6 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data( void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file); void i915_gem_free_object(struct drm_gem_object *obj); -/* Flags used by pin/bind&friends. 
*/ -#define PIN_MAPPABLE (1<<0) -#define PIN_NONBLOCK (1<<1) -#define PIN_GLOBAL (1<<2) -#define PIN_OFFSET_BIAS (1<<3) -#define PIN_USER (1<<4) -#define PIN_UPDATE (1<<5) -#define PIN_ZONE_4G (1<<6) -#define PIN_HIGH (1<<7) -#define PIN_OFFSET_FIXED (1<<8) -#define PIN_OFFSET_MASK (~4095) -int __must_check -i915_gem_object_pin(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - u64 size, - u64 alignment, - u64 flags); int __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, @@ -3311,11 +3294,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, uint32_t alignment, unsigned flags) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - - return i915_gem_object_pin(obj, &ggtt->base, 0, alignment, - flags | PIN_GLOBAL); + return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal, + 0, alignment, flags); } void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8925591..d38275f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2963,34 +2963,30 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma, } /** - * Finds free space in the GTT aperture and binds the object or a view of it - * there. - * @obj: object to bind - * @vm: address space to bind into - * @ggtt_view: global gtt view if applicable + * i915_vma_insert - finds a slot for the vma in its address space + * @vma: the vma * @size: requested size in bytes (can be larger than the VMA) - * @alignment: requested alignment + * @alignment: required alignment * @flags: mask of PIN_* flags to use + * + * First we try to allocate some free space that meets the requirements for + * the VMA. Failiing that, if the flags permit, it will evict an old VMA, + * preferrably the oldest idle entry to make room for the new VMA. + * + * Returns: + * 0 on success, negative error code otherwise. */ -static struct i915_vma * -i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *ggtt_view, - u64 size, - u64 alignment, - u64 flags) +static int +i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_vma *vma; + struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); + struct drm_i915_gem_object *obj = vma->obj; u64 start, end; u64 min_alignment; int ret; - vma = ggtt_view ? - i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : - i915_gem_obj_lookup_or_create_vma(obj, vm); - if (IS_ERR(vma)) - return vma; + GEM_BUG_ON(vma->bound); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); size = max(size, vma->size); if (flags & PIN_MAPPABLE) @@ -3004,7 +3000,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj, if (alignment & (min_alignment - 1)) { DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n", alignment, min_alignment); - return ERR_PTR(-EINVAL); + return -EINVAL; } start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; @@ -3024,17 +3020,17 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj, size, obj->base.size, flags & PIN_MAPPABLE ? 
"mappable" : "total", end); - return ERR_PTR(-E2BIG); + return -E2BIG; } ret = i915_gem_object_get_pages(obj); if (ret) - return ERR_PTR(ret); + return ret; i915_gem_object_pin_pages(obj); if (flags & PIN_OFFSET_FIXED) { - uint64_t offset = flags & PIN_OFFSET_MASK; + u64 offset = flags & PIN_OFFSET_MASK; if (offset & (alignment - 1) || offset > end - size) { ret = -EINVAL; goto err_unpin; @@ -3096,11 +3092,11 @@ search_free: list_move_tail(&vma->vm_link, &vma->vm->inactive_list); obj->bind_count++; - return vma; + return 0; err_unpin: i915_gem_object_unpin_pages(obj); - return ERR_PTR(ret); + return ret; } bool @@ -3661,6 +3657,9 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { struct drm_i915_gem_object *obj = vma->obj; + if (!drm_mm_node_allocated(&vma->node)) + return false; + if (vma->node.size < size) return true; @@ -3705,91 +3704,42 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) obj->map_and_fenceable = mappable && fenceable; } -static int -i915_gem_object_do_pin(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *ggtt_view, - u64 size, - u64 alignment, - u64 flags) +int +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_vma *vma; - unsigned bound; + unsigned int bound = vma->bound; int ret; - if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) - return -ENODEV; - - if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) - return -EINVAL; - - if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) - return -EINVAL; - - if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) - return -EINVAL; - - vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : - i915_gem_obj_to_vma(obj, vm); - - if (vma) { - if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) - return -EBUSY; + GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); + GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt); - if (i915_vma_misplaced(vma, size, alignment, flags)) { - WARN(i915_vma_is_pinned(vma), - "bo is already pinned in %s with incorrect alignment:" - " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," - " obj->map_and_fenceable=%d\n", - ggtt_view ? "ggtt" : "ppgtt", - upper_32_bits(vma->node.start), - lower_32_bits(vma->node.start), - alignment, - !!(flags & PIN_MAPPABLE), - obj->map_and_fenceable); - ret = i915_vma_unbind(vma); - if (ret) - return ret; + if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) + return -EBUSY; - vma = NULL; - } - } + /* Pin early to prevent the shrinker/eviction logic from destroying + * our vma as we insert and bind. 
+ */ + __i915_vma_pin(vma); - if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { - vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view, - size, alignment, flags); - if (IS_ERR(vma)) - return PTR_ERR(vma); + if (!bound) { + ret = i915_vma_insert(vma, size, alignment, flags); + if (ret) + goto err; } - bound = vma->bound; - ret = i915_vma_bind(vma, obj->cache_level, flags); + ret = i915_vma_bind(vma, vma->obj->cache_level, flags); if (ret) - return ret; + goto err; - if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && - (bound ^ vma->bound) & GLOBAL_BIND) { + if ((bound ^ vma->bound) & GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); - WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); - } GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - - __i915_vma_pin(vma); return 0; -} -int -i915_gem_object_pin(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - u64 size, - u64 alignment, - u64 flags) -{ - return i915_gem_object_do_pin(obj, vm, - i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, - size, alignment, flags); +err: + __i915_vma_unpin(vma); + return ret; } int @@ -3799,14 +3749,35 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u64 alignment, u64 flags) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_vma *vma; + int ret; BUG_ON(!view); - return i915_gem_object_do_pin(obj, &ggtt->base, view, - size, alignment, flags | PIN_GLOBAL); + vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + if (i915_vma_misplaced(vma, size, alignment, flags)) { + if (flags & PIN_NONBLOCK && + (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) + return -ENOSPC; + + WARN(i915_vma_is_pinned(vma), + "bo is already pinned in ggtt with incorrect alignment:" + " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," + " obj->map_and_fenceable=%d\n", + upper_32_bits(vma->node.start), + lower_32_bits(vma->node.start), + alignment, + !!(flags & PIN_MAPPABLE), + obj->map_and_fenceable); + ret = i915_vma_unbind(vma); + if (ret) + return ret; + } + + return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); } void diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 82ed80f..d95a043 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -45,11 +45,10 @@ struct i915_execbuffer_params { struct drm_device *dev; struct drm_file *file; - u32 dispatch_flags; - u32 args_batch_start_offset; - u32 batch_obj_vm_offset; + struct i915_vma *batch; + u32 dispatch_flags; + u32 args_batch_start_offset; struct intel_engine_cs *engine; - struct drm_i915_gem_object *batch_obj; struct i915_gem_context *ctx; struct drm_i915_gem_request *request; }; @@ -102,6 +101,26 @@ eb_reset(struct eb_vmas *eb) memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); } +static struct i915_vma * +eb_get_batch(struct eb_vmas *eb) +{ + struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); + + /* + * SNA is doing fancy tricks with compressing batch buffers, which leads + * to negative relocation deltas. Usually that works out ok since the + * relocate address is still positive, except when the batch is placed + * very low in the GTT. Ensure this doesn't happen. + * + * Note that actual hangs have only been observed on gen7, but for + * paranoia do it everywhere. 
+ */ + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + + return vma; +} + static int eb_lookup_vmas(struct eb_vmas *eb, struct drm_i915_gem_exec_object2 *exec, @@ -198,35 +217,6 @@ err: return ret; } -static inline struct i915_vma * -eb_get_batch_vma(struct eb_vmas *eb) -{ - /* The batch is always the LAST item in the VMA list */ - struct i915_vma *vma = list_last_entry(&eb->vmas, typeof(*vma), exec_list); - - return vma; -} - -static struct drm_i915_gem_object * -eb_get_batch(struct eb_vmas *eb) -{ - struct i915_vma *vma = eb_get_batch_vma(eb); - - /* - * SNA is doing fancy tricks with compressing batch buffers, which leads - * to negative relocation deltas. Usually that works out ok since the - * relocate address is still positive, except when the batch is placed - * very low in the GTT. Ensure this doesn't happen. - * - * Note that actual hangs have only been observed on gen7, but for - * paranoia do it everywhere. - */ - if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; - - return vma->obj; -} - static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) { if (eb->and < 0) { @@ -682,16 +672,16 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, flags |= PIN_HIGH; } - ret = i915_gem_object_pin(obj, vma->vm, - entry->pad_to_size, - entry->alignment, - flags); - if ((ret == -ENOSPC || ret == -E2BIG) && + ret = i915_vma_pin(vma, + entry->pad_to_size, + entry->alignment, + flags); + if ((ret == -ENOSPC || ret == -E2BIG) && only_mappable_for_reloc(entry->flags)) - ret = i915_gem_object_pin(obj, vma->vm, - entry->pad_to_size, - entry->alignment, - flags & ~PIN_MAPPABLE); + ret = i915_vma_pin(vma, + entry->pad_to_size, + entry->alignment, + flags & ~PIN_MAPPABLE); if (ret) return ret; @@ -1252,11 +1242,11 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) return 0; } -static struct drm_i915_gem_object* +static struct i915_vma* i915_gem_execbuffer_parse(struct intel_engine_cs *engine, struct drm_i915_gem_exec_object2 *shadow_exec_entry, - struct eb_vmas *eb, struct drm_i915_gem_object *batch_obj, + struct eb_vmas *eb, u32 batch_start_offset, u32 batch_len, bool is_master) @@ -1268,7 +1258,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool, PAGE_ALIGN(batch_len)); if (IS_ERR(shadow_batch_obj)) - return shadow_batch_obj; + return ERR_CAST(shadow_batch_obj); ret = intel_engine_cmd_parser(engine, batch_obj, @@ -1293,14 +1283,12 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, i915_gem_object_get(shadow_batch_obj); list_add_tail(&vma->exec_list, &eb->vmas); - shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND; - - return shadow_batch_obj; + return vma; err: i915_gem_object_unpin_pages(shadow_batch_obj); if (ret == -EACCES) /* unhandled chained batch */ - return batch_obj; + return NULL; else return ERR_PTR(ret); } @@ -1381,11 +1369,11 @@ execbuf_submit(struct i915_execbuffer_params *params, } exec_len = args->batch_len; - exec_start = params->batch_obj_vm_offset + + exec_start = params->batch->node.start + params->args_batch_start_offset; if (exec_len == 0) - exec_len = params->batch_obj->base.size; + exec_len = params->batch->size; ret = params->engine->emit_bb_start(params->request, exec_start, exec_len, @@ -1489,7 +1477,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_i915_private *dev_priv = to_i915(dev); 
struct i915_ggtt *ggtt = &dev_priv->ggtt; struct eb_vmas *eb; - struct drm_i915_gem_object *batch_obj; struct drm_i915_gem_exec_object2 shadow_exec_entry; struct intel_engine_cs *engine; struct i915_gem_context *ctx; @@ -1583,7 +1570,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; /* take note of the batch buffer before we might reorder the lists */ - batch_obj = eb_get_batch(eb); + params->batch = eb_get_batch(eb); /* Move the objects en-masse into the GTT, evicting if necessary. */ need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; @@ -1607,7 +1594,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Set the pending read domains for the batch buffer to COMMAND */ - if (batch_obj->base.pending_write_domain) { + if (params->batch->obj->base.pending_write_domain) { DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL; goto err; @@ -1615,26 +1602,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->args_batch_start_offset = args->batch_start_offset; if (intel_engine_needs_cmd_parser(engine) && args->batch_len) { - struct drm_i915_gem_object *parsed_batch_obj; - - parsed_batch_obj = i915_gem_execbuffer_parse(engine, - &shadow_exec_entry, - eb, - batch_obj, - args->batch_start_offset, - args->batch_len, - drm_is_current_master(file)); - if (IS_ERR(parsed_batch_obj)) { - ret = PTR_ERR(parsed_batch_obj); + struct i915_vma *vma; + + vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry, + params->batch->obj, + eb, + args->batch_start_offset, + args->batch_len, + drm_is_current_master(file)); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto err; } - /* - * parsed_batch_obj == batch_obj means batch not fully parsed: - * Accept, but don't promote to secure. - */ - - if (parsed_batch_obj != batch_obj) { + if (vma) { /* * Batch parsed and accepted: * @@ -1646,16 +1627,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, */ dispatch_flags |= I915_DISPATCH_SECURE; params->args_batch_start_offset = 0; - batch_obj = parsed_batch_obj; + params->batch = vma; } } - batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ if (dispatch_flags & I915_DISPATCH_SECURE) { + struct drm_i915_gem_object *obj = params->batch->obj; + /* * So on first glance it looks freaky that we pin the batch here * outside of the reservation loop. But: @@ -1666,13 +1649,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * fitting due to fragmentation. * So this is actually safe. */ - ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); + ret = i915_gem_obj_ggtt_pin(obj, 0, 0); if (ret) goto err; - params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj); - } else - params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); + params->batch = i915_gem_obj_to_ggtt(obj); + } /* Allocate a request for this batch buffer nice and early. 
*/ params->request = i915_gem_request_alloc(engine, ctx); @@ -1695,12 +1677,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->file = file; params->engine = engine; params->dispatch_flags = dispatch_flags; - params->batch_obj = batch_obj; params->ctx = ctx; ret = execbuf_submit(params, args, &eb->vmas); err_request: - __i915_add_request(params->request, params->batch_obj, ret == 0); + __i915_add_request(params->request, params->batch->obj, ret == 0); err_batch_unpin: /* @@ -1710,8 +1691,7 @@ err_batch_unpin: * active. */ if (dispatch_flags & I915_DISPATCH_SECURE) - i915_gem_object_ggtt_unpin(batch_obj); - + i915_vma_unpin(params->batch); err: /* the request owns the ref now */ i915_gem_context_put(ctx); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 088b89b..84b786c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3665,13 +3665,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, return 0; if (vma->bound == 0 && vma->vm->allocate_va_range) { - /* XXX: i915_vma_pin() will fix this +- hack */ - __i915_vma_pin(vma); trace_i915_va_alloc(vma); ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->node.size); - __i915_vma_unpin(vma); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index eee22fc..c63cc1b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -609,6 +609,20 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a, return true; } +int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); +/* Flags used by pin/bind&friends. */ +#define PIN_MAPPABLE BIT(0) +#define PIN_NONBLOCK BIT(1) +#define PIN_GLOBAL BIT(2) +#define PIN_OFFSET_BIAS BIT(3) +#define PIN_USER BIT(4) +#define PIN_UPDATE BIT(5) +#define PIN_ZONE_4G BIT(6) +#define PIN_HIGH BIT(7) +#define PIN_OFFSET_FIXED BIT(8) +#define PIN_OFFSET_MASK (~4095) + static inline int i915_vma_pin_count(const struct i915_vma *vma) { return vma->pin_count; -- cgit v0.10.2 From 3272db53136f6be7555fb294db3a6e3f372b9380 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:32 +0100 Subject: drm/i915: Combine all i915_vma bitfields into a single set of flags In preparation to perform some magic to speed up i915_vma_pin(), which is among the hottest of hot paths in execbuf, refactor all the bitfields accessed by i915_vma_pin() into a single unified set of flags. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-16-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4401a2a..8d47d1bf 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -125,7 +125,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj) struct i915_vma *vma; list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->is_ggtt && drm_mm_node_allocated(&vma->node)) + if (i915_vma_is_ggtt(vma) && drm_mm_node_allocated(&vma->node)) size += vma->node.size; } @@ -181,9 +181,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) continue; seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", - vma->is_ggtt ? "g" : "pp", + i915_vma_is_ggtt(vma) ? 
"g" : "pp", vma->node.start, vma->node.size); - if (vma->is_ggtt) + if (i915_vma_is_ggtt(vma)) seq_printf(m, ", type: %u", vma->ggtt_view.type); seq_puts(m, ")"); } @@ -356,7 +356,7 @@ static int per_file_stats(int id, void *ptr, void *data) if (!drm_mm_node_allocated(&vma->node)) continue; - if (vma->is_ggtt) { + if (i915_vma_is_ggtt(vma)) { stats->global += vma->node.size; } else { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d38275f..3ade87b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2861,7 +2861,8 @@ int i915_vma_unbind(struct i915_vma *vma) GEM_BUG_ON(obj->bind_count == 0); GEM_BUG_ON(!obj->pages); - if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { + if (i915_vma_is_ggtt(vma) && + vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { i915_gem_object_finish_gtt(obj); /* release the fence reg _after_ flushing */ @@ -2876,12 +2877,12 @@ int i915_vma_unbind(struct i915_vma *vma) trace_i915_vma_unbind(vma); vma->vm->unbind_vma(vma); } - vma->bound = 0; + vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); drm_mm_remove_node(&vma->node); list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - if (vma->is_ggtt) { + if (i915_vma_is_ggtt(vma)) { if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { obj->map_and_fenceable = false; } else if (vma->ggtt_view.pages) { @@ -2904,7 +2905,7 @@ int i915_vma_unbind(struct i915_vma *vma) i915_gem_object_unpin_pages(obj); destroy: - if (unlikely(vma->closed)) + if (unlikely(i915_vma_is_closed(vma))) i915_vma_destroy(vma); return 0; @@ -2985,7 +2986,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) u64 min_alignment; int ret; - GEM_BUG_ON(vma->bound); + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); size = max(size, vma->size); @@ -3707,13 +3708,14 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - unsigned int bound = vma->bound; + unsigned int bound; int ret; GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); - GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt); + GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); - if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) + bound = vma->flags; + if (WARN_ON((bound & I915_VMA_PIN_MASK) == I915_VMA_PIN_MASK)) return -EBUSY; /* Pin early to prevent the shrinker/eviction logic from destroying @@ -3721,7 +3723,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) */ __i915_vma_pin(vma); - if (!bound) { + if ((bound & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)) == 0) { ret = i915_vma_insert(vma, size, alignment, flags); if (ret) goto err; @@ -3731,7 +3733,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (ret) goto err; - if ((bound ^ vma->bound) & GLOBAL_BIND) + if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); @@ -4032,9 +4034,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) * unbound now. 
*/ list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { - GEM_BUG_ON(!vma->is_ggtt); + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON(i915_vma_is_active(vma)); - vma->pin_count = 0; + vma->flags &= ~I915_VMA_PIN_MASK; i915_vma_close(vma); } GEM_BUG_ON(obj->bind_count); @@ -4094,7 +4096,8 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, GEM_BUG_ON(!view); list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) + if (i915_vma_is_ggtt(vma) && + i915_ggtt_view_equal(&vma->ggtt_view, view)) return vma; return NULL; } @@ -4583,7 +4586,7 @@ u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); list_for_each_entry(vma, &o->vma_list, obj_link) { - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) continue; if (vma->vm == vm) @@ -4601,7 +4604,8 @@ u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, struct i915_vma *vma; list_for_each_entry(vma, &o->vma_list, obj_link) - if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view)) + if (i915_vma_is_ggtt(vma) && + i915_ggtt_view_equal(&vma->ggtt_view, view)) return vma->node.start; WARN(1, "global vma for this object not found. (view=%u)\n", view->type); @@ -4614,7 +4618,7 @@ bool i915_gem_obj_bound(struct drm_i915_gem_object *o, struct i915_vma *vma; list_for_each_entry(vma, &o->vma_list, obj_link) { - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) continue; if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) @@ -4630,7 +4634,7 @@ bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, struct i915_vma *vma; list_for_each_entry(vma, &o->vma_list, obj_link) - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && i915_ggtt_view_equal(&vma->ggtt_view, view) && drm_mm_node_allocated(&vma->node)) return true; @@ -4645,7 +4649,7 @@ unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) GEM_BUG_ON(list_empty(&o->vma_list)); list_for_each_entry(vma, &o->vma_list, obj_link) { - if (vma->is_ggtt && + if (i915_vma_is_ggtt(vma) && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) return vma->node.size; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index eff6d39..dc7c0ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -219,7 +219,7 @@ static void i915_ppgtt_close(struct i915_address_space *vm) struct i915_vma *vma, *vn; list_for_each_entry_safe(vma, vn, *phase, vm_link) - if (!vma->closed) + if (!i915_vma_is_closed(vma)) i915_vma_close(vma); } } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d95a043..a0c95ab 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -717,7 +717,7 @@ need_reloc_mappable(struct i915_vma *vma) if (entry->relocation_count == 0) return false; - if (!vma->is_ggtt) + if (!i915_vma_is_ggtt(vma)) return false; /* See also use_cpu_reloc() */ @@ -736,7 +736,8 @@ eb_vma_misplaced(struct i915_vma *vma) struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_object *obj = vma->obj; - WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt); + WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && + !i915_vma_is_ggtt(vma)); if (entry->alignment && vma->node.start & (entry->alignment - 1)) diff --git 
a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 84b786c..af012b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2652,7 +2652,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally * upgrade to both bound if we bind either to avoid double-binding. */ - vma->bound |= GLOBAL_BIND | LOCAL_BIND; + vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; return 0; } @@ -2674,14 +2674,14 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, pte_flags |= PTE_READ_ONLY; - if (flags & GLOBAL_BIND) { + if (flags & I915_VMA_GLOBAL_BIND) { vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages, vma->node.start, cache_level, pte_flags); } - if (flags & LOCAL_BIND) { + if (flags & I915_VMA_LOCAL_BIND) { struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; appgtt->base.insert_entries(&appgtt->base, @@ -2698,12 +2698,12 @@ static void ggtt_unbind_vma(struct i915_vma *vma) struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; const u64 size = min(vma->size, vma->node.size); - if (vma->bound & GLOBAL_BIND) + if (vma->flags & I915_VMA_GLOBAL_BIND) vma->vm->clear_range(vma->vm, vma->node.start, size, true); - if (vma->bound & LOCAL_BIND && appgtt) + if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) appgtt->base.clear_range(&appgtt->base, vma->node.start, size, true); @@ -3334,7 +3334,7 @@ i915_vma_retire(struct i915_gem_active *active, return; list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - if (unlikely(vma->closed && !i915_vma_is_pinned(vma))) + if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) WARN_ON(i915_vma_unbind(vma)); } @@ -3342,10 +3342,10 @@ void i915_vma_destroy(struct i915_vma *vma) { GEM_BUG_ON(vma->node.allocated); GEM_BUG_ON(i915_vma_is_active(vma)); - GEM_BUG_ON(!vma->closed); + GEM_BUG_ON(!i915_vma_is_closed(vma)); list_del(&vma->vm_link); - if (!vma->is_ggtt) + if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); @@ -3353,8 +3353,8 @@ void i915_vma_destroy(struct i915_vma *vma) void i915_vma_close(struct i915_vma *vma) { - GEM_BUG_ON(vma->closed); - vma->closed = true; + GEM_BUG_ON(i915_vma_is_closed(vma)); + vma->flags |= I915_VMA_CLOSED; list_del_init(&vma->obj_link); if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) @@ -3386,9 +3386,9 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj, vma->vm = vm; vma->obj = obj; vma->size = obj->base.size; - vma->is_ggtt = i915_is_ggtt(vm); if (i915_is_ggtt(vm)) { + vma->flags |= I915_VMA_GGTT; vma->ggtt_view = *view; if (view->type == I915_GGTT_VIEW_PARTIAL) { vma->size = view->params.partial.size; @@ -3433,7 +3433,7 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, if (!vma) vma = __i915_gem_vma_create(obj, &ggtt->base, view); - GEM_BUG_ON(vma->closed); + GEM_BUG_ON(i915_vma_is_closed(vma)); return vma; } @@ -3644,27 +3644,28 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { - int ret; u32 bind_flags; + u32 vma_flags; + int ret; if (WARN_ON(flags == 0)) return -EINVAL; bind_flags = 0; if (flags & PIN_GLOBAL) - bind_flags |= GLOBAL_BIND; + bind_flags |= I915_VMA_GLOBAL_BIND; if (flags & PIN_USER) - bind_flags |= LOCAL_BIND; + bind_flags |= I915_VMA_LOCAL_BIND; + vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); if (flags & 
PIN_UPDATE) - bind_flags |= vma->bound; + bind_flags |= vma_flags; else - bind_flags &= ~vma->bound; - + bind_flags &= ~vma_flags; if (bind_flags == 0) return 0; - if (vma->bound == 0 && vma->vm->allocate_va_range) { + if (vma_flags == 0 && vma->vm->allocate_va_range) { trace_i915_va_alloc(vma); ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, @@ -3677,8 +3678,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (ret) return ret; - vma->bound |= bind_flags; - + vma->flags |= bind_flags; return 0; } @@ -3690,8 +3690,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) if (WARN_ON(!vma->obj->map_and_fenceable)) return IO_ERR_PTR(-ENODEV); - GEM_BUG_ON(!vma->is_ggtt); - GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0); + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); ptr = vma->iomap; if (ptr == NULL) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index c63cc1b..5134c04 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -182,15 +182,28 @@ struct i915_vma { void __iomem *iomap; u64 size; - unsigned int active; - struct i915_gem_active last_read[I915_NUM_ENGINES]; + unsigned int flags; + /** + * How many users have pinned this object in GTT space. The following + * users can each hold at most one reference: pwrite/pread, execbuffer + * (objects are not allowed multiple times for the same batchbuffer), + * and the framebuffer code. When switching/pageflipping, the + * framebuffer code has at most two buffers pinned per crtc. + * + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 + * bits with absolutely no headroom. So use 4 bits. + */ +#define I915_VMA_PIN_MASK 0xf /** Flags and address space this VMA is bound to */ -#define GLOBAL_BIND (1<<0) -#define LOCAL_BIND (1<<1) - unsigned int bound : 4; - bool is_ggtt : 1; - bool closed : 1; +#define I915_VMA_GLOBAL_BIND BIT(5) +#define I915_VMA_LOCAL_BIND BIT(6) + +#define I915_VMA_GGTT BIT(7) +#define I915_VMA_CLOSED BIT(8) + + unsigned int active; + struct i915_gem_active last_read[I915_NUM_ENGINES]; /** * Support different GGTT views into the same object. @@ -215,20 +228,18 @@ struct i915_vma { struct hlist_node exec_node; unsigned long exec_handle; struct drm_i915_gem_exec_object2 *exec_entry; - - /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, execbuffer - * (objects are not allowed multiple times for the same batchbuffer), - * and the framebuffer code. When switching/pageflipping, the - * framebuffer code has at most two buffers pinned per crtc. - * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. 
*/ - unsigned int pin_count:4; -#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf }; +static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_GGTT; +} + +static inline bool i915_vma_is_closed(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_CLOSED; +} + static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) { return vma->active; @@ -625,7 +636,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); static inline int i915_vma_pin_count(const struct i915_vma *vma) { - return vma->pin_count; + return vma->flags & I915_VMA_PIN_MASK; } static inline bool i915_vma_is_pinned(const struct i915_vma *vma) @@ -635,14 +646,14 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma) static inline void __i915_vma_pin(struct i915_vma *vma) { - vma->pin_count++; + vma->flags++; GEM_BUG_ON(!i915_vma_is_pinned(vma)); } static inline void __i915_vma_unpin(struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_pinned(vma)); - vma->pin_count--; + vma->flags--; } static inline void i915_vma_unpin(struct i915_vma *vma) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 64d179d..b577635 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -53,7 +53,7 @@ static bool any_vma_pinned(struct drm_i915_gem_object *obj) struct i915_vma *vma; list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->pin_count) + if (i915_vma_is_pinned(vma)) return true; return false; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index bc91ffe..1327961 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -705,7 +705,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, goto err; } - vma->bound |= GLOBAL_BIND; + vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); obj->bind_count++; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index d94eb90..cc28ad4 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -669,14 +669,14 @@ i915_error_object_create(struct drm_i915_private *dev_priv, if (i915_is_ggtt(vm)) vma = i915_gem_obj_to_ggtt(src); use_ggtt = (src->cache_level == I915_CACHE_NONE && - vma && (vma->bound & GLOBAL_BIND) && + vma && (vma->flags & I915_VMA_GLOBAL_BIND) && reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end); /* Cannot access stolen address directly, try to use the aperture */ if (src->stolen) { use_ggtt = true; - if (!(vma && vma->bound & GLOBAL_BIND)) + if (!(vma && vma->flags & I915_VMA_GLOBAL_BIND)) goto unwind; reloc_offset = i915_gem_obj_ggtt_offset(src); -- cgit v0.10.2 From 305bc234a87fe8814149d36100b4b544caaddd00 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:33 +0100 Subject: drm/i915: Make i915_vma_pin() small and inline Not only is i915_vma_pin() called for every single object on every single execbuf, it is usually a simple increment as the VMA is already bound for execution by the GPU. Rearrange the tests for unbound and pin_count overflow so that we can do the increment and test very cheaply and compact enough to inline the operation into execbuf. 
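(Illustrative aside, not part of the patch: the fast-path arithmetic described in the next paragraph can be reproduced in a few lines of ordinary userspace C. The names below are invented for this sketch; only the bit layout, with the pin count in the low bits and an overflow bit plus the two bind bits covered by one mask, mirrors what this commit adds to i915_gem_gtt.h.)

#include <stdio.h>

#define BIT(n)            (1u << (n))
#define VMA_PIN_MASK      0xfu     /* bits 0-3: pin count */
#define VMA_PIN_OVERFLOW  BIT(5)   /* reserved; kept inside the mask so the fast test rejects it when set */
#define VMA_GLOBAL_BIND   BIT(6)
#define VMA_LOCAL_BIND    BIT(7)
#define VMA_BIND_MASK     (VMA_GLOBAL_BIND | VMA_LOCAL_BIND | VMA_PIN_OVERFLOW)

/* The PIN_* request flags are laid out to alias the vma bind bits, so the
 * request can be compared against vma->flags with one XOR and one mask. */
#define PIN_GLOBAL        VMA_GLOBAL_BIND
#define PIN_USER          VMA_LOCAL_BIND

/* Returns 1 when the fast path applies: the vma is already bound exactly as
 * requested and no overflow/bind work is needed; the pin count was bumped. */
static int fast_path_pin(unsigned int *flags, unsigned int request)
{
	return ((++*flags ^ request) & VMA_BIND_MASK) == 0;
}

int main(void)
{
	unsigned int bound = VMA_GLOBAL_BIND | 1;   /* bound globally, pinned once */
	unsigned int unbound = 0;

	printf("already bound: %d\n", fast_path_pin(&bound, PIN_GLOBAL));  /* 1: just ++count */
	printf("still unbound: %d\n", fast_path_pin(&unbound, PIN_USER));  /* 0: take the slow path */
	return 0;
}

Because the request bits alias the vma's bind bits, the common case collapses to a single increment, an XOR and a mask test, which is what makes inlining it into execbuf worthwhile.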
The trick used is to note that we can check for an overflow bit (keeping space available for it inside the flags) at the same time as checking the binding bits. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-17-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3ade87b..f9e45ad 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3705,23 +3705,19 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) obj->map_and_fenceable = mappable && fenceable; } -int -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) { - unsigned int bound; + unsigned int bound = vma->flags; int ret; GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); - bound = vma->flags; - if (WARN_ON((bound & I915_VMA_PIN_MASK) == I915_VMA_PIN_MASK)) - return -EBUSY; - - /* Pin early to prevent the shrinker/eviction logic from destroying - * our vma as we insert and bind. - */ - __i915_vma_pin(vma); + if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { + ret = -EBUSY; + goto err; + } if ((bound & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)) == 0) { ret = i915_vma_insert(vma, size, alignment, flags); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 5134c04..cc56206 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -194,13 +194,15 @@ struct i915_vma { * bits with absolutely no headroom. So use 4 bits. */ #define I915_VMA_PIN_MASK 0xf +#define I915_VMA_PIN_OVERFLOW BIT(5) /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND BIT(5) -#define I915_VMA_LOCAL_BIND BIT(6) +#define I915_VMA_GLOBAL_BIND BIT(6) +#define I915_VMA_LOCAL_BIND BIT(7) +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) -#define I915_VMA_GGTT BIT(7) -#define I915_VMA_CLOSED BIT(8) +#define I915_VMA_GGTT BIT(8) +#define I915_VMA_CLOSED BIT(9) unsigned int active; struct i915_gem_active last_read[I915_NUM_ENGINES]; @@ -620,20 +622,39 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a, return true; } -int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); /* Flags used by pin/bind&friends. 
*/ -#define PIN_MAPPABLE BIT(0) -#define PIN_NONBLOCK BIT(1) -#define PIN_GLOBAL BIT(2) -#define PIN_OFFSET_BIAS BIT(3) -#define PIN_USER BIT(4) -#define PIN_UPDATE BIT(5) -#define PIN_ZONE_4G BIT(6) -#define PIN_HIGH BIT(7) -#define PIN_OFFSET_FIXED BIT(8) +#define PIN_NONBLOCK BIT(0) +#define PIN_MAPPABLE BIT(1) +#define PIN_ZONE_4G BIT(2) + +#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */ +#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT(7) /* I915_VMA_LOCAL_BIND */ +#define PIN_UPDATE BIT(8) + +#define PIN_HIGH BIT(9) +#define PIN_OFFSET_BIAS BIT(10) +#define PIN_OFFSET_FIXED BIT(11) #define PIN_OFFSET_MASK (~4095) +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags); +static inline int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); + + /* Pin early to prevent the shrinker/eviction logic from destroying + * our vma as we insert and bind. + */ + if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) + return 0; + + return __i915_vma_do_pin(vma, size, alignment, flags); +} + static inline int i915_vma_pin_count(const struct i915_vma *vma) { return vma->flags & I915_VMA_PIN_MASK; @@ -647,7 +668,7 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma) static inline void __i915_vma_pin(struct i915_vma *vma) { vma->flags++; - GEM_BUG_ON(!i915_vma_is_pinned(vma)); + GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); } static inline void __i915_vma_unpin(struct i915_vma *vma) -- cgit v0.10.2 From de895082f797882b541141ceac9e407eeeb3ceca Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:34 +0100 Subject: drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin() Since i915_gem_obj_ggtt_pin() is an idiom breaking curry function for i915_gem_object_ggtt_pin(), spare us the confusion and remove it. Removing it now simplifies later patches to change the i915_vma_pin() (and friends) interface. 
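(Illustrative aside, not part of the patch: the helper being deleted is a plain currying wrapper, and the objection to it is readability rather than behaviour. The toy program below uses invented names; only the argument shape, a full pin call taking view/size/alignment/flags plus a wrapper that hard-codes the normal view and size 0, is taken from the diffs, including the NULL-view default added here.)

#include <stdio.h>

struct ggtt_view { int type; };
static const struct ggtt_view normal_view;

/* Full-argument pin: the view and size are explicit at every call site. */
static int object_ggtt_pin(const struct ggtt_view *view, unsigned long size,
                           unsigned long alignment, unsigned int flags)
{
	if (!view)	/* as in the patch, NULL now means "normal view" */
		view = &normal_view;
	printf("pin view=%d size=%lu align=%lu flags=%#x\n",
	       view->type, size, alignment, flags);
	return 0;
}

/* The kind of curry wrapper the patch removes: it silently fixes the view
 * and the size, which is exactly the information hidden from the reader. */
static int obj_ggtt_pin(unsigned long alignment, unsigned int flags)
{
	return object_ggtt_pin(&normal_view, 0, alignment, flags);
}

int main(void)
{
	obj_ggtt_pin(4096, 0);               /* old idiom */
	object_ggtt_pin(NULL, 0, 4096, 0);   /* new idiom: defaults visible at the call site */
	return 0;
}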
v2: Add a redundant GEM_BUG_ON(!view) to i915_gem_obj_lookup_or_create_ggtt_vma() Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-18-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cda8238..7ba9905 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3289,15 +3289,6 @@ static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj); -static inline int __must_check -i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, - uint32_t alignment, - unsigned flags) -{ - return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal, - 0, alignment, flags); -} - void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view); static inline void diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f9e45ad..fdacedc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -652,7 +652,7 @@ i915_gem_gtt_pread(struct drm_device *dev, uint64_t offset; int ret; - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (ret) { ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); if (ret) @@ -949,7 +949,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, if (obj->tiling_mode != I915_TILING_NONE) return -EFAULT; - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | PIN_NONBLOCK); if (ret) { ret = insert_mappable_node(i915, &node, PAGE_SIZE); if (ret) @@ -3719,7 +3720,7 @@ int __i915_vma_do_pin(struct i915_vma *vma, goto err; } - if ((bound & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)) == 0) { + if ((bound & I915_VMA_BIND_MASK) == 0) { ret = i915_vma_insert(vma, size, alignment, flags); if (ret) goto err; @@ -3750,7 +3751,8 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - BUG_ON(!view); + if (!view) + view = &i915_ggtt_view_normal; vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view); if (IS_ERR(vma)) @@ -3782,12 +3784,7 @@ void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view) { - struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); - - WARN_ON(!i915_vma_is_pinned(vma)); - WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); - - __i915_vma_unpin(vma); + i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view)); } int diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index dc7c0ae..bb72af5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -763,9 +763,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) return 0; /* Trying to pin first makes error handling easier. 
*/ - ret = i915_gem_obj_ggtt_pin(to->engine[RCS].state, - to->ggtt_alignment, - 0); + ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0, + to->ggtt_alignment, 0); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a0c95ab..109bb9d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1270,7 +1270,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, if (ret) goto err; - ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0); + ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); if (ret) goto err; @@ -1650,7 +1650,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * fitting due to fragmentation. * So this is actually safe. */ - ret = i915_gem_obj_ggtt_pin(obj, 0, 0); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index af012b1..db97155 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3430,6 +3430,8 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); + GEM_BUG_ON(!view); + if (!vma) vma = __i915_gem_vma_create(obj, &ggtt->base, view); diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 9023667..57fd767 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -191,7 +191,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) if (IS_ERR(so.obj)) return PTR_ERR(so.obj); - ret = i915_gem_obj_ggtt_pin(so.obj, 4096, 0); + ret = i915_gem_object_ggtt_pin(so.obj, NULL, 0, 0, 0); if (ret) goto err_obj; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index c9105f6..03a5cef 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -635,8 +635,8 @@ gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size) return NULL; } - if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) { + if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) { i915_gem_object_put(obj); return NULL; } diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index b883efd..3763e30 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -323,7 +323,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) return ret; } - ret = i915_gem_obj_ggtt_pin(guc_fw->guc_fw_obj, 0, 0); + ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0); if (ret) { DRM_DEBUG_DRIVER("pin failed %d\n", ret); return ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 622cd0b..361977f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -799,8 +799,9 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ce->pin_count++) return 0; - ret = i915_gem_obj_ggtt_pin(ce->state, GEN8_LR_CONTEXT_ALIGN, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + ret = i915_gem_object_ggtt_pin(ce->state, NULL, + 0, GEN8_LR_CONTEXT_ALIGN, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP); if (ret) goto err; @@ -1203,7 +1204,8 @@ static int lrc_setup_wa_ctx_obj(struct 
intel_engine_cs *engine, u32 size) return ret; } - ret = i915_gem_obj_ggtt_pin(engine->wa_ctx.obj, PAGE_SIZE, 0); + ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL, + 0, PAGE_SIZE, 0); if (ret) { DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n", ret); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 2c598d6..217fefc 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1401,7 +1401,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) } overlay->flip_addr = reg_bo->phys_handle->busaddr; } else { - ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(reg_bo, NULL, + 0, PAGE_SIZE, PIN_MAPPABLE); if (ret) { DRM_ERROR("failed to pin overlay register bo\n"); goto out_free_bo; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ac2e610..a862234 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -639,7 +639,7 @@ int intel_init_pipe_control(struct intel_engine_cs *engine, int size) goto err; } - ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_HIGH); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH); if (ret) goto err_unref; @@ -1896,7 +1896,7 @@ static int init_status_page(struct intel_engine_cs *engine) * actualy map it). */ flags |= PIN_MAPPABLE; - ret = i915_gem_obj_ggtt_pin(obj, 4096, flags); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags); if (ret) { err_unref: i915_gem_object_put(obj); @@ -1943,7 +1943,7 @@ int intel_ring_pin(struct intel_ring *ring) int ret; if (HAS_LLC(dev_priv) && !obj->stolen) { - ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags); if (ret) return ret; @@ -1957,8 +1957,8 @@ int intel_ring_pin(struct intel_ring *ring) goto err_unpin; } } else { - ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, - flags | PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, + flags | PIN_MAPPABLE); if (ret) return ret; @@ -2092,7 +2092,8 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, return 0; if (ce->state) { - ret = i915_gem_obj_ggtt_pin(ce->state, ctx->ggtt_alignment, 0); + ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0, + ctx->ggtt_alignment, 0); if (ret) goto error; } @@ -2649,7 +2650,7 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, i915.semaphores = 0; } else { i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK); + ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (ret != 0) { i915_gem_object_put(obj); DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n"); -- cgit v0.10.2 From 5d723d7afd320e687ebb59f7ac741b0ab02d77e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:35 +0100 Subject: drm/i915: Separate intel_frontbuffer into its own header In view of adding inline functions into the intel_frontbuffer section, we first split the header into its own file so that we can integrate it more easily with kerneldoc. 
Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-19-git-send-email-chris@chris-wilson.co.uk diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 2fe5952..87aaffc 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -70,6 +70,9 @@ Frontbuffer Tracking .. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c :doc: frontbuffer tracking +.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.h + :internal: + .. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c :internal: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fdacedc..1960705 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -33,6 +33,7 @@ #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" +#include "intel_frontbuffer.h" #include "intel_mocs.h" #include #include diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 109bb9d..e8e194f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -31,6 +31,7 @@ #include "i915_drv.h" #include "i915_trace.h" #include "intel_drv.h" +#include "intel_frontbuffer.h" #include #include diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 483ddfe..229b1c4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -34,6 +34,7 @@ #include #include #include "intel_drv.h" +#include "intel_frontbuffer.h" #include #include "i915_drv.h" #include "i915_gem_dmabuf.h" diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index b846623..8357c0e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1135,21 +1135,10 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); -/* intel_frontbuffer.c */ -void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin); -void intel_frontbuffer_flip_prepare(struct drm_device *dev, - unsigned frontbuffer_bits); -void intel_frontbuffer_flip_complete(struct drm_device *dev, - unsigned frontbuffer_bits); -void intel_frontbuffer_flip(struct drm_device *dev, - unsigned frontbuffer_bits); unsigned int intel_fb_align_height(struct drm_device *dev, unsigned int height, uint32_t pixel_format, uint64_t fb_format_modifier); -void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire, - enum fb_op_origin origin); u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, uint64_t fb_modifier, uint32_t pixel_format); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 6344999..0436b48 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -42,6 +42,7 @@ #include #include #include "intel_drv.h" +#include "intel_frontbuffer.h" #include #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index ac85357..b83a700 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -63,6 +63,7 @@ #include #include "intel_drv.h" +#include "intel_frontbuffer.h" #include "i915_drv.h" /** diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h 
b/drivers/gpu/drm/i915/intel_frontbuffer.h new file mode 100644 index 0000000..3d00614 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_frontbuffer.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2014-2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __INTEL_FRONTBUFFER_H__ +#define __INTEL_FRONTBUFFER_H__ + +struct drm_device; +struct drm_i915_private; +struct drm_i915_gem_object; + +void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, + enum fb_op_origin origin); +void intel_frontbuffer_flip_prepare(struct drm_device *dev, + unsigned frontbuffer_bits); +void intel_frontbuffer_flip_complete(struct drm_device *dev, + unsigned frontbuffer_bits); +void intel_frontbuffer_flip(struct drm_device *dev, + unsigned frontbuffer_bits); +void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire, + enum fb_op_origin origin); + +#endif /* __INTEL_FRONTBUFFER_H__ */ diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 217fefc..fd891fe 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -30,6 +30,7 @@ #include "i915_drv.h" #include "i915_reg.h" #include "intel_drv.h" +#include "intel_frontbuffer.h" /* Limits for overlay size. According to intel doc, the real limits are: * Y width: 4095, UV width (planar): 2047, Y height: 2047, diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 0de935a..e045295 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -36,6 +36,7 @@ #include #include #include "intel_drv.h" +#include "intel_frontbuffer.h" #include #include "i915_drv.h" -- cgit v0.10.2 From b5add9591ca5b869b8c9c559e16ccab8a8ba4727 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:36 +0100 Subject: drm/i915: Make fb_tracking.lock a spinlock We only need a very lightweight mechanism here as the locking is only used for co-ordinating a bitfield. 
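(Illustrative aside, not part of the patch: the point of the change is that the lock only guards a handful of bitwise updates, so a sleeping mutex is heavier than needed. The userspace sketch below uses a pthread spinlock and invented names, and only loosely mirrors the busy/flip bookkeeping in the hunks that follow; it is not the driver code.)

#include <pthread.h>
#include <stdio.h>

struct fb_tracking {
	pthread_spinlock_t lock;
	unsigned int busy_bits;   /* frontbuffers with CS rendering outstanding */
	unsigned int flip_bits;   /* frontbuffers with a flip pending */
};

static void fb_invalidate(struct fb_tracking *t, unsigned int bits)
{
	pthread_spin_lock(&t->lock);
	t->busy_bits |= bits;     /* rendering started on these frontbuffers */
	t->flip_bits &= ~bits;    /* any stale flip bits no longer matter */
	pthread_spin_unlock(&t->lock);
}

static unsigned int fb_flush(struct fb_tracking *t, unsigned int bits)
{
	pthread_spin_lock(&t->lock);
	bits &= t->busy_bits;     /* simplified: only flush what was busy */
	t->busy_bits &= ~bits;
	pthread_spin_unlock(&t->lock);
	return bits;              /* caller does the actual flush outside the lock */
}

int main(void)
{
	struct fb_tracking t = { .busy_bits = 0, .flip_bits = 0 };

	pthread_spin_init(&t.lock, PTHREAD_PROCESS_PRIVATE);
	fb_invalidate(&t, 0x3);
	printf("flushed bits: %#x\n", fb_flush(&t, 0x1));   /* 0x1 */
	pthread_spin_destroy(&t.lock);
	return 0;
}

The critical sections are a few OR/AND-NOT operations, so holding a spinlock for their duration is cheap and never sleeps, which is all the coordination this bitfield needs.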
v2: Move the cheap unlikely tests into the caller v3: Move the kerneldoc into the header (now separated out into intel_fronbuffer.h for better kerneldoc and readability) Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtien Cc: Daniel Vetter Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-20-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7ba9905..b26f5b1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1669,7 +1669,7 @@ struct intel_pipe_crc { }; struct i915_frontbuffer_tracking { - struct mutex lock; + spinlock_t lock; /* * Tracking bits for delayed frontbuffer flushing du to gpu activity or diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1960705..68110b9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4455,7 +4455,7 @@ i915_gem_load_init(struct drm_device *dev) dev_priv->mm.interruptible = true; - mutex_init(&dev_priv->fb_tracking.lock); + spin_lock_init(&dev_priv->fb_tracking.lock); } void i915_gem_load_cleanup(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index b83a700..f15486a 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -66,35 +66,19 @@ #include "intel_frontbuffer.h" #include "i915_drv.h" -/** - * intel_fb_obj_invalidate - invalidate frontbuffer object - * @obj: GEM object to invalidate - * @origin: which operation caused the invalidation - * - * This function gets called every time rendering on the given object starts and - * frontbuffer caching (fbc, low refresh rate for DRRS, panel self refresh) must - * be invalidated. For ORIGIN_CS any subsequent invalidation will be delayed - * until the rendering completes or a flip on this frontbuffer plane is - * scheduled. 
- */ -void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin) +void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - if (!obj->frontbuffer_bits) - return; - if (origin == ORIGIN_CS) { - mutex_lock(&dev_priv->fb_tracking.lock); - dev_priv->fb_tracking.busy_bits - |= obj->frontbuffer_bits; - dev_priv->fb_tracking.flip_bits - &= ~obj->frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); + dev_priv->fb_tracking.busy_bits |= obj->frontbuffer_bits; + dev_priv->fb_tracking.flip_bits &= ~obj->frontbuffer_bits; + spin_unlock(&dev_priv->fb_tracking.lock); } intel_psr_invalidate(dev, obj->frontbuffer_bits); @@ -121,9 +105,9 @@ static void intel_frontbuffer_flush(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); /* Delay flushing when rings are still busy.*/ - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); if (!frontbuffer_bits) return; @@ -133,18 +117,9 @@ static void intel_frontbuffer_flush(struct drm_device *dev, intel_fbc_flush(dev_priv, frontbuffer_bits, origin); } -/** - * intel_fb_obj_flush - flush frontbuffer object - * @obj: GEM object to flush - * @retire: set when retiring asynchronous rendering - * @origin: which operation caused the flush - * - * This function gets called every time rendering on the given object has - * completed and frontbuffer caching can be started again. If @retire is true - * then any delayed flushes will be unblocked. - */ -void intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin) +void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, + bool retire, + enum fb_op_origin origin) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -152,21 +127,18 @@ void intel_fb_obj_flush(struct drm_i915_gem_object *obj, WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - if (!obj->frontbuffer_bits) - return; - frontbuffer_bits = obj->frontbuffer_bits; if (retire) { - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); /* Filter out new bits since rendering started. */ frontbuffer_bits &= dev_priv->fb_tracking.busy_bits; - dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); } - intel_frontbuffer_flush(dev, frontbuffer_bits, origin); + if (frontbuffer_bits) + intel_frontbuffer_flush(dev, frontbuffer_bits, origin); } /** @@ -186,11 +158,11 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); dev_priv->fb_tracking.flip_bits |= frontbuffer_bits; /* Remove stale busy bits due to the old buffer. 
*/ dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); intel_psr_single_frame_update(dev, frontbuffer_bits); } @@ -210,13 +182,14 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); /* Mask any cancelled flips. */ frontbuffer_bits &= dev_priv->fb_tracking.flip_bits; dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); - intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP); + if (frontbuffer_bits) + intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP); } /** @@ -235,10 +208,10 @@ void intel_frontbuffer_flip(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); - mutex_lock(&dev_priv->fb_tracking.lock); + spin_lock(&dev_priv->fb_tracking.lock); /* Remove stale busy bits due to the old buffer. */ dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; - mutex_unlock(&dev_priv->fb_tracking.lock); + spin_unlock(&dev_priv->fb_tracking.lock); intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP); } diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h index 3d00614..60a0ec1 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/intel_frontbuffer.h @@ -28,15 +28,57 @@ struct drm_device; struct drm_i915_private; struct drm_i915_gem_object; -void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin); void intel_frontbuffer_flip_prepare(struct drm_device *dev, unsigned frontbuffer_bits); void intel_frontbuffer_flip_complete(struct drm_device *dev, unsigned frontbuffer_bits); void intel_frontbuffer_flip(struct drm_device *dev, unsigned frontbuffer_bits); -void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire, - enum fb_op_origin origin); + +void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, + enum fb_op_origin origin); +void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, + bool retire, + enum fb_op_origin origin); + +/** + * intel_fb_obj_invalidate - invalidate frontbuffer object + * @obj: GEM object to invalidate + * @origin: which operation caused the invalidation + * + * This function gets called every time rendering on the given object starts and + * frontbuffer caching (fbc, low refresh rate for DRRS, panel self refresh) must + * be invalidated. For ORIGIN_CS any subsequent invalidation will be delayed + * until the rendering completes or a flip on this frontbuffer plane is + * scheduled. + */ +static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, + enum fb_op_origin origin) +{ + if (!obj->frontbuffer_bits) + return; + + __intel_fb_obj_invalidate(obj, origin); +} + +/** + * intel_fb_obj_flush - flush frontbuffer object + * @obj: GEM object to flush + * @retire: set when retiring asynchronous rendering + * @origin: which operation caused the flush + * + * This function gets called every time rendering on the given object has + * completed and frontbuffer caching can be started again. If @retire is true + * then any delayed flushes will be unblocked. 
+ */ +static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, + bool retire, + enum fb_op_origin origin) +{ + if (!obj->frontbuffer_bits) + return; + + __intel_fb_obj_flush(obj, retire, origin); +} #endif /* __INTEL_FRONTBUFFER_H__ */ -- cgit v0.10.2 From faf5bf0ad62b332769199cc09c678287ab3c5f08 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:37 +0100 Subject: drm/i915: Use atomics to manipulate obj->frontbuffer_bits The individual bits inside obj->frontbuffer_bits are protected by each plane->mutex, but the whole bitfield may be accessed by multiple KMS operations simultaneously and so the RMW need to be under atomics. However, for updating the single field we do not need to mandate that it be under the struct_mutex, one more step towards its removal as the de facto BKL. Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-21-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8d47d1bf..9796b07 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -138,6 +138,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct intel_engine_cs *engine; struct i915_vma *vma; + unsigned int frontbuffer_bits; int pin_count = 0; enum intel_engine_id id; @@ -204,8 +205,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) if (engine) seq_printf(m, " (%s)", engine->name); - if (obj->frontbuffer_bits) - seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); + frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (frontbuffer_bits) + seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits); } static int i915_gem_object_list_info(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b26f5b1..3de75e8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2127,8 +2127,6 @@ struct drm_i915_gem_object_ops { */ #define INTEL_MAX_SPRITE_BITS_PER_PIPE 5 #define INTEL_FRONTBUFFER_BITS_PER_PIPE 8 -#define INTEL_FRONTBUFFER_BITS \ - (INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES) #define INTEL_FRONTBUFFER_PRIMARY(pipe) \ (1 << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))) #define INTEL_FRONTBUFFER_CURSOR(pipe) \ @@ -2216,7 +2214,7 @@ struct drm_i915_gem_object { unsigned int cache_level:3; unsigned int cache_dirty:1; - unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS; + atomic_t frontbuffer_bits; unsigned int has_wc_mmap; /** Count of VMA actually bound by this object */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 68110b9..03eb094 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4040,7 +4040,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) if (obj->stolen) i915_gem_object_unpin_pages(obj); - WARN_ON(obj->frontbuffer_bits); + WARN_ON(atomic_read(&obj->frontbuffer_bits)); if (obj->pages && obj->madv == I915_MADV_WILLNEED && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && @@ -4557,16 +4557,23 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, struct drm_i915_gem_object *new, unsigned frontbuffer_bits) { + /* Control of individual bits within the mask are guarded by + * the owning plane->mutex, i.e. we can never see concurrent + * manipulation of individual bits. 
But since the bitfield as a whole + * is updated using RMW, we need to use atomics in order to update + * the bits. + */ + BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > + sizeof(atomic_t) * BITS_PER_BYTE); + if (old) { - WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); - WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); - old->frontbuffer_bits &= ~frontbuffer_bits; + WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); + atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits); } if (new) { - WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); - WARN_ON(new->frontbuffer_bits & frontbuffer_bits); - new->frontbuffer_bits |= frontbuffer_bits; + WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits); + atomic_or(frontbuffer_bits, &new->frontbuffer_bits); } } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 229b1c4..5bc8206 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2601,7 +2601,8 @@ valid_fb: primary->fb = primary->state->fb = fb; primary->crtc = primary->state->crtc = &intel_crtc->base; intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary)); - obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit; + atomic_or(to_intel_plane(primary)->frontbuffer_bit, + &obj->frontbuffer_bits); } static void i9xx_update_primary_plane(struct drm_plane *primary, @@ -13810,19 +13811,12 @@ static void intel_atomic_track_fbs(struct drm_atomic_state *state) { struct drm_plane_state *old_plane_state; struct drm_plane *plane; - struct drm_i915_gem_object *obj, *old_obj; - struct intel_plane *intel_plane; int i; - mutex_lock(&state->dev->struct_mutex); - for_each_plane_in_state(state, plane, old_plane_state, i) { - obj = intel_fb_obj(plane->state->fb); - old_obj = intel_fb_obj(old_plane_state->fb); - intel_plane = to_intel_plane(plane); - - i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit); - } - mutex_unlock(&state->dev->struct_mutex); + for_each_plane_in_state(state, plane, old_plane_state, i) + i915_gem_track_fb(intel_fb_obj(old_plane_state->fb), + intel_fb_obj(plane->state->fb), + to_intel_plane(plane)->frontbuffer_bit); } /** diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index f15486a..0e5da90 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -67,23 +67,22 @@ #include "i915_drv.h" void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin) + enum fb_op_origin origin, + unsigned int frontbuffer_bits) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - if (origin == ORIGIN_CS) { spin_lock(&dev_priv->fb_tracking.lock); - dev_priv->fb_tracking.busy_bits |= obj->frontbuffer_bits; - dev_priv->fb_tracking.flip_bits &= ~obj->frontbuffer_bits; + dev_priv->fb_tracking.busy_bits |= frontbuffer_bits; + dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits; spin_unlock(&dev_priv->fb_tracking.lock); } - intel_psr_invalidate(dev, obj->frontbuffer_bits); - intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits); - intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin); + intel_psr_invalidate(dev, frontbuffer_bits); + intel_edp_drrs_invalidate(dev, frontbuffer_bits); + intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin); } /** @@ -119,15 +118,11 @@ static void intel_frontbuffer_flush(struct drm_device 
*dev, void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire, - enum fb_op_origin origin) + enum fb_op_origin origin, + unsigned int frontbuffer_bits) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - unsigned frontbuffer_bits; - - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - - frontbuffer_bits = obj->frontbuffer_bits; if (retire) { spin_lock(&dev_priv->fb_tracking.lock); diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h index 60a0ec1..0c85b20 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/intel_frontbuffer.h @@ -36,10 +36,12 @@ void intel_frontbuffer_flip(struct drm_device *dev, unsigned frontbuffer_bits); void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, - enum fb_op_origin origin); + enum fb_op_origin origin, + unsigned int frontbuffer_bits); void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire, - enum fb_op_origin origin); + enum fb_op_origin origin, + unsigned int frontbuffer_bits); /** * intel_fb_obj_invalidate - invalidate frontbuffer object @@ -55,10 +57,13 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, enum fb_op_origin origin) { - if (!obj->frontbuffer_bits) + unsigned int frontbuffer_bits; + + frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (!frontbuffer_bits) return; - __intel_fb_obj_invalidate(obj, origin); + __intel_fb_obj_invalidate(obj, origin, frontbuffer_bits); } /** @@ -75,10 +80,13 @@ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire, enum fb_op_origin origin) { - if (!obj->frontbuffer_bits) + unsigned int frontbuffer_bits; + + frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); + if (!frontbuffer_bits) return; - __intel_fb_obj_flush(obj, retire, origin); + __intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits); } #endif /* __INTEL_FRONTBUFFER_H__ */ -- cgit v0.10.2 From 5748b6a1f48eae5b8513dd88ab6da4c176686665 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:38 +0100 Subject: drm/i915: Use dev_priv consistently through the intel_frontbuffer interface Rather than a mismash of struct drm_device *dev and struct drm_i915_private *dev_priv being used freely within a function, be consistent and only pass along dev_priv. Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-22-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5bc8206..9068676 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4566,12 +4566,11 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) struct drm_atomic_state *old_state = old_crtc_state->base.state; struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc->base.state); - struct drm_device *dev = crtc->base.dev; struct drm_plane *primary = crtc->base.primary; struct drm_plane_state *old_pri_state = drm_atomic_get_existing_plane_state(old_state, primary); - intel_frontbuffer_flip(dev, pipe_config->fb_bits); + intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits); crtc->wm.cxsr_allowed = true; @@ -4694,7 +4693,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask * to compute the mask of flip planes precisely. 
For the time being * consider this a flip to a NULL plane. */ - intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe)); + intel_frontbuffer_flip(to_i915(dev), INTEL_FRONTBUFFER_ALL_MASK(pipe)); } static void ironlake_crtc_enable(struct drm_crtc *crtc) @@ -10952,7 +10951,8 @@ static void intel_unpin_work_fn(struct work_struct *__work) i915_gem_request_put(work->flip_queued_req); - intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit); + intel_frontbuffer_flip_complete(to_i915(dev), + to_intel_plane(primary)->frontbuffer_bit); intel_fbc_post_update(crtc); drm_framebuffer_unreference(work->old_fb); @@ -11727,7 +11727,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, to_intel_plane(primary)->frontbuffer_bit); mutex_unlock(&dev->struct_mutex); - intel_frontbuffer_flip_prepare(dev, + intel_frontbuffer_flip_prepare(to_i915(dev), to_intel_plane(primary)->frontbuffer_bit); trace_i915_flip_request(intel_crtc->plane, obj); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 53d97f6..0aadc65 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5228,7 +5228,7 @@ unlock: /** * intel_edp_drrs_invalidate - Disable Idleness DRRS - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called everytime rendering on the given planes start. @@ -5236,10 +5236,9 @@ unlock: * * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits. */ -void intel_edp_drrs_invalidate(struct drm_device *dev, - unsigned frontbuffer_bits) +void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@ -5271,7 +5270,7 @@ void intel_edp_drrs_invalidate(struct drm_device *dev, /** * intel_edp_drrs_flush - Restart Idleness DRRS - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called every time rendering on the given planes has @@ -5281,10 +5280,9 @@ void intel_edp_drrs_invalidate(struct drm_device *dev, * * Dirty frontbuffers relevant to DRRS are tracked in busy_frontbuffer_bits. 
*/ -void intel_edp_drrs_flush(struct drm_device *dev, - unsigned frontbuffer_bits) +void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8357c0e..a978866 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1374,11 +1374,12 @@ uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes); void intel_plane_destroy(struct drm_plane *plane); void intel_edp_drrs_enable(struct intel_dp *intel_dp); void intel_edp_drrs_disable(struct intel_dp *intel_dp); -void intel_edp_drrs_invalidate(struct drm_device *dev, - unsigned frontbuffer_bits); -void intel_edp_drrs_flush(struct drm_device *dev, unsigned frontbuffer_bits); +void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits); +void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits); bool intel_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port); + struct intel_digital_port *port); void intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, @@ -1551,13 +1552,13 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con /* intel_psr.c */ void intel_psr_enable(struct intel_dp *intel_dp); void intel_psr_disable(struct intel_dp *intel_dp); -void intel_psr_invalidate(struct drm_device *dev, +void intel_psr_invalidate(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); -void intel_psr_flush(struct drm_device *dev, +void intel_psr_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin); void intel_psr_init(struct drm_device *dev); -void intel_psr_single_frame_update(struct drm_device *dev, +void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); /* intel_runtime_pm.c */ diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index 0e5da90..966de4c 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -70,8 +70,7 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, enum fb_op_origin origin, unsigned int frontbuffer_bits) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); if (origin == ORIGIN_CS) { spin_lock(&dev_priv->fb_tracking.lock); @@ -80,14 +79,14 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, spin_unlock(&dev_priv->fb_tracking.lock); } - intel_psr_invalidate(dev, frontbuffer_bits); - intel_edp_drrs_invalidate(dev, frontbuffer_bits); + intel_psr_invalidate(dev_priv, frontbuffer_bits); + intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits); intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin); } /** * intel_frontbuffer_flush - flush frontbuffer - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * @origin: which operation caused the flush * @@ -97,12 +96,10 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, * * Can be called without any locks held. 
*/ -static void intel_frontbuffer_flush(struct drm_device *dev, +static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin) { - struct drm_i915_private *dev_priv = to_i915(dev); - /* Delay flushing when rings are still busy.*/ spin_lock(&dev_priv->fb_tracking.lock); frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits; @@ -111,8 +108,8 @@ static void intel_frontbuffer_flush(struct drm_device *dev, if (!frontbuffer_bits) return; - intel_edp_drrs_flush(dev, frontbuffer_bits); - intel_psr_flush(dev, frontbuffer_bits, origin); + intel_edp_drrs_flush(dev_priv, frontbuffer_bits); + intel_psr_flush(dev_priv, frontbuffer_bits, origin); intel_fbc_flush(dev_priv, frontbuffer_bits, origin); } @@ -121,8 +118,7 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, enum fb_op_origin origin, unsigned int frontbuffer_bits) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); if (retire) { spin_lock(&dev_priv->fb_tracking.lock); @@ -133,12 +129,12 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, } if (frontbuffer_bits) - intel_frontbuffer_flush(dev, frontbuffer_bits, origin); + intel_frontbuffer_flush(dev_priv, frontbuffer_bits, origin); } /** * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. The actual @@ -148,23 +144,21 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_prepare(struct drm_device *dev, +void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); - spin_lock(&dev_priv->fb_tracking.lock); dev_priv->fb_tracking.flip_bits |= frontbuffer_bits; /* Remove stale busy bits due to the old buffer. */ dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; spin_unlock(&dev_priv->fb_tracking.lock); - intel_psr_single_frame_update(dev, frontbuffer_bits); + intel_psr_single_frame_update(dev_priv, frontbuffer_bits); } /** * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after the flip has been latched and will complete @@ -172,11 +166,9 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_complete(struct drm_device *dev, +void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); - spin_lock(&dev_priv->fb_tracking.lock); /* Mask any cancelled flips. */ frontbuffer_bits &= dev_priv->fb_tracking.flip_bits; @@ -184,12 +176,13 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev, spin_unlock(&dev_priv->fb_tracking.lock); if (frontbuffer_bits) - intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP); + intel_frontbuffer_flush(dev_priv, + frontbuffer_bits, ORIGIN_FLIP); } /** * intel_frontbuffer_flip - synchronous frontbuffer flip - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. 
This is for @@ -198,15 +191,13 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev, * * Can be called without any locks held. */ -void intel_frontbuffer_flip(struct drm_device *dev, +void intel_frontbuffer_flip(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); - spin_lock(&dev_priv->fb_tracking.lock); /* Remove stale busy bits due to the old buffer. */ dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits; spin_unlock(&dev_priv->fb_tracking.lock); - intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP); + intel_frontbuffer_flush(dev_priv, frontbuffer_bits, ORIGIN_FLIP); } diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h index 0c85b20..76ceb53 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/intel_frontbuffer.h @@ -24,15 +24,14 @@ #ifndef __INTEL_FRONTBUFFER_H__ #define __INTEL_FRONTBUFFER_H__ -struct drm_device; struct drm_i915_private; struct drm_i915_gem_object; -void intel_frontbuffer_flip_prepare(struct drm_device *dev, +void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); -void intel_frontbuffer_flip_complete(struct drm_device *dev, +void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); -void intel_frontbuffer_flip(struct drm_device *dev, +void intel_frontbuffer_flip(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index fd891fe..413a203 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -840,8 +840,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, overlay->old_vid_bo = overlay->vid_bo; overlay->vid_bo = new_bo; - intel_frontbuffer_flip(&dev_priv->drm, - INTEL_FRONTBUFFER_OVERLAY(pipe)); + intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe)); return 0; diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 68bd0bb..adf2ce0 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -628,9 +628,8 @@ unlock: mutex_unlock(&dev_priv->psr.lock); } -static void intel_psr_exit(struct drm_device *dev) +static void intel_psr_exit(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_dp *intel_dp = dev_priv->psr.enabled; struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc; enum pipe pipe = to_intel_crtc(crtc)->pipe; @@ -639,7 +638,7 @@ static void intel_psr_exit(struct drm_device *dev) if (!dev_priv->psr.active) return; - if (HAS_DDI(dev)) { + if (HAS_DDI(dev_priv)) { val = I915_READ(EDP_PSR_CTL); WARN_ON(!(val & EDP_PSR_ENABLE)); @@ -674,7 +673,7 @@ static void intel_psr_exit(struct drm_device *dev) /** * intel_psr_single_frame_update - Single Frame Update - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * Some platforms support a single frame update feature that is used to @@ -682,10 +681,9 @@ static void intel_psr_exit(struct drm_device *dev) * So far it is only implemented for Valleyview and Cherryview because * hardware requires this to be done before a page flip. 
*/ -void intel_psr_single_frame_update(struct drm_device *dev, +void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; u32 val; @@ -694,7 +692,7 @@ void intel_psr_single_frame_update(struct drm_device *dev, * Single frame update is already supported on BDW+ but it requires * many W/A and it isn't really needed. */ - if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) return; mutex_lock(&dev_priv->psr.lock); @@ -720,7 +718,7 @@ void intel_psr_single_frame_update(struct drm_device *dev, /** * intel_psr_invalidate - Invalidade PSR - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * * Since the hardware frontbuffer tracking has gaps we need to integrate @@ -730,10 +728,9 @@ void intel_psr_single_frame_update(struct drm_device *dev, * * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits." */ -void intel_psr_invalidate(struct drm_device *dev, +void intel_psr_invalidate(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@ -750,14 +747,14 @@ void intel_psr_invalidate(struct drm_device *dev, dev_priv->psr.busy_frontbuffer_bits |= frontbuffer_bits; if (frontbuffer_bits) - intel_psr_exit(dev); + intel_psr_exit(dev_priv); mutex_unlock(&dev_priv->psr.lock); } /** * intel_psr_flush - Flush PSR - * @dev: DRM device + * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits * @origin: which operation caused the flush * @@ -768,10 +765,9 @@ void intel_psr_invalidate(struct drm_device *dev, * * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits. */ -void intel_psr_flush(struct drm_device *dev, +void intel_psr_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; enum pipe pipe; @@ -789,7 +785,7 @@ void intel_psr_flush(struct drm_device *dev, /* By definition flush = invalidate + flush */ if (frontbuffer_bits) - intel_psr_exit(dev); + intel_psr_exit(dev_priv); if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits) if (!work_busy(&dev_priv->psr.work.work)) -- cgit v0.10.2 From 573adb396241e2aa46401529abdba93bc1886a48 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:39 +0100 Subject: drm/i915: Move obj->active:5 to obj->flags We are motivated to avoid using a bitfield for obj->active for a couple of reasons. Firstly, we wish to document our lockless read of obj->active using READ_ONCE inside i915_gem_busy_ioctl() and that requires an integral type (i.e. not a bitfield). Secondly, gcc produces abysmal code when presented with a bitfield and that shows up high on the profiles of request tracking (mainly due to excess memory traffic as it converts the bitfield to a register and back and generates frequent AGI in the process). v2: BIT, break up a long line in compute the other engines, new paint for i915_gem_object_is_active (now i915_gem_object_get_active). 
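The gain from dropping the bitfield is easiest to see in isolation. Below is a small, hypothetical sketch (the demo_* structs and DEMO_* macros are invented for illustration and are not code from this series): READ_ONCE() needs an addressable integral object, which a bitfield member is not, and a plain unsigned long also lets the compiler emit a single load instead of the read-modify-write sequence a bitfield forces.

#include <linux/compiler.h>
#include <linux/bitops.h>

/* Hypothetical stand-ins for the real GEM object, only to show the pattern. */
struct demo_obj_bitfield { unsigned int active:5; };
struct demo_obj_flags    { unsigned long flags; };

#define DEMO_ACTIVE_SHIFT 0
#define DEMO_ACTIVE_MASK  (BIT(5) - 1)

static inline unsigned long demo_get_active(const struct demo_obj_flags *obj)
{
	/* A plain integral word can be sampled locklessly in one load... */
	return (READ_ONCE(obj->flags) >> DEMO_ACTIVE_SHIFT) & DEMO_ACTIVE_MASK;
}

/*
 * ...whereas READ_ONCE(obj->active) on the bitfield variant does not even
 * compile: READ_ONCE() takes the address of its argument, and a bitfield
 * member has no address. Updates to the flags word can likewise use plain
 * or atomic bit operations rather than the compiler-generated RMW.
 */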
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-23-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9796b07..24d63e2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -91,7 +91,7 @@ static int i915_capabilities(struct seq_file *m, void *data) static char get_active_flag(struct drm_i915_gem_object *obj) { - return obj->active ? '*' : ' '; + return i915_gem_object_is_active(obj) ? '*' : ' '; } static char get_pin_flag(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3de75e8..db5dc5b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2155,12 +2155,16 @@ struct drm_i915_gem_object { struct list_head batch_pool_link; + unsigned long flags; /** * This is set if the object is on the active lists (has pending * rendering and so a non-zero seqno), and is not set if it i s on * inactive (ready to be unbound) list. */ - unsigned int active:I915_NUM_ENGINES; +#define I915_BO_ACTIVE_SHIFT 0 +#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1) +#define __I915_BO_ACTIVE(bo) \ + ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK) /** * This is set if the object has been written to since last bound @@ -2325,6 +2329,37 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; } +static inline unsigned long +i915_gem_object_get_active(const struct drm_i915_gem_object *obj) +{ + return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK; +} + +static inline bool +i915_gem_object_is_active(const struct drm_i915_gem_object *obj) +{ + return i915_gem_object_get_active(obj); +} + +static inline void +i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine) +{ + obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT); +} + +static inline void +i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine) +{ + obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT); +} + +static inline bool +i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj, + int engine) +{ + return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT); +} + /* * Optimised SGL iterator for GEM objects */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 03eb094..0e2b00f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1358,7 +1358,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, if (!readonly) { active = obj->last_read; - active_mask = obj->active; + active_mask = i915_gem_object_get_active(obj); } else { active_mask = 1; active = &obj->last_write; @@ -1402,7 +1402,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, BUG_ON(!mutex_is_locked(&dev->struct_mutex)); BUG_ON(!dev_priv->mm.interruptible); - active_mask = obj->active; + active_mask = i915_gem_object_get_active(obj); if (!active_mask) return 0; @@ -2365,10 +2365,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active, struct drm_i915_gem_object *obj = container_of(active, struct drm_i915_gem_object, last_read[idx]); - GEM_BUG_ON((obj->active & (1 << idx)) == 0); + GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx)); - obj->active &= ~(1 << idx); - if (obj->active) + i915_gem_object_clear_active(obj, idx); + if 
(i915_gem_object_is_active(obj)) return; /* Bump our place on the bound list to keep it roughly in LRU order @@ -2672,7 +2672,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return -ENOENT; } - if (!obj->active) + if (!i915_gem_object_is_active(obj)) goto out; for (i = 0; i < I915_NUM_ENGINES; i++) { @@ -2760,7 +2760,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); - active_mask = obj->active; + active_mask = i915_gem_object_get_active(obj); if (!active_mask) return 0; @@ -3811,7 +3811,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * become non-busy without any further actions. */ args->busy = 0; - if (obj->active) { + if (i915_gem_object_is_active(obj)) { struct drm_i915_gem_request *req; int i; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e8e194f..a1da302 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -434,7 +434,7 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj, static bool object_is_idle(struct drm_i915_gem_object *obj) { - unsigned long active = obj->active; + unsigned long active = i915_gem_object_get_active(obj); int idx; for_each_active(active, idx) { @@ -990,11 +990,21 @@ err: return ret; } +static unsigned int eb_other_engines(struct drm_i915_gem_request *req) +{ + unsigned int mask; + + mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK; + mask <<= I915_BO_ACTIVE_SHIFT; + + return mask; +} + static int i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, struct list_head *vmas) { - const unsigned other_rings = ~intel_engine_flag(req->engine); + const unsigned int other_rings = eb_other_engines(req); struct i915_vma *vma; uint32_t flush_domains = 0; bool flush_chipset = false; @@ -1003,7 +1013,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - if (obj->active & other_rings) { + if (obj->flags & other_rings) { ret = i915_gem_object_sync(obj, req); if (ret) return ret; @@ -1166,9 +1176,9 @@ void i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. 
*/ - if (obj->active == 0) + if (!i915_gem_object_is_active(obj)) i915_gem_object_get(obj); - obj->active |= 1 << idx; + i915_gem_object_set_active(obj, idx); i915_gem_active_set(&obj->last_read[idx], req); if (flags & EXEC_OBJECT_WRITE) { diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index b577635..bcd85bd 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -182,7 +182,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, !is_vmalloc_addr(obj->mapping)) continue; - if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active) + if ((flags & I915_SHRINK_ACTIVE) == 0 && + i915_gem_object_is_active(obj)) continue; if (!can_release_pages(obj)) @@ -267,7 +268,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) count += obj->base.size >> PAGE_SHIFT; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - if (!obj->active && can_release_pages(obj)) + if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) count += obj->base.size >> PAGE_SHIFT; } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 651a84b..53f64fc 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -67,7 +67,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj) struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; int i, n; - if (!obj->active) + if (!i915_gem_object_is_active(obj)) return; n = 0; -- cgit v0.10.2 From 00e60f2659321a65a737c6aae6e106ed63ae993d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:40 +0100 Subject: drm/i915: Move i915_gem_object_wait_rendering() Just move it earlier so that we can use the companion nonblocking version in a couple of more callsites without having to add a forward declaration. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-24-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0e2b00f..16b388e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -302,6 +302,109 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) return ret; } +/** + * Ensures that all rendering to the object has completed and the object is + * safe to unbind from the GTT or access from the CPU. + * @obj: i915 gem object + * @readonly: waiting for just read access or read-write access + */ +int +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, + bool readonly) +{ + struct reservation_object *resv; + struct i915_gem_active *active; + unsigned long active_mask; + int idx; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + + if (!readonly) { + active = obj->last_read; + active_mask = i915_gem_object_get_active(obj); + } else { + active_mask = 1; + active = &obj->last_write; + } + + for_each_active(active_mask, idx) { + int ret; + + ret = i915_gem_active_wait(&active[idx], + &obj->base.dev->struct_mutex); + if (ret) + return ret; + } + + resv = i915_gem_object_get_dmabuf_resv(obj); + if (resv) { + long err; + + err = reservation_object_wait_timeout_rcu(resv, !readonly, true, + MAX_SCHEDULE_TIMEOUT); + if (err < 0) + return err; + } + + return 0; +} + +/* A nonblocking variant of the above wait. This is a highly dangerous routine + * as the object state may change during this call. 
+ */ +static __must_check int +i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, + struct intel_rps_client *rps, + bool readonly) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; + struct i915_gem_active *active; + unsigned long active_mask; + int ret, i, n = 0; + + lockdep_assert_held(&dev->struct_mutex); + GEM_BUG_ON(!to_i915(dev)->mm.interruptible); + + active_mask = i915_gem_object_get_active(obj); + if (!active_mask) + return 0; + + if (!readonly) { + active = obj->last_read; + } else { + active_mask = 1; + active = &obj->last_write; + } + + for_each_active(active_mask, i) { + struct drm_i915_gem_request *req; + + req = i915_gem_active_get(&active[i], + &obj->base.dev->struct_mutex); + if (req) + requests[n++] = req; + } + + mutex_unlock(&dev->struct_mutex); + ret = 0; + for (i = 0; ret == 0 && i < n; i++) + ret = i915_wait_request(requests[i], true, NULL, rps); + mutex_lock(&dev->struct_mutex); + + for (i = 0; i < n; i++) + i915_gem_request_put(requests[i]); + + return ret; +} + +static struct intel_rps_client *to_rps_client(struct drm_file *file) +{ + struct drm_i915_file_private *fpriv = file->driver_priv; + + return &fpriv->rps; +} + int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) @@ -1339,107 +1442,6 @@ put_rpm: return ret; } -/** - * Ensures that all rendering to the object has completed and the object is - * safe to unbind from the GTT or access from the CPU. - * @obj: i915 gem object - * @readonly: waiting for read access or write - */ -int -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, - bool readonly) -{ - struct reservation_object *resv; - struct i915_gem_active *active; - unsigned long active_mask; - int idx, ret; - - lockdep_assert_held(&obj->base.dev->struct_mutex); - - if (!readonly) { - active = obj->last_read; - active_mask = i915_gem_object_get_active(obj); - } else { - active_mask = 1; - active = &obj->last_write; - } - - for_each_active(active_mask, idx) { - ret = i915_gem_active_wait(&active[idx], - &obj->base.dev->struct_mutex); - if (ret) - return ret; - } - - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv) { - long err; - - err = reservation_object_wait_timeout_rcu(resv, !readonly, true, - MAX_SCHEDULE_TIMEOUT); - if (err < 0) - return err; - } - - return 0; -} - -/* A nonblocking variant of the above wait. This is a highly dangerous routine - * as the object state may change during this call. 
- */ -static __must_check int -i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, - struct intel_rps_client *rps, - bool readonly) -{ - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; - struct i915_gem_active *active; - unsigned long active_mask; - int ret, i, n = 0; - - BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - BUG_ON(!dev_priv->mm.interruptible); - - active_mask = i915_gem_object_get_active(obj); - if (!active_mask) - return 0; - - if (!readonly) { - active = obj->last_read; - } else { - active_mask = 1; - active = &obj->last_write; - } - - for_each_active(active_mask, i) { - struct drm_i915_gem_request *req; - - req = i915_gem_active_get(&active[i], - &obj->base.dev->struct_mutex); - if (req) - requests[n++] = req; - } - - mutex_unlock(&dev->struct_mutex); - ret = 0; - for (i = 0; ret == 0 && i < n; i++) - ret = i915_wait_request(requests[i], true, NULL, rps); - mutex_lock(&dev->struct_mutex); - - for (i = 0; i < n; i++) - i915_gem_request_put(requests[i]); - - return ret; -} - -static struct intel_rps_client *to_rps_client(struct drm_file *file) -{ - struct drm_i915_file_private *fpriv = file->driver_priv; - return &fpriv->rps; -} - static enum fb_op_origin write_origin(struct drm_i915_gem_object *obj, unsigned domain) { -- cgit v0.10.2 From 0eafec6d3244802d469712682b0f513963c23eff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:41 +0100 Subject: drm/i915: Enable lockless lookup of request tracking via RCU If we enable RCU for the requests (providing a grace period where we can inspect a "dead" request before it is freed), we can allow callers to carefully perform lockless lookup of an active request. However, by enabling deferred freeing of requests, we can potentially hog a lot of memory when dealing with tens of thousands of requests per second - with a quick insertion of a synchronize_rcu() inside our shrinker callback, that issue disappears. v2: Currently, it is our responsibility to handle reclaim i.e. to avoid hogging memory with the delayed slab frees. At the moment, we wait for a grace period in the shrinker, and block for all RCU callbacks on oom. Suggested alternatives focus on flushing our RCU callback when we have a certain number of outstanding request frees, and blocking on that flush after a second high watermark. (So rather than wait for the system to run out of memory, we stop issuing requests - both are nondeterministic.) Paul E. McKenney wrote: Another approach is synchronize_rcu() after some largish number of requests. The advantage of this approach is that it throttles the production of callbacks at the source. The corresponding disadvantage is that it slows things up. Another approach is to use call_rcu(), but if the previous call_rcu() is still in flight, block waiting for it. Yet another approach is the get_state_synchronize_rcu() / cond_synchronize_rcu() pair. The idea is to do something like this: cond_synchronize_rcu(cookie); cookie = get_state_synchronize_rcu(); You would of course do an initial get_state_synchronize_rcu() to get things going. This would not block unless there was less than one grace period's worth of time between invocations. But this assumes a busy system, where there is almost always a grace period in flight. 
But you can make that happen as follows: cond_synchronize_rcu(cookie); cookie = get_state_synchronize_rcu(); call_rcu(&my_rcu_head, noop_function); Note that you need additional code to make sure that the old callback has completed before doing a new one. Setting and clearing a flag with appropriate memory ordering control suffices (e.g,. smp_load_acquire() and smp_store_release()). v3: More comments on compiler and processor order of operations within the RCU lookup and discover we can use rcu_access_pointer() here instead. v4: Wrap i915_gem_active_get_rcu() to take the rcu_read_lock itself. Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: "Goel, Akash" Cc: Josh Triplett Cc: Daniel Vetter Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-25-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 16b388e..bc41478 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4431,7 +4431,9 @@ i915_gem_load_init(struct drm_device *dev) dev_priv->requests = kmem_cache_create("i915_gem_request", sizeof(struct drm_i915_gem_request), 0, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU, NULL); INIT_LIST_HEAD(&dev_priv->context_list); @@ -4467,6 +4469,9 @@ void i915_gem_load_cleanup(struct drm_device *dev) kmem_cache_destroy(dev_priv->requests); kmem_cache_destroy(dev_priv->vmas); kmem_cache_destroy(dev_priv->objects); + + /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ + rcu_barrier(); } int i915_gem_freeze_late(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 773b942..3fecb8f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -205,7 +205,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) prefetchw(next); INIT_LIST_HEAD(&active->link); - active->request = NULL; + RCU_INIT_POINTER(active->request, NULL); active->retire(active, request); } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 26ca697..6002adc 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -183,6 +183,12 @@ i915_gem_request_get(struct drm_i915_gem_request *req) return to_request(fence_get(&req->fence)); } +static inline struct drm_i915_gem_request * +i915_gem_request_get_rcu(struct drm_i915_gem_request *req) +{ + return to_request(fence_get_rcu(&req->fence)); +} + static inline void i915_gem_request_put(struct drm_i915_gem_request *req) { @@ -286,7 +292,7 @@ typedef void (*i915_gem_retire_fn)(struct i915_gem_active *, struct drm_i915_gem_request *); struct i915_gem_active { - struct drm_i915_gem_request *request; + struct drm_i915_gem_request __rcu *request; struct list_head link; i915_gem_retire_fn retire; }; @@ -327,13 +333,19 @@ i915_gem_active_set(struct i915_gem_active *active, struct drm_i915_gem_request *request) { list_move(&active->link, &request->active_list); - active->request = request; + rcu_assign_pointer(active->request, request); } static inline struct drm_i915_gem_request * __i915_gem_active_peek(const struct i915_gem_active *active) { - return active->request; + /* Inside the error capture (running with the driver in an unknown + * state), we want to bend the rules slightly (a lot). 
+ * + * Work is in progress to make it safer, in the meantime this keeps + * the known issue from spamming the logs. + */ + return rcu_dereference_protected(active->request, 1); } /** @@ -349,7 +361,29 @@ i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) { struct drm_i915_gem_request *request; - request = active->request; + request = rcu_dereference_protected(active->request, + lockdep_is_held(mutex)); + if (!request || i915_gem_request_completed(request)) + return NULL; + + return request; +} + +/** + * i915_gem_active_peek_rcu - report the active request being monitored + * @active - the active tracker + * + * i915_gem_active_peek_rcu() returns the current request being tracked if + * still active, or NULL. It does not obtain a reference on the request + * for the caller, and inspection of the request is only valid under + * the RCU lock. + */ +static inline struct drm_i915_gem_request * +i915_gem_active_peek_rcu(const struct i915_gem_active *active) +{ + struct drm_i915_gem_request *request; + + request = rcu_dereference(active->request); if (!request || i915_gem_request_completed(request)) return NULL; @@ -370,6 +404,119 @@ i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) } /** + * __i915_gem_active_get_rcu - return a reference to the active request + * @active - the active tracker + * + * __i915_gem_active_get() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold the RCU read lock, but + * the returned pointer is safe to use outside of RCU. + */ +static inline struct drm_i915_gem_request * +__i915_gem_active_get_rcu(const struct i915_gem_active *active) +{ + /* Performing a lockless retrieval of the active request is super + * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing + * slab of request objects will not be freed whilst we hold the + * RCU read lock. It does not guarantee that the request itself + * will not be freed and then *reused*. Viz, + * + * Thread A Thread B + * + * req = active.request + * retire(req) -> free(req); + * (req is now first on the slab freelist) + * active.request = NULL + * + * req = new submission on a new object + * ref(req) + * + * To prevent the request from being reused whilst the caller + * uses it, we take a reference like normal. Whilst acquiring + * the reference we check that it is not in a destroyed state + * (refcnt == 0). That prevents the request being reallocated + * whilst the caller holds on to it. To check that the request + * was not reallocated as we acquired the reference we have to + * check that our request remains the active request across + * the lookup, in the same manner as a seqlock. The visibility + * of the pointer versus the reference counting is controlled + * by using RCU barriers (rcu_dereference and rcu_assign_pointer). + * + * In the middle of all that, we inspect whether the request is + * complete. Retiring is lazy so the request may be completed long + * before the active tracker is updated. Querying whether the + * request is complete is far cheaper (as it involves no locked + * instructions setting cachelines to exclusive) than acquiring + * the reference, so we do it first. The RCU read lock ensures the + * pointer dereference is valid, but does not ensure that the + * seqno nor HWS is the right one! However, if the request was + * reallocated, that means the active tracker's request was complete. 
+ * If the new request is also complete, then both are and we can + * just report the active tracker is idle. If the new request is + * incomplete, then we acquire a reference on it and check that + * it remained the active request. + */ + do { + struct drm_i915_gem_request *request; + + request = rcu_dereference(active->request); + if (!request || i915_gem_request_completed(request)) + return NULL; + + request = i915_gem_request_get_rcu(request); + + /* What stops the following rcu_access_pointer() from occurring + * before the above i915_gem_request_get_rcu()? If we were + * to read the value before pausing to get the reference to + * the request, we may not notice a change in the active + * tracker. + * + * The rcu_access_pointer() is a mere compiler barrier, which + * means both the CPU and compiler are free to perform the + * memory read without constraint. The compiler only has to + * ensure that any operations after the rcu_access_pointer() + * occur afterwards in program order. This means the read may + * be performed earlier by an out-of-order CPU, or adventurous + * compiler. + * + * The atomic operation at the heart of + * i915_gem_request_get_rcu(), see fence_get_rcu(), is + * atomic_inc_not_zero() which is only a full memory barrier + * when successful. That is, if i915_gem_request_get_rcu() + * returns the request (and so with the reference counted + * incremented) then the following read for rcu_access_pointer() + * must occur after the atomic operation and so confirm + * that this request is the one currently being tracked. + */ + if (!request || request == rcu_access_pointer(active->request)) + return rcu_pointer_handoff(request); + + i915_gem_request_put(request); + } while (1); +} + +/** + * i915_gem_active_get_unlocked - return a reference to the active request + * @active - the active tracker + * + * i915_gem_active_get_unlocked() returns a reference to the active request, + * or NULL if the active tracker is idle. The reference is obtained under RCU, + * so no locking is required by the caller. + * + * The reference should be freed with i915_gem_request_put(). 
+ */ +static inline struct drm_i915_gem_request * +i915_gem_active_get_unlocked(const struct i915_gem_active *active) +{ + struct drm_i915_gem_request *request; + + rcu_read_lock(); + request = __i915_gem_active_get_rcu(active); + rcu_read_unlock(); + + return request; +} + +/** * i915_gem_active_isset - report whether the active tracker is assigned * @active - the active tracker * @@ -380,7 +527,7 @@ i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) static inline bool i915_gem_active_isset(const struct i915_gem_active *active) { - return active->request; + return rcu_access_pointer(active->request); } /** @@ -437,7 +584,8 @@ i915_gem_active_retire(struct i915_gem_active *active, struct drm_i915_gem_request *request; int ret; - request = active->request; + request = rcu_dereference_protected(active->request, + lockdep_is_held(mutex)); if (!request) return 0; @@ -446,7 +594,8 @@ i915_gem_active_retire(struct i915_gem_active *active, return ret; list_del_init(&active->link); - active->request = NULL; + RCU_INIT_POINTER(active->request, NULL); + active->retire(active, request); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index bcd85bd..1341cb5 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -205,6 +205,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, intel_runtime_pm_put(dev_priv); i915_gem_retire_requests(dev_priv); + /* expedite the RCU grace period to free some request slabs */ + synchronize_rcu_expedited(); return count; } @@ -225,10 +227,15 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, */ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) { - return i915_gem_shrink(dev_priv, -1UL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_ACTIVE); + unsigned long freed; + + freed = i915_gem_shrink(dev_priv, -1UL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_ACTIVE); + rcu_barrier(); /* wait until our RCU delayed slab frees are completed */ + + return freed; } static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) -- cgit v0.10.2 From ad778f8967ea2f0bfda02701f918bcfcd495b721 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:42 +0100 Subject: drm/i915: Export our request as a dma-buf fence on the reservation object If the GEM objects being rendered with in this request have been exported via dma-buf to a third party, hook ourselves into the dma-buf reservation object so that the third party can serialise with our rendering via the dma-buf fences. 
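For context, the importing side of this contract can be sketched as follows (not part of this patch; the helper name and timeout are invented for illustration, and only reservation_object_wait_timeout_rcu() is the stock dma-buf reservation API of this era). A third party that imported the buffer would block on the fences we attach here before touching the pages itself:

#include <linux/dma-buf.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/reservation.h>

/* Hypothetical importer helper: wait for i915's rendering to the shared
 * buffer to finish before reading (exclusive fence only) or writing
 * (all shared + exclusive fences) it ourselves.
 */
static int importer_sync_with_i915(struct dma_buf *dma_buf, bool will_write)
{
        struct reservation_object *resv = dma_buf->resv;
        long ret;

        ret = reservation_object_wait_timeout_rcu(resv, will_write, true,
                                                  msecs_to_jiffies(1000));
        if (ret == 0)
                return -ETIMEDOUT; /* fences did not signal in time */

        return ret < 0 ? ret : 0;
}
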
Testcase: igt/prime_busy Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-26-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 3a00ab3..c60a8d5b 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -23,9 +23,13 @@ * Authors: * Dave Airlie */ + +#include +#include + #include + #include "i915_drv.h" -#include static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) { @@ -218,25 +222,73 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .end_cpu_access = i915_gem_end_cpu_access, }; +static void export_fences(struct drm_i915_gem_object *obj, + struct dma_buf *dma_buf) +{ + struct reservation_object *resv = dma_buf->resv; + struct drm_i915_gem_request *req; + unsigned long active; + int idx; + + active = __I915_BO_ACTIVE(obj); + if (!active) + return; + + /* Serialise with execbuf to prevent concurrent fence-loops */ + mutex_lock(&obj->base.dev->struct_mutex); + + /* Mark the object for future fences before racily adding old fences */ + obj->base.dma_buf = dma_buf; + + ww_mutex_lock(&resv->lock, NULL); + + for_each_active(active, idx) { + req = i915_gem_active_get(&obj->last_read[idx], + &obj->base.dev->struct_mutex); + if (!req) + continue; + + if (reservation_object_reserve_shared(resv) == 0) + reservation_object_add_shared_fence(resv, &req->fence); + + i915_gem_request_put(req); + } + + req = i915_gem_active_get(&obj->last_write, + &obj->base.dev->struct_mutex); + if (req) { + reservation_object_add_excl_fence(resv, &req->fence); + i915_gem_request_put(req); + } + + ww_mutex_unlock(&resv->lock); + mutex_unlock(&obj->base.dev->struct_mutex); +} + struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags) { struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct dma_buf *dma_buf; exp_info.ops = &i915_dmabuf_ops; exp_info.size = gem_obj->size; exp_info.flags = flags; exp_info.priv = gem_obj; - if (obj->ops->dmabuf_export) { int ret = obj->ops->dmabuf_export(obj); if (ret) return ERR_PTR(ret); } - return dma_buf_export(&exp_info); + dma_buf = dma_buf_export(&exp_info); + if (IS_ERR(dma_buf)) + return dma_buf; + + export_fences(obj, dma_buf); + return dma_buf; } static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a1da302..7183474 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -26,14 +26,18 @@ * */ +#include +#include +#include + #include #include + #include "i915_drv.h" +#include "i915_gem_dmabuf.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" -#include -#include #define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_FENCE (1<<30) @@ -1205,6 +1209,28 @@ void i915_vma_move_to_active(struct i915_vma *vma, list_move_tail(&vma->vm_link, &vma->vm->active_list); } +static void eb_export_fence(struct drm_i915_gem_object *obj, + struct drm_i915_gem_request *req, + unsigned int flags) +{ + struct reservation_object *resv; + + resv = i915_gem_object_get_dmabuf_resv(obj); + if (!resv) + return; + + /* Ignore errors from failing to allocate the new fence, we can't + * handle an error right now. 
Worst case should be missed + * synchronisation leading to rendering corruption. + */ + ww_mutex_lock(&resv->lock, NULL); + if (flags & EXEC_OBJECT_WRITE) + reservation_object_add_excl_fence(resv, &req->fence); + else if (reservation_object_reserve_shared(resv) == 0) + reservation_object_add_shared_fence(resv, &req->fence); + ww_mutex_unlock(&resv->lock); +} + static void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct drm_i915_gem_request *req) @@ -1224,6 +1250,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, obj->base.read_domains = obj->base.pending_read_domains; i915_vma_move_to_active(vma, req, vma->exec_entry->flags); + eb_export_fence(obj, req, vma->exec_entry->flags); trace_i915_gem_object_change_domain(obj, old_read, old_write); } } -- cgit v0.10.2 From 055c3ff69d440928964228455ec29b071258d5fa Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 4 Aug 2016 14:08:00 -0700 Subject: drm/i915/gen9: Give one extra block per line for SKL plane WM calculations The bspec was updated a couple weeks ago to add an extra block per line to plane watermark calculations for linear pixel formats. Bspec update 115327 description: "Gen9+ - Updated the plane blocks per line calculation for linear cases. Adds +1 for all linear cases to handle the non-block aligned stride cases." Cc: Lyude Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/1470344880-27394-1-git-send-email-matthew.d.roper@intel.com Reviewed-by: Lyude diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 63f454a..948f244 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3352,6 +3352,8 @@ static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, plane_bytes_per_line *= 4; plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); plane_blocks_per_line /= 4; + } else if (tiling == DRM_FORMAT_MOD_NONE) { + plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; } else { plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); } -- cgit v0.10.2 From 5ac9056753e79ac5ad1ccc3c99b311688e46e8c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 2 Aug 2016 15:21:57 +0300 Subject: drm/i915: Fix iboost setting for SKL Y/U DP DDI buffer translation entry 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spec was recently fixed to have the correct iboost setting for the SKL Y/U DP DDI buffer translation table entry 2. Update our tables to match. 
Cc: David Weinehall Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470140517-13011-1-git-send-email-ville.syrjala@linux.intel.com Cc: stable@vger.kernel.org Reviewed-by: David Weinehall diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ac8700b..b8f729a 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -145,7 +145,7 @@ static const struct ddi_buf_trans skl_ddi_translations_dp[] = { static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { { 0x0000201B, 0x000000A2, 0x0 }, { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x0 }, + { 0x80007011, 0x000000CD, 0x1 }, { 0x80009010, 0x000000C0, 0x1 }, { 0x0000201B, 0x0000009D, 0x0 }, { 0x80005012, 0x000000C0, 0x1 }, @@ -158,7 +158,7 @@ static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = { { 0x00000018, 0x000000A2, 0x0 }, { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x0 }, + { 0x80007011, 0x000000CD, 0x3 }, { 0x80009010, 0x000000C0, 0x3 }, { 0x00000018, 0x0000009D, 0x0 }, { 0x80005012, 0x000000C0, 0x3 }, -- cgit v0.10.2 From 2467658e2de3ca1dee6688b6b0b6f48263dfd532 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:06 +0100 Subject: drm/i915: Introduce i915_gem_active_wait_unlocked() It is useful to be able to wait on pending rendering without grabbing the struct_mutex. We can do this by using the i915_gem_active_get_rcu() primitive to acquire a reference to the pending request without requiring struct_mutex, just the RCU read lock, and then call i915_wait_request(). v2: Rebase onto new i915_gem_active_get_unlocked() semantics that take the RCU read lock on behalf of the caller. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 6002adc..15495d1 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -569,6 +569,46 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) } /** + * i915_gem_active_wait_unlocked - waits until the request is completed + * @active - the active request on which to wait + * @interruptible - whether the wait can be woken by a userspace signal + * @timeout - how long to wait at most + * @rps - userspace client to charge for a waitboost + * + * i915_gem_active_wait_unlocked() waits until the request is completed before + * returning, without requiring any locks to be held. Note that it does not + * retire any requests before returning. + * + * This function relies on RCU in order to acquire the reference to the active + * request without holding any locks. See __i915_gem_active_get_rcu() for the + * glory details on how that is managed. Once the reference is acquired, we + * can then wait upon the request, and afterwards release our reference, + * free of any locking. + * + * This function wraps i915_wait_request(), see it for the full details on + * the arguments. + * + * Returns 0 if successful, or a negative error code. 
+ */ +static inline int +i915_gem_active_wait_unlocked(const struct i915_gem_active *active, + bool interruptible, + s64 *timeout, + struct intel_rps_client *rps) +{ + struct drm_i915_gem_request *request; + int ret = 0; + + request = i915_gem_active_get_unlocked(active); + if (request) { + ret = i915_wait_request(request, interruptible, timeout, rps); + i915_gem_request_put(request); + } + + return ret; +} + +/** * i915_gem_active_retire - waits until the request is retired * @active - the active request on which to wait * -- cgit v0.10.2 From b8f9096d6a51e3ac31deb0f57a9a323059bb4281 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:07 +0100 Subject: drm/i915: Convert non-blocking waits for requests over to using RCU We can completely avoid taking the struct_mutex around the non-blocking waits by switching over to the RCU request management (trading the mutex for a RCU read lock and some complex atomic operations). The improvement is that we gain further contention reduction, and overall the code become simpler due to the reduced mutex dancing. v2: Move i915_gem_fault tracepoint back to the start of the function, before the unlocked wait. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bc41478..cc6d102 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -349,24 +349,20 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, return 0; } -/* A nonblocking variant of the above wait. This is a highly dangerous routine - * as the object state may change during this call. +/* A nonblocking variant of the above wait. Must be called prior to + * acquiring the mutex for the object, as the object state may change + * during this call. A reference must be held by the caller for the object. 
*/ static __must_check int -i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, - struct intel_rps_client *rps, - bool readonly) +__unsafe_wait_rendering(struct drm_i915_gem_object *obj, + struct intel_rps_client *rps, + bool readonly) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; struct i915_gem_active *active; unsigned long active_mask; - int ret, i, n = 0; - - lockdep_assert_held(&dev->struct_mutex); - GEM_BUG_ON(!to_i915(dev)->mm.interruptible); + int idx; - active_mask = i915_gem_object_get_active(obj); + active_mask = __I915_BO_ACTIVE(obj); if (!active_mask) return 0; @@ -377,25 +373,16 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, active = &obj->last_write; } - for_each_active(active_mask, i) { - struct drm_i915_gem_request *req; + for_each_active(active_mask, idx) { + int ret; - req = i915_gem_active_get(&active[i], - &obj->base.dev->struct_mutex); - if (req) - requests[n++] = req; + ret = i915_gem_active_wait_unlocked(&active[idx], + true, NULL, rps); + if (ret) + return ret; } - mutex_unlock(&dev->struct_mutex); - ret = 0; - for (i = 0; ret == 0 && i < n; i++) - ret = i915_wait_request(requests[i], true, NULL, rps); - mutex_lock(&dev->struct_mutex); - - for (i = 0; i < n; i++) - i915_gem_request_put(requests[i]); - - return ret; + return 0; } static struct intel_rps_client *to_rps_client(struct drm_file *file) @@ -1467,10 +1454,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, int ret; /* Only handle setting domains to types used by the CPU. */ - if (write_domain & I915_GEM_GPU_DOMAINS) - return -EINVAL; - - if (read_domains & I915_GEM_GPU_DOMAINS) + if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) return -EINVAL; /* Having something in the write domain implies it's in the read @@ -1479,25 +1463,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (write_domain != 0 && read_domains != write_domain) return -EINVAL; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - obj = i915_gem_object_lookup(file, args->handle); - if (!obj) { - ret = -ENOENT; - goto unlock; - } + if (!obj) + return -ENOENT; /* Try to flush the object off the GPU without holding the lock. * We will repeat the flush holding the lock in the normal manner * to catch cases where we are gazumped. 
*/ - ret = i915_gem_object_wait_rendering__nonblocking(obj, - to_rps_client(file), - !write_domain); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain); + if (ret) + goto err; + + ret = i915_mutex_lock_interruptible(dev); if (ret) - goto unref; + goto err; if (read_domains & I915_GEM_DOMAIN_GTT) ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); @@ -1507,11 +1487,13 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (write_domain != 0) intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); -unref: i915_gem_object_put(obj); -unlock: mutex_unlock(&dev->struct_mutex); return ret; + +err: + i915_gem_object_put_unlocked(obj); + return ret; } /** @@ -1648,36 +1630,36 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_ggtt_view view = i915_ggtt_view_normal; + bool write = !!(vmf->flags & FAULT_FLAG_WRITE); pgoff_t page_offset; unsigned long pfn; - int ret = 0; - bool write = !!(vmf->flags & FAULT_FLAG_WRITE); - - intel_runtime_pm_get(dev_priv); + int ret; /* We don't use vmf->pgoff since that has the fake offset */ page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto out; - trace_i915_gem_object_fault(obj, page_offset, true, write); /* Try to flush the object off the GPU first without holding the lock. - * Upon reacquiring the lock, we will perform our sanity checks and then + * Upon acquiring the lock, we will perform our sanity checks and then * repeat the flush holding the lock in the normal manner to catch cases * where we are gazumped. */ - ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); + ret = __unsafe_wait_rendering(obj, NULL, !write); if (ret) - goto unlock; + goto err; + + intel_runtime_pm_get(dev_priv); + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err_rpm; /* Access to snoopable pages through the GTT is incoherent. */ if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { ret = -EFAULT; - goto unlock; + goto err_unlock; } /* Use a partial view if the object is bigger than the aperture. 
*/ @@ -1698,15 +1680,15 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Now pin it into the GTT if needed */ ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); if (ret) - goto unlock; + goto err_unlock; ret = i915_gem_object_set_to_gtt_domain(obj, write); if (ret) - goto unpin; + goto err_unpin; ret = i915_gem_object_get_fence(obj); if (ret) - goto unpin; + goto err_unpin; /* Finally, remap it using the new GTT offset */ pfn = ggtt->mappable_base + @@ -1751,11 +1733,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) (unsigned long)vmf->virtual_address, pfn + page_offset); } -unpin: +err_unpin: i915_gem_object_ggtt_unpin_view(obj, &view); -unlock: +err_unlock: mutex_unlock(&dev->struct_mutex); -out: +err_rpm: + intel_runtime_pm_put(dev_priv); +err: switch (ret) { case -EIO: /* @@ -1796,8 +1780,6 @@ out: ret = VM_FAULT_SIGBUS; break; } - - intel_runtime_pm_put(dev_priv); return ret; } -- cgit v0.10.2 From 8a3b3d576c933d99fe774e9cb88005b87c3fb5c4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:08 +0100 Subject: drm/i915: Convert non-blocking userptr waits for requests over to using RCU We can completely avoid taking the struct_mutex around the non-blocking waits by switching over to the RCU request management (trading the mutex for a RCU read lock and some complex atomic operations). The improvement is that we gain further contention reduction, and overall the code become simpler due to the reduced mutex dancing. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-3-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 53f64fc..96ab616 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -63,32 +63,12 @@ struct i915_mmu_object { static void wait_rendering(struct drm_i915_gem_object *obj) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; - int i, n; - - if (!i915_gem_object_is_active(obj)) - return; - - n = 0; - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; + unsigned long active = __I915_BO_ACTIVE(obj); + int idx; - req = i915_gem_active_get(&obj->last_read[i], - &obj->base.dev->struct_mutex); - if (req) - requests[n++] = req; - } - - mutex_unlock(&dev->struct_mutex); - - for (i = 0; i < n; i++) - i915_wait_request(requests[i], false, NULL, NULL); - - mutex_lock(&dev->struct_mutex); - - for (i = 0; i < n; i++) - i915_gem_request_put(requests[i]); + for_each_active(active, idx) + i915_gem_active_wait_unlocked(&obj->last_read[idx], + false, NULL, NULL); } static void cancel_userptr(struct work_struct *work) @@ -97,6 +77,8 @@ static void cancel_userptr(struct work_struct *work) struct drm_i915_gem_object *obj = mo->obj; struct drm_device *dev = obj->base.dev; + wait_rendering(obj); + mutex_lock(&dev->struct_mutex); /* Cancel any active worker and force us to re-evaluate gup */ obj->userptr.work = NULL; @@ -105,8 +87,6 @@ static void cancel_userptr(struct work_struct *work) struct drm_i915_private *dev_priv = to_i915(dev); bool was_interruptible; - wait_rendering(obj); - was_interruptible = dev_priv->mm.interruptible; dev_priv->mm.interruptible = false; -- cgit v0.10.2 From f826ee21e594438f5f87e1125e0d8f5ad49b749a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:09 +0100 Subject: drm/i915/userptr: 
Remove superfluous interruptible=false on waiting Inside the kthread context, we can't be interrupted by signals so touching the mm.interruptible flag is pointless and wait-request now consumes EIO itself. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-4-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 96ab616..57218cc 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -84,16 +84,9 @@ static void cancel_userptr(struct work_struct *work) obj->userptr.work = NULL; if (obj->pages != NULL) { - struct drm_i915_private *dev_priv = to_i915(dev); - bool was_interruptible; - - was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; - + /* We are inside a kthread context and can't be interrupted */ WARN_ON(i915_gem_object_unbind(obj)); WARN_ON(i915_gem_object_put_pages(obj)); - - dev_priv->mm.interruptible = was_interruptible; } i915_gem_object_put(obj); -- cgit v0.10.2 From 90f4fcd56bda1f52381bbd5034e8fb32688e779b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:10 +0100 Subject: drm/i915: Remove forced stop ring on suspend/unload Before suspending (or unloading), we would first wait upon all rendering to be completed and then disable the rings. This later step is a remanent from DRI1 days when we did not use request tracking for all operations upon the ring. Now that we are sure we are waiting upon the very last operation by the engine, we can forgo clobbering the ring registers, though we do keep the assert that the engine is indeed idle before sleeping. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-5-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index db5dc5b..abdfb97 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2004,7 +2004,6 @@ struct drm_i915_private { /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { void (*cleanup_engine)(struct intel_engine_cs *engine); - void (*stop_engine)(struct intel_engine_cs *engine); /** * Is the GPU currently considered idle, or busy executing diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cc6d102..395f2ce 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4080,16 +4080,6 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, return NULL; } -static void -i915_gem_stop_engines(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine; - - for_each_engine(engine, dev_priv) - dev_priv->gt.stop_engine(engine); -} - int i915_gem_suspend(struct drm_device *dev) { @@ -4118,12 +4108,6 @@ i915_gem_suspend(struct drm_device *dev) i915_gem_retire_requests(dev_priv); - /* Note that rather than stopping the engines, all we have to do - * is assert that every RING_HEAD == RING_TAIL (all execution complete) - * and similar for all logical context images (to ensure they are - * all ready for hibernation). 
- */ - i915_gem_stop_engines(dev); i915_gem_context_lost(dev_priv); mutex_unlock(&dev->struct_mutex); @@ -4308,10 +4292,8 @@ int i915_gem_init(struct drm_device *dev) if (!i915.enable_execlists) { dev_priv->gt.cleanup_engine = intel_engine_cleanup; - dev_priv->gt.stop_engine = intel_engine_stop; } else { dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; - dev_priv->gt.stop_engine = intel_logical_ring_stop; } /* This is just a security blanket to placate dragons. diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a07da54..309c5d9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -760,31 +760,6 @@ void intel_execlists_cancel_requests(struct intel_engine_cs *engine) } } -void intel_logical_ring_stop(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - if (!intel_engine_initialized(engine)) - return; - - ret = intel_engine_idle(engine); - if (ret) - DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", - engine->name, ret); - - /* TODO: Is this correct with Execlists enabled? */ - I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); - if (intel_wait_for_register(dev_priv, - RING_MI_MODE(engine->mmio_base), - MODE_IDLE, MODE_IDLE, - 1000)) { - DRM_ERROR("%s :timed out trying to stop ring\n", engine->name); - return; - } - I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); -} - static int intel_lr_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -1717,7 +1692,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) dev_priv = engine->i915; if (engine->buffer) { - intel_logical_ring_stop(engine); WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a862234..4593a65 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2203,7 +2203,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) dev_priv = engine->i915; if (engine->buffer) { - intel_engine_stop(engine); WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0); intel_ring_unpin(engine->buffer); @@ -2907,18 +2906,3 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) return intel_init_ring_buffer(engine); } - -void intel_engine_stop(struct intel_engine_cs *engine) -{ - int ret; - - if (!intel_engine_initialized(engine)) - return; - - ret = intel_engine_idle(engine); - if (ret) - DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", - engine->name, ret); - - stop_ring(engine); -} -- cgit v0.10.2 From dcff85c8443e7ad6abda897678d2fd5a950a64ad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:11 +0100 Subject: drm/i915: Enable i915_gem_wait_for_idle() without holding struct_mutex The principal motivation for this was to try and eliminate the struct_mutex from i915_gem_suspend - but we still need to hold the mutex current for the i915_gem_context_lost(). (The issue there is that there may be an indirect lockdep cycle between cpu_hotplug (i.e. suspend) and struct_mutex via the stop_machine().) For the moment, enabling last request tracking for the engine, allows us to do busyness checking and waiting without requiring the struct_mutex - which is useful in its own right. As a side-effect of having a robust means for tracking engine busyness, we can replace our other busyness heuristic, that of comparing against the last submitted seqno. 
For paranoid reasons, we have a semi-ordered check of that seqno inside the hangchecker, which we can now improve to an ordered check of the engine's busyness (removing a locked xchg in the process). v2: Pass along "bool interruptible" as being unlocked we cannot rely on i915->mm.interruptible being stable or even under our control. v3: Replace check Ironlake i915_gpu_busy() with the common precalculated value Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-6-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 24d63e2..1faea38 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4925,7 +4925,7 @@ i915_drop_caches_set(void *data, u64 val) return ret; if (val & DROP_ACTIVE) { - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, true); if (ret) goto unlock; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index abdfb97..6eff312 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3233,7 +3233,8 @@ int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_engines(struct drm_device *dev); -int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv); +int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + bool interruptible); int __must_check i915_gem_suspend(struct drm_device *dev); void i915_gem_resume(struct drm_device *dev); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 395f2ce..f01987e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2438,13 +2438,18 @@ static void i915_gem_reset_engine_status(struct intel_engine_cs *engine) static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) { + struct drm_i915_gem_request *request; struct intel_ring *ring; + request = i915_gem_active_peek(&engine->last_request, + &engine->i915->drm.struct_mutex); + /* Mark all pending requests as complete so that any concurrent * (lockless) lookup doesn't try and wait upon the request as we * reset it. */ - intel_engine_init_seqno(engine, engine->last_submitted_seqno); + if (request) + intel_engine_init_seqno(engine, request->fence.seqno); /* * Clear the execlists queue up before freeing the requests, as those @@ -2466,15 +2471,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) * implicit references on things like e.g. ppgtt address spaces through * the request. */ - if (!list_empty(&engine->request_list)) { - struct drm_i915_gem_request *request; - - request = list_last_entry(&engine->request_list, - struct drm_i915_gem_request, - link); - + if (request) i915_gem_request_retire_upto(request); - } + GEM_BUG_ON(intel_engine_is_active(engine)); /* Having flushed all requests from all queues, we know that all * ringbuffers must now be empty. 
However, since we do not reclaim @@ -2897,18 +2896,17 @@ destroy: return 0; } -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv) +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + bool interruptible) { struct intel_engine_cs *engine; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - for_each_engine(engine, dev_priv) { if (engine->last_context == NULL) continue; - ret = intel_engine_idle(engine); + ret = intel_engine_idle(engine, interruptible); if (ret) return ret; } @@ -4080,11 +4078,10 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, return NULL; } -int -i915_gem_suspend(struct drm_device *dev) +int i915_gem_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int ret = 0; + int ret; intel_suspend_gt_powersave(dev_priv); @@ -4102,7 +4099,7 @@ i915_gem_suspend(struct drm_device *dev) if (ret) goto err; - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, true); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 7be4258..f76c06e 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -39,7 +39,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; for_each_engine(engine, dev_priv) { - if (!list_empty(&engine->request_list)) + if (intel_engine_is_active(engine)) return false; } @@ -167,7 +167,7 @@ search_again: if (ret) return ret; - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, true); if (ret) return ret; @@ -272,7 +272,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) return ret; } - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, true); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index db97155..c1d7997 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2248,7 +2248,7 @@ static bool do_idling(struct drm_i915_private *dev_priv) if (unlikely(ggtt->do_idle_maps)) { dev_priv->mm.interruptible = false; - if (i915_gem_wait_for_idle(dev_priv)) { + if (i915_gem_wait_for_idle(dev_priv, false)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 3fecb8f..1f91dc8 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -265,7 +265,7 @@ static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) /* Carefully retire all requests without writing to the rings */ for_each_engine(engine, dev_priv) { - ret = intel_engine_idle(engine); + ret = intel_engine_idle(engine, true); if (ret) return ret; } @@ -486,7 +486,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ request->emitted_jiffies = jiffies; request->previous_seqno = engine->last_submitted_seqno; - smp_store_mb(engine->last_submitted_seqno, request->fence.seqno); + engine->last_submitted_seqno = request->fence.seqno; + i915_gem_active_set(&engine->last_request, request); list_add_tail(&request->link, &engine->request_list); list_add_tail(&request->ring_link, &ring->request_list); @@ -757,7 +758,7 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv) { engine_retire_requests(engine); - if 
(list_empty(&engine->request_list)) + if (!intel_engine_is_active(engine)) dev_priv->gt.active_engines &= ~intel_engine_flag(engine); } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 15495d1..3496e28 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -29,6 +29,17 @@ #include "i915_gem.h" +struct intel_wait { + struct rb_node node; + struct task_struct *tsk; + u32 seqno; +}; + +struct intel_signal_node { + struct rb_node node; + struct intel_wait wait; +}; + /** * Request queue structure. * diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 1341cb5..23d7037 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -412,7 +412,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr return NOTIFY_DONE; /* Force everything onto the inactive lists */ - ret = i915_gem_wait_for_idle(dev_priv); + ret = i915_gem_wait_for_idle(dev_priv, false); if (ret) goto out; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e586500..006a855 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2805,13 +2805,6 @@ static void gen8_disable_vblank(struct drm_device *dev, unsigned int pipe) } static bool -ring_idle(struct intel_engine_cs *engine, u32 seqno) -{ - return i915_seqno_passed(seqno, - READ_ONCE(engine->last_submitted_seqno)); -} - -static bool ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) { if (INTEL_GEN(engine->i915) >= 8) { @@ -3131,7 +3124,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) user_interrupts = 0; if (engine->hangcheck.seqno == seqno) { - if (ring_idle(engine, seqno)) { + if (!intel_engine_is_active(engine)) { engine->hangcheck.action = HANGCHECK_IDLE; if (busy) { /* Safeguard against driver failure */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f495969..e9b301a 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -166,6 +166,12 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine) memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); } +static void intel_engine_init_requests(struct intel_engine_cs *engine) +{ + init_request_active(&engine->last_request, NULL); + INIT_LIST_HEAD(&engine->request_list); +} + /** * intel_engines_setup_common - setup engine state not requiring hw access * @engine: Engine to setup. 
@@ -177,13 +183,13 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine) */ void intel_engine_setup_common(struct intel_engine_cs *engine) { - INIT_LIST_HEAD(&engine->request_list); INIT_LIST_HEAD(&engine->buffers); INIT_LIST_HEAD(&engine->execlist_queue); spin_lock_init(&engine->execlist_lock); engine->fence_context = fence_context_alloc(1); + intel_engine_init_requests(engine); intel_engine_init_hangcheck(engine); i915_gem_batch_pool_init(engine, &engine->batch_pool); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6bd352a..eedcace 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6328,19 +6328,11 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower); */ bool i915_gpu_busy(void) { - struct drm_i915_private *dev_priv; - struct intel_engine_cs *engine; bool ret = false; spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) - goto out_unlock; - dev_priv = i915_mch_dev; - - for_each_engine(engine, dev_priv) - ret |= !list_empty(&engine->request_list); - -out_unlock: + if (i915_mch_dev) + ret = i915_mch_dev->gt.awake; spin_unlock_irq(&mchdev_lock); return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4593a65..322274a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2227,24 +2227,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) engine->i915 = NULL; } -int intel_engine_idle(struct intel_engine_cs *engine) -{ - struct drm_i915_gem_request *req; - - /* Wait upon the last request to be completed */ - if (list_empty(&engine->request_list)) - return 0; - - req = list_entry(engine->request_list.prev, - struct drm_i915_gem_request, - link); - - /* Make sure we do not trigger any retires */ - return i915_wait_request(req, - req->i915->mm.interruptible, - NULL, NULL); -} - int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) { int ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 88952bf..43e545e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -3,6 +3,7 @@ #include #include "i915_gem_batch_pool.h" +#include "i915_gem_request.h" #define I915_CMD_HASH_ORDER 9 @@ -307,6 +308,13 @@ struct intel_engine_cs { */ u32 last_submitted_seqno; + /* An RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_gem_active_get_request_rcu(), or hold the + * struct_mutex. 
+ */ + struct i915_gem_active last_request; + struct i915_gem_context *last_context; struct intel_engine_hangcheck hangcheck; @@ -465,7 +473,6 @@ static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value) int __intel_ring_space(int head, int tail, int size); void intel_ring_update_space(struct intel_ring *ring); -int __must_check intel_engine_idle(struct intel_engine_cs *engine); void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); int intel_init_pipe_control(struct intel_engine_cs *engine, int size); @@ -475,6 +482,14 @@ void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); +static inline int intel_engine_idle(struct intel_engine_cs *engine, + bool interruptible) +{ + /* Wait upon the last request to be completed */ + return i915_gem_active_wait_unlocked(&engine->last_request, + interruptible, NULL, NULL); +} + int intel_init_render_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine); @@ -504,17 +519,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) } /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -struct intel_wait { - struct rb_node node; - struct task_struct *tsk; - u32 seqno; -}; - -struct intel_signal_node { - struct rb_node node; - struct intel_wait wait; -}; - int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) @@ -561,4 +565,9 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); unsigned int intel_kick_waiters(struct drm_i915_private *i915); unsigned int intel_kick_signalers(struct drm_i915_private *i915); +static inline bool intel_engine_is_active(struct intel_engine_cs *engine) +{ + return i915_gem_active_isset(&engine->last_request); +} + #endif /* _INTEL_RINGBUFFER_H_ */ -- cgit v0.10.2 From 307dc25bf64fb54575d60cf700c7b5b39f183b88 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:12 +0100 Subject: drm/i915: Simplify do_idling() (Ironlake vt-d w/a) Now that we pass along the expected interruptible nature for the wait-for-idle, we do not need to modify the global i915->mm.interruptible for this single call. 
(Only the immediate call to i915_gem_wait_for_idle() takes the interruptible status as the other action, dma_map_sg(), is independent of i915.ko) Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-7-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c1d7997..8b4f2f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2241,31 +2241,6 @@ static bool needs_idle_maps(struct drm_i915_private *dev_priv) return false; } -static bool do_idling(struct drm_i915_private *dev_priv) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - bool ret = dev_priv->mm.interruptible; - - if (unlikely(ggtt->do_idle_maps)) { - dev_priv->mm.interruptible = false; - if (i915_gem_wait_for_idle(dev_priv, false)) { - DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); - /* Wait a bit, in hopes it avoids the hang */ - udelay(10); - } - } - - return ret; -} - -static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - - if (unlikely(ggtt->do_idle_maps)) - dev_priv->mm.interruptible = interruptible; -} - void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; @@ -2713,14 +2688,18 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - bool interruptible; + struct i915_ggtt *ggtt = &dev_priv->ggtt; - interruptible = do_idling(dev_priv); + if (unlikely(ggtt->do_idle_maps)) { + if (i915_gem_wait_for_idle(dev_priv, false)) { + DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); + /* Wait a bit, in hopes it avoids the hang */ + udelay(10); + } + } dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, PCI_DMA_BIDIRECTIONAL); - - undo_idling(dev_priv, interruptible); } static void i915_gtt_color_adjust(struct drm_mm_node *node, -- cgit v0.10.2 From 5cba5be6b61d310590f48670f6285cdb83441b91 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:13 +0100 Subject: drm/i915/shrinker: Wait before acquiring struct_mutex under oom We can now wait for the GPU (all engines) to become idle without requiring the struct_mutex. Inside the shrinker, we need to currently take the struct_mutex in order to purge objects and to purge the objects we need the GPU to be idle - causing a stall whilst we hold the struct_mutex. We can hide most of that stall by performing the wait before taking the struct_mutex and only doing essential waits for new rendering on objects to be freed. 
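The lockless idle wait underneath this leans on the engine->last_request tracker introduced earlier in the series; stripped of its error plumbing it amounts to the loop below (a sketch only, the real code lives in i915_gem_wait_for_idle() and intel_engine_idle()):

        /* Drain every engine without taking struct_mutex; the reference to
         * each engine's last request is acquired under the RCU read lock.
         */
        for_each_engine(engine, dev_priv) {
                ret = i915_gem_active_wait_unlocked(&engine->last_request,
                                                    false, NULL, NULL);
                if (ret)
                        return ret;
        }
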
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-8-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 23d7037..9b92b64 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -323,17 +323,22 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, struct shrinker_lock_uninterruptible *slu, int timeout_ms) { - unsigned long timeout = msecs_to_jiffies(timeout_ms) + 1; + unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); + + do { + if (i915_gem_wait_for_idle(dev_priv, false) == 0 && + i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) + break; - while (!i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) { schedule_timeout_killable(1); if (fatal_signal_pending(current)) return false; - if (--timeout == 0) { + + if (time_after(jiffies, timeout)) { pr_err("Unable to lock GPU to purge memory.\n"); return false; } - } + } while (1); slu->was_interruptible = dev_priv->mm.interruptible; dev_priv->mm.interruptible = false; -- cgit v0.10.2 From f3f6184c5fab11d57a0c28524db2ddd6a68cb34a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:14 +0100 Subject: drm/i915: Tidy generation of the GTT mmap offset If we make the observation that mmap-offsets are only released when we free an object, we can then deduce that the shrinker only creates free space in the mmap arena indirectly by flushing the request list and freeing expired objects. If we combine this with the lockless vma-manager and lockless idling, we can avoid taking our big struct_mutex until we need to actually free the requests. One side-effect is that we defer the madvise checking until we need the pages (i.e. the fault handler). This brings us into line with the other delayed checks (and madvise in general). v2: s/ret/err/ and use if (!err) rather than if (ret == 0) Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-9-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f01987e..b841c39 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1898,36 +1898,28 @@ u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - int ret; - - dev_priv->mm.shrinker_no_lock_stealing = true; + int err; - ret = drm_gem_create_mmap_offset(&obj->base); - if (ret != -ENOSPC) - goto out; + err = drm_gem_create_mmap_offset(&obj->base); + if (!err) + return 0; - /* Badly fragmented mmap space? The only way we can recover - * space is by destroying unwanted objects. We can't randomly release - * mmap_offsets as userspace expects them to be persistent for the - * lifetime of the objects. The closest we can is to release the - * offsets on purgeable objects by truncating it and marking it purged, - * which prevents userspace from ever using that object again. + /* We can idle the GPU locklessly to flush stale objects, but in order + * to claim that space for ourselves, we need to take the big + * struct_mutex to free the requests+objects and allocate our slot. 
*/ - i915_gem_shrink(dev_priv, - obj->base.size >> PAGE_SHIFT, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE); - ret = drm_gem_create_mmap_offset(&obj->base); - if (ret != -ENOSPC) - goto out; + err = i915_gem_wait_for_idle(dev_priv, true); + if (err) + return err; - i915_gem_shrink_all(dev_priv); - ret = drm_gem_create_mmap_offset(&obj->base); -out: - dev_priv->mm.shrinker_no_lock_stealing = false; + err = i915_mutex_lock_interruptible(&dev_priv->drm); + if (!err) { + i915_gem_retire_requests(dev_priv); + err = drm_gem_create_mmap_offset(&obj->base); + mutex_unlock(&dev_priv->drm.struct_mutex); + } - return ret; + return err; } static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) @@ -1944,32 +1936,15 @@ i915_gem_mmap_gtt(struct drm_file *file, struct drm_i915_gem_object *obj; int ret; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - obj = i915_gem_object_lookup(file, handle); - if (!obj) { - ret = -ENOENT; - goto unlock; - } - - if (obj->madv != I915_MADV_WILLNEED) { - DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); - ret = -EFAULT; - goto out; - } + if (!obj) + return -ENOENT; ret = i915_gem_object_create_mmap_offset(obj); - if (ret) - goto out; + if (ret == 0) + *offset = drm_vma_node_offset_addr(&obj->base.vma_node); - *offset = drm_vma_node_offset_addr(&obj->base.vma_node); - -out: - i915_gem_object_put(obj); -unlock: - mutex_unlock(&dev->struct_mutex); + i915_gem_object_put_unlocked(obj); return ret; } -- cgit v0.10.2 From 3b4e896f14b165298d4a85ee4da735633892eeb3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:15 +0100 Subject: drm/i915: Remove unused no-shrinker-steal After removing the user of this wart, we can remove the wart entirely. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-10-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6eff312..31a614f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1318,7 +1318,6 @@ struct i915_gem_mm { struct notifier_block oom_notifier; struct notifier_block vmap_notifier; struct shrinker shrinker; - bool shrinker_no_lock_stealing; /** LRU list of objects with fence regs on them. */ struct list_head fence_list; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 9b92b64..b80802b 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -244,9 +244,6 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) if (!mutex_is_locked_by(&dev->struct_mutex, current)) return false; - if (to_i915(dev)->mm.shrinker_no_lock_stealing) - return false; - *unlock = false; } else *unlock = true; -- cgit v0.10.2 From 258a5edee0a306d52f5f84ca8a45736efa309ca4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:16 +0100 Subject: drm/i915: Do a nonblocking wait first in pread/pwrite If we try and read or write to an active request, we first must wait upon the GPU completing that request. Let's do that without holding the mutex (and so allow someone else to access the GPU whilst we wait). Upon completion, we will acquire the mutex and only then start the operation (i.e. we do not rely on state from before the initial wait). 
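Condensed, both ioctls now follow the same shape (a sketch; the labels and error paths are trimmed relative to the hunks below, and "write" merely distinguishes pwrite from pread):

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /* Sleep on the GPU first, holding only a reference to the object */
        ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write);
        if (ret)
                goto err_unlocked;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                goto err_unlocked;

        /* ... perform the copy under struct_mutex as before ... */
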
v2: Repaint the goto labels v3: Move the tracepoints back to the start of the ioctls Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-11-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b841c39..640f96f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -956,25 +956,27 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, args->size)) return -EFAULT; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - obj = i915_gem_object_lookup(file, args->handle); - if (!obj) { - ret = -ENOENT; - goto unlock; - } + if (!obj) + return -ENOENT; /* Bounds check source. */ if (args->offset > obj->base.size || args->size > obj->base.size - args->offset) { ret = -EINVAL; - goto out; + goto err; } trace_i915_gem_object_pread(obj, args->offset, args->size); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), true); + if (ret) + goto err; + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err; + ret = i915_gem_shmem_pread(dev, obj, args, file); /* pread for non shmem backed objects */ @@ -985,10 +987,13 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, intel_runtime_pm_put(to_i915(dev)); } -out: i915_gem_object_put(obj); -unlock: mutex_unlock(&dev->struct_mutex); + + return ret; + +err: + i915_gem_object_put_unlocked(obj); return ret; } @@ -1374,27 +1379,29 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, return -EFAULT; } - intel_runtime_pm_get(dev_priv); - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto put_rpm; - obj = i915_gem_object_lookup(file, args->handle); - if (!obj) { - ret = -ENOENT; - goto unlock; - } + if (!obj) + return -ENOENT; /* Bounds check destination. */ if (args->offset > obj->base.size || args->size > obj->base.size - args->offset) { ret = -EINVAL; - goto out; + goto err; } trace_i915_gem_object_pwrite(obj, args->offset, args->size); + ret = __unsafe_wait_rendering(obj, to_rps_client(file), false); + if (ret) + goto err; + + intel_runtime_pm_get(dev_priv); + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto err_rpm; + ret = -EFAULT; /* We can only do the GTT pwrite on untiled buffers, as otherwise * it would end up going through the fenced access, and we'll get @@ -1419,14 +1426,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = -ENODEV; } -out: i915_gem_object_put(obj); -unlock: mutex_unlock(&dev->struct_mutex); -put_rpm: intel_runtime_pm_put(dev_priv); return ret; + +err_rpm: + intel_runtime_pm_put(dev_priv); +err: + i915_gem_object_put_unlocked(obj); + return ret; } static enum fb_op_origin -- cgit v0.10.2 From 033d549b811ea8512efd57fa5fb5c53e498e0db3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:17 +0100 Subject: drm/i915: Remove (struct_mutex) locking for wait-ioctl With a bit of care (and leniency) we can iterate over the object and wait for previous rendering to complete with judicial use of atomic reference counting. The ABI requires us to ensure that an active object is eventually flushed (like the busy-ioctl) which is guaranteed by our management of requests (i.e. everything that is submitted to hardware is flushed in the same request). All we have to do is ensure that we can detect when the requests are complete for reporting when the object is idle (without triggering ETIME), locklessly - this is handled by i915_gem_active_wait_unlocked(). 
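From userspace the contract being preserved is just the wait ioctl below (an illustrative libdrm-based snippet; the helper name is made up, and a zero timeout acts as a busyness poll that returns -ETIME while requests are outstanding):

#include <errno.h>
#include <stdint.h>
#include <i915_drm.h>
#include <xf86drm.h>

/* Returns 0 once the object is idle; -ETIME if it was still busy after
 * timeout_ns (0 polls, a negative timeout waits indefinitely).
 */
static int gem_bo_wait(int fd, uint32_t handle, int64_t timeout_ns)
{
        struct drm_i915_gem_wait wait = {
                .bo_handle = handle,
                .timeout_ns = timeout_ns,
        };

        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait))
                return -errno;

        return 0;
}
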
The impact of this is actually quite small - the return to userspace following the wait was already lockless and so we don't see much gain in latency improvement upon completing the wait. What we do achieve here is completing an already finished wait without hitting the struct_mutex, our hold is quite short and so we are typically just a victim of contention rather than a cause - but it is still one less contention point! v2: Break up a long line. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-12-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 640f96f..4262073 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2622,47 +2622,28 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_wait *args = data; + struct intel_rps_client *rps = to_rps_client(file); struct drm_i915_gem_object *obj; - struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; - int i, n = 0; - int ret; + unsigned long active; + int idx, ret = 0; if (args->flags != 0) return -EINVAL; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - obj = i915_gem_object_lookup(file, args->bo_handle); - if (!obj) { - mutex_unlock(&dev->struct_mutex); + if (!obj) return -ENOENT; - } - - if (!i915_gem_object_is_active(obj)) - goto out; - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct drm_i915_gem_request *req; - - req = i915_gem_active_get(&obj->last_read[i], - &obj->base.dev->struct_mutex); - if (req) - requests[n++] = req; + active = __I915_BO_ACTIVE(obj); + for_each_active(active, idx) { + s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL; + ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true, + timeout, rps); + if (ret) + break; } -out: - i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); - - for (i = 0; i < n; i++) { - if (ret == 0) - ret = i915_wait_request(requests[i], true, - args->timeout_ns > 0 ? &args->timeout_ns : NULL, - to_rps_client(file)); - i915_gem_request_put(requests[i]); - } + i915_gem_object_put_unlocked(obj); return ret; } -- cgit v0.10.2 From 3fdc13c7a3cbd5788daad4cf1ddc619856e2f1c0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:18 +0100 Subject: drm/i915: Remove (struct_mutex) locking for busy-ioctl By applying the same logic as for wait-ioctl, we can query whether a request has completed without holding struct_mutex. The biggest impact system-wide is removing the flush_active and the contention that causes. Testcase: igt/gem_busy Signed-off-by: Chris Wilson Cc: Akash Goel Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-13-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4262073..1d8858d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3736,49 +3736,120 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view)); } +static __always_inline unsigned __busy_read_flag(unsigned int id) +{ + /* Note that we could alias engines in the execbuf API, but + * that would be very unwise as it prevents userspace from + * fine control over engine selection. Ahem. + * + * This should be something like EXEC_MAX_ENGINE instead of + * I915_NUM_ENGINES. 
+ */ + BUILD_BUG_ON(I915_NUM_ENGINES > 16); + return 0x10000 << id; +} + +static __always_inline unsigned int __busy_write_id(unsigned int id) +{ + return id; +} + +static __always_inline unsigned +__busy_set_if_active(const struct i915_gem_active *active, + unsigned int (*flag)(unsigned int id)) +{ + /* For more discussion about the barriers and locking concerns, + * see __i915_gem_active_get_rcu(). + */ + do { + struct drm_i915_gem_request *request; + unsigned int id; + + request = rcu_dereference(active->request); + if (!request || i915_gem_request_completed(request)) + return 0; + + id = request->engine->exec_id; + + /* Check that the pointer wasn't reassigned and overwritten. */ + if (request == rcu_access_pointer(active->request)) + return flag(id); + } while (1); +} + +static inline unsigned +busy_check_reader(const struct i915_gem_active *active) +{ + return __busy_set_if_active(active, __busy_read_flag); +} + +static inline unsigned +busy_check_writer(const struct i915_gem_active *active) +{ + return __busy_set_if_active(active, __busy_write_id); +} + int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - int ret; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + unsigned long active; obj = i915_gem_object_lookup(file, args->handle); - if (!obj) { - ret = -ENOENT; - goto unlock; - } + if (!obj) + return -ENOENT; - /* Count all active objects as busy, even if they are currently not used - * by the gpu. Users of this interface expect objects to eventually - * become non-busy without any further actions. - */ args->busy = 0; - if (i915_gem_object_is_active(obj)) { - struct drm_i915_gem_request *req; - int i; + active = __I915_BO_ACTIVE(obj); + if (active) { + int idx; - for (i = 0; i < I915_NUM_ENGINES; i++) { - req = i915_gem_active_peek(&obj->last_read[i], - &obj->base.dev->struct_mutex); - if (req) - args->busy |= 1 << (16 + req->engine->exec_id); - } - req = i915_gem_active_peek(&obj->last_write, - &obj->base.dev->struct_mutex); - if (req) - args->busy |= req->engine->exec_id; + /* Yes, the lookups are intentionally racy. + * + * First, we cannot simply rely on __I915_BO_ACTIVE. We have + * to regard the value as stale and as our ABI guarantees + * forward progress, we confirm the status of each active + * request with the hardware. + * + * Even though we guard the pointer lookup by RCU, that only + * guarantees that the pointer and its contents remain + * dereferencable and does *not* mean that the request we + * have is the same as the one being tracked by the object. + * + * Consider that we lookup the request just as it is being + * retired and freed. We take a local copy of the pointer, + * but before we add its engine into the busy set, the other + * thread reallocates it and assigns it to a task on another + * engine with a fresh and incomplete seqno. + * + * So after we lookup the engine's id, we double check that + * the active request is the same and only then do we add it + * into the busy set. + */ + rcu_read_lock(); + + for_each_active(active, idx) + args->busy |= busy_check_reader(&obj->last_read[idx]); + + /* For ABI sanity, we only care that the write engine is in + * the set of read engines. This is ensured by the ordering + * of setting last_read/last_write in i915_vma_move_to_active, + * and then in reverse in retire. 
+ * + * We don't care that the set of active read/write engines + * may change during construction of the result, as it is + * equally liable to change before userspace can inspect + * the result. + */ + args->busy |= busy_check_writer(&obj->last_write); + + rcu_read_unlock(); } - i915_gem_object_put(obj); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; + i915_gem_object_put_unlocked(obj); + return 0; } int -- cgit v0.10.2 From c21724cc4d3d5c96a15347f2435a753aff2007c4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:19 +0100 Subject: drm/i915: Reduce locking inside swfinish ioctl We only need to take the struct_mutex if the object is pinned to the display engine and so requires checking for clflush. (The race with userspace pinning the object to a framebuffer is irrelevant.) v2: Use access once for compiler hints (or not as it is a bitfield) v3: READ_ONCE, obj->pin_display is not a bitfield anymore v4: Don't be creative with goto. Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-14-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1d8858d..03e806c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1518,26 +1518,23 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj; - int ret = 0; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + int err = 0; obj = i915_gem_object_lookup(file, args->handle); - if (!obj) { - ret = -ENOENT; - goto unlock; - } + if (!obj) + return -ENOENT; /* Pinned buffers may be scanout, so flush the cache */ - if (obj->pin_display) - i915_gem_object_flush_cpu_write_domain(obj); + if (READ_ONCE(obj->pin_display)) { + err = i915_mutex_lock_interruptible(dev); + if (!err) { + i915_gem_object_flush_cpu_write_domain(obj); + mutex_unlock(&dev->struct_mutex); + } + } - i915_gem_object_put(obj); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; + i915_gem_object_put_unlocked(obj); + return err; } /** -- cgit v0.10.2 From e883d73503205d1eaaf049b835bf135b46738f57 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:20 +0100 Subject: drm/i915: Remove pinned check from madvise ioctl We don't need to incur the overhead of checking whether the object is pinned prior to changing its madvise. If the object is pinned, the madvise will not take effect until it is unpinned and so we cannot free the pages being pointed at by hardware. Marking a pinned object with allocated pages as DONTNEED will not trigger any undue warnings. The check is therefore superfluous, and by removing it we can remove a linear walk over all the vma the object has. Still despite it being an overzealous check, that error code is part of the current ABI and so we must proceed with caution. 
Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-15-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 03e806c..4e66045 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3883,11 +3883,6 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, goto unlock; } - if (i915_gem_obj_is_pinned(obj)) { - ret = -EINVAL; - goto out; - } - if (obj->pages && obj->tiling_mode != I915_TILING_NONE && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { @@ -3906,7 +3901,6 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, args->retained = obj->madv != __I915_MADV_PURGED; -out: i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); -- cgit v0.10.2 From 9ad3676148511d6af72be6f3638e361fd86e1f7b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:21 +0100 Subject: drm/i915: Remove locking for get_tiling Since we are not concerned with userspace racing itself with set-tiling (the order is indeterminant even if we take a lock), then we can safely read back the single obj->tiling_mode and do the static lookup of swizzle mode without having to take a lock. get-tiling is reasonably frequent due to the back-channel passing around of tiling parameters in DRI2/DRI3. v2: Make tiling_mode a full unsigned int so that we can trivially use it with READ_ONCE(). Separating it out into manual control over the flags field was too noisy for a simple patch. Note that we could use the lower bits of obj->stride for the tiling mode. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-16-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 31a614f..f18d876 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2183,10 +2183,6 @@ struct drm_i915_gem_object { unsigned int madv:2; /** - * Current tiling mode for the object. - */ - unsigned int tiling_mode:2; - /** * Whether the tiling parameters for the currently associated fence * register have changed. Note that for the purposes of tracking * tiling changes we also treat the unfenced register, the register @@ -2218,6 +2214,14 @@ struct drm_i915_gem_object { atomic_t frontbuffer_bits; + /** + * Current tiling mode for the object. + */ + unsigned int tiling_mode; + + /** Current tiling stride for the object, if it's tiled. */ + uint32_t stride; + unsigned int has_wc_mmap; /** Count of VMA actually bound by this object */ unsigned int bind_count; @@ -2245,9 +2249,6 @@ struct drm_i915_gem_object { struct i915_gem_active last_write; struct i915_gem_active last_fence; - /** Current tiling stride for the object, if it's tiled. */ - uint32_t stride; - /** References from framebuffers, locks out tiling changes. 
*/ unsigned long framebuffer_references; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index b7f9875..c0e0133 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -303,10 +303,8 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, if (!obj) return -ENOENT; - mutex_lock(&dev->struct_mutex); - - args->tiling_mode = obj->tiling_mode; - switch (obj->tiling_mode) { + args->tiling_mode = READ_ONCE(obj->tiling_mode); + switch (args->tiling_mode) { case I915_TILING_X: args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; break; @@ -330,8 +328,6 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; - i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); - + i915_gem_object_put_unlocked(obj); return 0; } -- cgit v0.10.2 From deeb1519b65a92ca06c8e8554a92df0fdb4d5dea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:22 +0100 Subject: drm/i915: Document and reject invalid tiling modes Through the GTT interface to the fence registers, we can only handle linear, X and Y tiling. The more esoteric tiling patterns are ignored. Document that the tiling ABI only supports upto Y tiling, and reject any attempts to set a tiling mode other than NONE, X or Y. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-17-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index c0e0133..6817f69 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -68,6 +68,9 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) if (tiling_mode == I915_TILING_NONE) return true; + if (tiling_mode > I915_TILING_LAST) + return false; + if (IS_GEN2(dev) || (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))) tile_width = 128; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0f29273..452629d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -926,6 +926,7 @@ struct drm_i915_gem_caching { #define I915_TILING_NONE 0 #define I915_TILING_X 1 #define I915_TILING_Y 2 +#define I915_TILING_LAST I915_TILING_Y #define I915_BIT_6_SWIZZLE_NONE 0 #define I915_BIT_6_SWIZZLE_9 1 -- cgit v0.10.2 From 3e510a8e65ef6d1cf45c18bf79c8f91ec481f154 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:23 +0100 Subject: drm/i915: Repack fence tiling mode and stride into a single integer In the previous commit, we moved the obj->tiling_mode out of a bitfield and into its own integer so that we could safely use READ_ONCE(). Let us now repair some of that damage by sharing the tiling_mode with its companion, the fence stride. 
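The repacking is lossless because any stride accepted by set-tiling is either zero (untiled) or a multiple of the 128-byte minimum tile width, so the low seven bits of a legal stride are always clear and are free to carry the tiling mode. Condensed from the accessors added below:

	#define FENCE_MINIMUM_STRIDE 128	/* see i915_tiling_ok() */
	#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
	#define STRIDE_MASK (~TILING_MASK)

	/* writer: obj->tiling_and_stride = args->stride | args->tiling_mode; */

	static inline unsigned int
	i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
	{
		return obj->tiling_and_stride & TILING_MASK;
	}

	static inline unsigned int
	i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
	{
		return obj->tiling_and_stride & STRIDE_MASK;
	}

	/* compile-time guard that every tiling mode fits below bit 7 */
	BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK);
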
v2: New magic Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-18-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1faea38..0620a84 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -101,7 +101,7 @@ static char get_pin_flag(struct drm_i915_gem_object *obj) static char get_tiling_flag(struct drm_i915_gem_object *obj) { - switch (obj->tiling_mode) { + switch (i915_gem_object_get_tiling(obj)) { default: case I915_TILING_NONE: return ' '; case I915_TILING_X: return 'X'; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f18d876..feec00f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2214,13 +2214,11 @@ struct drm_i915_gem_object { atomic_t frontbuffer_bits; - /** - * Current tiling mode for the object. - */ - unsigned int tiling_mode; - /** Current tiling stride for the object, if it's tiled. */ - uint32_t stride; + unsigned int tiling_and_stride; +#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */ +#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) +#define STRIDE_MASK (~TILING_MASK) unsigned int has_wc_mmap; /** Count of VMA actually bound by this object */ @@ -2359,6 +2357,24 @@ i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj, return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT); } +static inline unsigned int +i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & TILING_MASK; +} + +static inline bool +i915_gem_object_is_tiled(struct drm_i915_gem_object *obj) +{ + return i915_gem_object_get_tiling(obj) != I915_TILING_NONE; +} + +static inline unsigned int +i915_gem_object_get_stride(struct drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & STRIDE_MASK; +} + /* * Optimised SGL iterator for GEM objects */ @@ -3457,7 +3473,7 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec struct drm_i915_private *dev_priv = to_i915(obj->base.dev); return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && - obj->tiling_mode != I915_TILING_NONE; + i915_gem_object_is_tiled(obj); } /* i915_debugfs.c */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4e66045..7a00678 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1042,7 +1042,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, int ret; bool hit_slow_path = false; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) return -EFAULT; ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, @@ -1671,7 +1671,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Use a partial view if the object is bigger than the aperture. 
*/ if (obj->base.size >= ggtt->mappable_end && - obj->tiling_mode == I915_TILING_NONE) { + !i915_gem_object_is_tiled(obj)) { static const unsigned int chunk_size = 256; // 1 MiB memset(&view, 0, sizeof(view)); @@ -2189,7 +2189,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj); - if (obj->tiling_mode != I915_TILING_NONE && + if (i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) i915_gem_object_pin_pages(obj); @@ -2938,10 +2938,12 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) size = max(size, vma->size); if (flags & PIN_MAPPABLE) - size = i915_gem_get_ggtt_size(dev_priv, size, obj->tiling_mode); + size = i915_gem_get_ggtt_size(dev_priv, size, + i915_gem_object_get_tiling(obj)); min_alignment = - i915_gem_get_ggtt_alignment(dev_priv, size, obj->tiling_mode, + i915_gem_get_ggtt_alignment(dev_priv, size, + i915_gem_object_get_tiling(obj), flags & PIN_MAPPABLE); if (alignment == 0) alignment = min_alignment; @@ -3637,10 +3639,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) fence_size = i915_gem_get_ggtt_size(dev_priv, obj->base.size, - obj->tiling_mode); + i915_gem_object_get_tiling(obj)); fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, obj->base.size, - obj->tiling_mode, + i915_gem_object_get_tiling(obj), true); fenceable = (vma->node.size == fence_size && @@ -3884,7 +3886,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, } if (obj->pages && - obj->tiling_mode != I915_TILING_NONE && + i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (obj->madv == I915_MADV_WILLNEED) i915_gem_object_unpin_pages(obj); @@ -4054,7 +4056,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) if (obj->pages && obj->madv == I915_MADV_WILLNEED && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && - obj->tiling_mode != I915_TILING_NONE) + i915_gem_object_is_tiled(obj)) i915_gem_object_unpin_pages(obj); if (WARN_ON(obj->pages_pin_count)) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7183474..c494b79 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -803,7 +803,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; + i915_gem_object_is_tiled(obj); need_mappable = need_fence || need_reloc_mappable(vma); if (entry->flags & EXEC_OBJECT_PINNED) diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 3b462da..9e8173f 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -86,20 +86,22 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg, if (obj) { u32 size = i915_gem_obj_ggtt_size(obj); + unsigned int tiling = i915_gem_object_get_tiling(obj); + unsigned int stride = i915_gem_object_get_stride(obj); uint64_t val; /* Adjust fence size to match tiled area */ - if (obj->tiling_mode != I915_TILING_NONE) { - uint32_t row_size = obj->stride * - (obj->tiling_mode == I915_TILING_Y ? 32 : 8); + if (tiling != I915_TILING_NONE) { + uint32_t row_size = stride * + (tiling == I915_TILING_Y ? 
32 : 8); size = (size / row_size) * row_size; } val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 0xfffff000) << 32; val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; - val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; - if (obj->tiling_mode == I915_TILING_Y) + val |= (uint64_t)((stride / 128) - 1) << fence_pitch_shift; + if (tiling == I915_TILING_Y) val |= 1 << I965_FENCE_TILING_Y_SHIFT; val |= I965_FENCE_REG_VALID; @@ -122,6 +124,8 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg, if (obj) { u32 size = i915_gem_obj_ggtt_size(obj); + unsigned int tiling = i915_gem_object_get_tiling(obj); + unsigned int stride = i915_gem_object_get_stride(obj); int pitch_val; int tile_width; @@ -131,17 +135,17 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg, "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); - if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) + if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) tile_width = 128; else tile_width = 512; /* Note: pitch better be a power of two tile widths */ - pitch_val = obj->stride / tile_width; + pitch_val = stride / tile_width; pitch_val = ffs(pitch_val) - 1; val = i915_gem_obj_ggtt_offset(obj); - if (obj->tiling_mode == I915_TILING_Y) + if (tiling == I915_TILING_Y) val |= 1 << I830_FENCE_TILING_Y_SHIFT; val |= I915_FENCE_SIZE_BITS(size); val |= pitch_val << I830_FENCE_PITCH_SHIFT; @@ -161,6 +165,8 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg, if (obj) { u32 size = i915_gem_obj_ggtt_size(obj); + unsigned int tiling = i915_gem_object_get_tiling(obj); + unsigned int stride = i915_gem_object_get_stride(obj); uint32_t pitch_val; WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || @@ -169,11 +175,11 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg, "object 0x%08llx not 512K or pot-size 0x%08x aligned\n", i915_gem_obj_ggtt_offset(obj), size); - pitch_val = obj->stride / 128; + pitch_val = stride / 128; pitch_val = ffs(pitch_val) - 1; val = i915_gem_obj_ggtt_offset(obj); - if (obj->tiling_mode == I915_TILING_Y) + if (tiling == I915_TILING_Y) val |= 1 << I830_FENCE_TILING_Y_SHIFT; val |= I830_FENCE_SIZE_BITS(size); val |= pitch_val << I830_FENCE_PITCH_SHIFT; @@ -201,9 +207,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) mb(); - WARN(obj && (!obj->stride || !obj->tiling_mode), + WARN(obj && + (!i915_gem_object_get_stride(obj) || + !i915_gem_object_get_tiling(obj)), "bogus fence setup with stride: 0x%x, tiling mode: %i\n", - obj->stride, obj->tiling_mode); + i915_gem_object_get_stride(obj), + i915_gem_object_get_tiling(obj)); if (IS_GEN2(dev)) i830_write_fence_reg(dev, reg, obj); @@ -248,7 +257,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) { - if (obj->tiling_mode) + if (i915_gem_object_is_tiled(obj)) i915_gem_release_mmap(obj); /* As we do not have an associated fence register, we will force @@ -361,7 +370,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - bool enable = obj->tiling_mode != I915_TILING_NONE; + bool enable = i915_gem_object_is_tiled(obj); struct drm_i915_fence_reg *reg; int ret; @@ -477,7 +486,7 @@ void 
i915_gem_restore_fences(struct drm_device *dev) */ if (reg->obj) { i915_gem_object_update_fence(reg->obj, reg, - reg->obj->tiling_mode); + i915_gem_object_get_tiling(reg->obj)); } else { i915_gem_write_fence(dev, i, NULL); } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 6817f69..f4b984d 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -170,6 +170,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int ret = 0; + /* Make sure we don't cross-contaminate obj->tiling_and_stride */ + BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK); + obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; @@ -217,8 +220,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } } - if (args->tiling_mode != obj->tiling_mode || - args->stride != obj->stride) { + if (args->tiling_mode != i915_gem_object_get_tiling(obj) || + args->stride != i915_gem_object_get_stride(obj)) { /* We need to rebind the object if its current allocation * no longer meets the alignment restrictions for its new * tiling mode. Otherwise we can just leave it alone, but @@ -241,7 +244,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (args->tiling_mode == I915_TILING_NONE) i915_gem_object_unpin_pages(obj); - if (obj->tiling_mode == I915_TILING_NONE) + if (!i915_gem_object_is_tiled(obj)) i915_gem_object_pin_pages(obj); } @@ -250,16 +253,16 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, &dev->struct_mutex) || obj->fence_reg != I915_FENCE_REG_NONE; - obj->tiling_mode = args->tiling_mode; - obj->stride = args->stride; + obj->tiling_and_stride = + args->stride | args->tiling_mode; /* Force the fence to be reacquired for GTT access */ i915_gem_release_mmap(obj); } } /* we have to maintain this existing ABI... 
*/ - args->stride = obj->stride; - args->tiling_mode = obj->tiling_mode; + args->stride = i915_gem_object_get_stride(obj); + args->tiling_mode = i915_gem_object_get_tiling(obj); /* Try to preallocate memory required to save swizzling on put-pages */ if (i915_gem_object_needs_bit17_swizzle(obj)) { @@ -306,7 +309,7 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, if (!obj) return -ENOENT; - args->tiling_mode = READ_ONCE(obj->tiling_mode); + args->tiling_mode = READ_ONCE(obj->tiling_and_stride) & TILING_MASK; switch (args->tiling_mode) { case I915_TILING_X: args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index cc28ad4..eecb870 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -781,7 +781,7 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->pinned = 0; if (i915_gem_obj_is_pinned(obj)) err->pinned = 1; - err->tiling = obj->tiling_mode; + err->tiling = i915_gem_object_get_tiling(obj); err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; err->userptr = obj->userptr.mm != NULL; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9068676..9cbf543 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2466,9 +2466,8 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; } - obj->tiling_mode = plane_config->tiling; - if (obj->tiling_mode == I915_TILING_X) - obj->stride = fb->pitches[0]; + if (plane_config->tiling == I915_TILING_X) + obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X; mode_cmd.pixel_format = fb->pixel_format; mode_cmd.width = fb->width; @@ -2594,7 +2593,7 @@ valid_fb: intel_state->dst.y2 = plane_state->crtc_y + plane_state->crtc_h; obj = intel_fb_obj(fb); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dev_priv->preserve_bios_swizzle = true; drm_framebuffer_reference(fb); @@ -2672,8 +2671,7 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, BUG(); } - if (INTEL_INFO(dev)->gen >= 4 && - obj->tiling_mode != I915_TILING_NONE) + if (INTEL_INFO(dev)->gen >= 4 && i915_gem_object_is_tiled(obj)) dspcntr |= DISPPLANE_TILED; if (IS_G4X(dev)) @@ -2782,7 +2780,7 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, BUG(); } - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dspcntr |= DISPPLANE_TILED; if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) @@ -11200,7 +11198,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); intel_ring_emit(ring, fb->pitches[0]); intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | - obj->tiling_mode); + i915_gem_object_get_tiling(obj)); /* XXX Enabling the panel-fitter across page-flip is so far * untested on non-native modes, so ignore it for now. 
@@ -11232,7 +11230,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0] | obj->tiling_mode); + intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj)); intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); /* Contrary to the suggestions in the documentation, @@ -11335,7 +11333,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, } intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); - intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode)); + intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj)); intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); intel_ring_emit(ring, (MI_NOOP)); @@ -11442,7 +11440,7 @@ static void ilk_do_mmio_flip(struct intel_crtc *intel_crtc, dspcntr = I915_READ(reg); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dspcntr |= DISPPLANE_TILED; else dspcntr &= ~DISPPLANE_TILED; @@ -11670,7 +11668,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { engine = &dev_priv->engine[BCS]; - if (obj->tiling_mode != intel_fb_obj(work->old_fb)->tiling_mode) + if (i915_gem_object_get_tiling(obj) != + i915_gem_object_get_tiling(intel_fb_obj(work->old_fb))) /* vlv: DISPLAY_FLIP fails to change tiling */ engine = NULL; } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { @@ -14932,15 +14931,15 @@ static int intel_framebuffer_init(struct drm_device *dev, if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { /* Enforce that fb modifier and tiling mode match, but only for * X-tiled. This is needed for FBC. */ - if (!!(obj->tiling_mode == I915_TILING_X) != + if (!!(i915_gem_object_get_tiling(obj) == I915_TILING_X) != !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) { DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); return -EINVAL; } } else { - if (obj->tiling_mode == I915_TILING_X) + if (i915_gem_object_get_tiling(obj) == I915_TILING_X) mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; - else if (obj->tiling_mode == I915_TILING_Y) { + else if (i915_gem_object_get_tiling(obj) == I915_TILING_Y) { DRM_DEBUG("No Y tiling for legacy addfb\n"); return -EINVAL; } @@ -14984,9 +14983,10 @@ static int intel_framebuffer_init(struct drm_device *dev, } if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED && - mode_cmd->pitches[0] != obj->stride) { + mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) { DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", - mode_cmd->pitches[0], obj->stride); + mode_cmd->pitches[0], + i915_gem_object_get_stride(obj)); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index d4be076..85adc2b 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -741,7 +741,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->fb.pixel_format = fb->pixel_format; cache->fb.stride = fb->pitches[0]; cache->fb.fence_reg = obj->fence_reg; - cache->fb.tiling_mode = obj->tiling_mode; + cache->fb.tiling_mode = i915_gem_object_get_tiling(obj); } static bool intel_fbc_can_activate(struct intel_crtc *crtc) diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 413a203..90f3ab4 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1129,7 +1129,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, 
drm_modeset_lock_all(dev); mutex_lock(&dev->struct_mutex); - if (new_bo->tiling_mode) { + if (i915_gem_object_is_tiled(new_bo)) { DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n"); ret = -EINVAL; goto out_unlock; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index eedcace..aef0b10 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1585,7 +1585,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) obj = intel_fb_obj(enabled->primary->state->fb); /* self-refresh seems busted with untiled */ - if (obj->tiling_mode == I915_TILING_NONE) + if (!i915_gem_object_is_tiled(obj)) enabled = NULL; } diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index e045295..9ed7ad3 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -431,7 +431,7 @@ vlv_update_plane(struct drm_plane *dplane, */ sprctl |= SP_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) sprctl |= SP_TILED; /* Sizes are 0 based */ @@ -468,7 +468,7 @@ vlv_update_plane(struct drm_plane *dplane, I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]); I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) I915_WRITE(SPTILEOFF(pipe, plane), (y << 16) | x); else I915_WRITE(SPLINOFF(pipe, plane), linear_offset); @@ -553,7 +553,7 @@ ivb_update_plane(struct drm_plane *plane, */ sprctl |= SPRITE_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) sprctl |= SPRITE_TILED; if (IS_HASWELL(dev) || IS_BROADWELL(dev)) @@ -607,7 +607,7 @@ ivb_update_plane(struct drm_plane *plane, * register */ if (IS_HASWELL(dev) || IS_BROADWELL(dev)) I915_WRITE(SPROFFSET(pipe), (y << 16) | x); - else if (obj->tiling_mode != I915_TILING_NONE) + else if (i915_gem_object_is_tiled(obj)) I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x); else I915_WRITE(SPRLINOFF(pipe), linear_offset); @@ -694,7 +694,7 @@ ilk_update_plane(struct drm_plane *plane, */ dvscntr |= DVS_GAMMA_ENABLE; - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) dvscntr |= DVS_TILED; if (IS_GEN6(dev)) @@ -737,7 +737,7 @@ ilk_update_plane(struct drm_plane *plane, I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]); I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x); - if (obj->tiling_mode != I915_TILING_NONE) + if (i915_gem_object_is_tiled(obj)) I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x); else I915_WRITE(DVSLINOFF(pipe), linear_offset); -- cgit v0.10.2 From 209b3f7ed0e865ef0f3ffde3d623703019daeafc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:24 +0100 Subject: drm/i915: Assert that the request hasn't been retired With all callers now not playing tricks with dropping the struct_mutex between waiting and retiring, we can assert that the request is ready to be retired. 
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-19-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 1f91dc8..b317a67 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -170,7 +170,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) struct i915_gem_active *active, *next; trace_i915_gem_request_retire(request); - list_del_init(&request->link); + list_del(&request->link); /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position @@ -228,9 +228,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) struct drm_i915_gem_request *tmp; lockdep_assert_held(&req->i915->drm.struct_mutex); - - if (list_empty(&req->link)) - return; + GEM_BUG_ON(list_empty(&req->link)); do { tmp = list_first_entry(&engine->request_list, -- cgit v0.10.2 From 575e3ccbce4582395d57612b289178bad4af3be8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 2 Aug 2016 09:36:53 +0100 Subject: drm/i915: fix WaInsertDummyPushConstPs As pointed out by Chris Harris, we are using the wrong WA name, it should in fact be WaToEnableHwFixForPushConstHWBug, also it should be applied from C0 onwards for both BXT and KBL. Fixes: 7b9005cd45f3 ("drm/i915: Add WaInsertDummyPushConstP for bxt and kbl") Cc: Chris Harris Cc: Mika Kuoppala Reported-by: Chris Harris Signed-off-by: Matthew Auld Reviewed-by: Arun Siluvery Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470127013-29653-1-git-send-email-matthew.auld@intel.com diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 322274a..e08a1e1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1149,8 +1149,8 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2)); - /* WaInsertDummyPushConstPs:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + /* WaToEnableHwFixForPushConstHWBug:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -1193,8 +1193,8 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_RO_PERF_DIS); - /* WaInsertDummyPushConstPs:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); -- cgit v0.10.2 From 19e0b4cab9cf3c07bc84360a854f9040d8c64644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Aug 2016 19:05:42 +0300 Subject: Revert "drm/i915: Track active streams also for DP SST" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f64425a82bdb5c3d7e09ba765716da88a9b00eec. active_streams will get totally out of whack with SST unless we sync up with the hw state at readout, obviously! We don't yet do that, so now the WARNs fire all the time. 
Let's revert :( Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470413142-26402-1-git-send-email-ville.syrjala@linux.intel.com References: https://bugs.freedesktop.org/show_bug.cgi?id=95472#c14 Acked-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b8f729a..c2df4e4 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1641,9 +1641,6 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) intel_ddi_init_dp_buf_reg(intel_encoder); - WARN_ON(intel_dp->active_streams != 0); - intel_dp->active_streams++; - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); if (port != PORT_A || INTEL_INFO(dev_priv)->gen >= 9) @@ -1770,13 +1767,6 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder) intel_psr_disable(intel_dp); intel_edp_backlight_off(intel_dp); } - - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - - intel_dp->active_streams--; - WARN_ON(intel_dp->active_streams != 0); - } } bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0aadc65..8fe2afa 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2685,9 +2685,6 @@ static void intel_enable_dp(struct intel_encoder *encoder) lane_mask); } - WARN_ON(intel_dp->active_streams != 0); - intel_dp->active_streams++; - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); intel_dp_stop_link_train(intel_dp); @@ -3347,9 +3344,6 @@ intel_dp_link_down(struct intel_dp *intel_dp) DRM_DEBUG_KMS("\n"); - intel_dp->active_streams--; - WARN_ON(intel_dp->active_streams != 0); - if ((IS_GEN7(dev) && port == PORT_A) || (HAS_PCH_CPT(dev) && port != PORT_A)) { DP &= ~DP_LINK_TRAIN_MASK_CPT; @@ -3851,7 +3845,7 @@ go_again: if (bret == true) { /* check link status - esi[10] = 0x200c */ - if (intel_dp->active_streams && + if (intel_dp->active_mst_links && !drm_dp_channel_eq_ok(&esi[10], intel_dp->lane_count)) { DRM_DEBUG_KMS("channel EQ not ok, retraining\n"); intel_dp_start_link_train(intel_dp); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 0beca91..629337d 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -99,7 +99,7 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder) struct intel_dp *intel_dp = &intel_dig_port->dp; int ret; - DRM_DEBUG_KMS("%d\n", intel_dp->active_streams); + DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); drm_dp_mst_reset_vcpi_slots(&intel_dp->mst_mgr, intel_mst->connector->port); @@ -115,7 +115,7 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder) struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; - DRM_DEBUG_KMS("%d\n", intel_dp->active_streams); + DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); /* this can fail */ drm_dp_check_act_status(&intel_dp->mst_mgr); @@ -124,10 +124,10 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder) drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, intel_mst->connector->port); - intel_dp->active_streams--; + intel_dp->active_mst_links--; intel_mst->connector = NULL; - if (intel_dp->active_streams == 0) { + if (intel_dp->active_mst_links == 0) { intel_dig_port->base.post_disable(&intel_dig_port->base); 
intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); } @@ -165,11 +165,11 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder) */ found->encoder = encoder; - DRM_DEBUG_KMS("%d\n", intel_dp->active_streams); + DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); intel_mst->connector = found; - if (intel_dp->active_streams == 0) { + if (intel_dp->active_mst_links == 0) { intel_ddi_clk_select(&intel_dig_port->base, intel_crtc->config); intel_prepare_dp_ddi_buffers(&intel_dig_port->base); @@ -193,7 +193,7 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder) } - intel_dp->active_streams++; + intel_dp->active_mst_links++; temp = I915_READ(DP_TP_STATUS(port)); I915_WRITE(DP_TP_STATUS(port), temp); @@ -210,7 +210,7 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder) enum port port = intel_dig_port->port; int ret; - DRM_DEBUG_KMS("%d\n", intel_dp->active_streams); + DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); if (intel_wait_for_register(dev_priv, DP_TP_STATUS(port), diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a978866..b1fc67e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -894,7 +894,7 @@ struct intel_dp { bool can_mst; /* this port supports mst */ bool is_mst; - int active_streams; /* number of active streams (for SST and MST both) */ + int active_mst_links; /* connector directly attached - won't be use for modeset in mst world */ struct intel_connector *attached_connector; -- cgit v0.10.2 From 4e9121e6b4f9fee47e6746bfdb6745951b7a9b20 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 3 Aug 2016 08:22:57 -0700 Subject: drm/i915: Fix copy_to_user usage for pipe_crc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copy to user return the number of bytes it couldn't write and zero on success. So any number different than 0 should be considered a fault, not only when it doesn't write the full size. v2: fixed the inverted logic. (Ville) Cc: Ville Syrjälä Reviewed-by: Daniel Vetter Signed-off-by: Rodrigo Vivi diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0620a84..9bd4158 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3625,7 +3625,6 @@ i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count, while (n_entries > 0) { struct intel_pipe_crc_entry *entry = &pipe_crc->entries[pipe_crc->tail]; - int ret; if (CIRC_CNT(pipe_crc->head, pipe_crc->tail, INTEL_PIPE_CRC_ENTRIES_NR) < 1) @@ -3642,8 +3641,7 @@ i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count, spin_unlock_irq(&pipe_crc->lock); - ret = copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN); - if (ret == PIPE_CRC_LINE_LEN) + if (copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN)) return -EFAULT; user_buf += PIPE_CRC_LINE_LEN; -- cgit v0.10.2 From 4194c088df6808336bcb1fe434332fb64bdb240e Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 3 Aug 2016 10:00:56 -0700 Subject: drm/i915: Use drm official vblank_no_hw_counter callback. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change. Instead of defining a new empty function let's use what is available on drm. It gets cleaner, and easy to read, and understand. 
Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 006a855..591f452 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -656,12 +656,6 @@ static void i915_enable_asle_pipestat(struct drm_i915_private *dev_priv) * of horizontal active on the first line of vertical active */ -static u32 i8xx_get_vblank_counter(struct drm_device *dev, unsigned int pipe) -{ - /* Gen2 doesn't have a hardware frame counter */ - return 0; -} - /* Called from drm generic code, passed a 'crtc', which * we use as a pipe index */ @@ -4538,8 +4532,9 @@ void intel_irq_init(struct drm_i915_private *dev_priv) i915_hangcheck_elapsed); if (IS_GEN2(dev_priv)) { + /* Gen2 doesn't have a hardware frame counter */ dev->max_vblank_count = 0; - dev->driver->get_vblank_counter = i8xx_get_vblank_counter; + dev->driver->get_vblank_counter = drm_vblank_no_hw_counter; } else if (IS_G4X(dev_priv) || INTEL_INFO(dev_priv)->gen >= 5) { dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ dev->driver->get_vblank_counter = g4x_get_vblank_counter; -- cgit v0.10.2 From 2e7ba01494ea1ec33ae7d7f5f124975818ddb825 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Aug 2016 08:37:00 +0100 Subject: drm/i915: Remove unused i915_gem_active_peek_rcu() This was originally introduced to be used by the busy-ioctl, but in the end busy ioctl performed a different dance. Since there are no users, and no likely users, remove an unwanted chunk of the API. Suggested-by: Daniel Vetter Signed-off-by: Chris Wilson Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470728222-10243-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 3496e28..583e237 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -381,27 +381,6 @@ i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) } /** - * i915_gem_active_peek_rcu - report the active request being monitored - * @active - the active tracker - * - * i915_gem_active_peek_rcu() returns the current request being tracked if - * still active, or NULL. It does not obtain a reference on the request - * for the caller, and inspection of the request is only valid under - * the RCU lock. - */ -static inline struct drm_i915_gem_request * -i915_gem_active_peek_rcu(const struct i915_gem_active *active) -{ - struct drm_i915_gem_request *request; - - request = rcu_dereference(active->request); - if (!request || i915_gem_request_completed(request)) - return NULL; - - return request; -} - -/** * i915_gem_active_get - return a reference to the active request * @active - the active tracker * -- cgit v0.10.2 From 385384a82cb4d9d1725330fde293877c36c1dba2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Aug 2016 08:37:01 +0100 Subject: drm/i915: Wrap the protected active RCU dereference in a helper As we do the lockdep protected RCU lookup in a couple of places, refactor that code to a common helper i915_gem_active_raw(). 
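The helper itself is tiny; reproduced from the diff below for readability:

	static inline struct drm_i915_gem_request *
	i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
	{
		return rcu_dereference_protected(active->request,
						 lockdep_is_held(mutex));
	}

Callers that already hold struct_mutex (request retirement, reset cleanup) can then fetch the tracked request directly, without the completed-request filtering that i915_gem_active_peek() applies.
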
Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470728222-10243-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 583e237..f6661f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -360,6 +360,21 @@ __i915_gem_active_peek(const struct i915_gem_active *active) } /** + * i915_gem_active_raw - return the active request + * @active - the active tracker + * + * i915_gem_active_raw() returns the current request being tracked, or NULL. + * It does not obtain a reference on the request for the caller, so the caller + * must hold struct_mutex. + */ +static inline struct drm_i915_gem_request * +i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) +{ + return rcu_dereference_protected(active->request, + lockdep_is_held(mutex)); +} + +/** * i915_gem_active_peek - report the active request being monitored * @active - the active tracker * @@ -372,8 +387,7 @@ i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) { struct drm_i915_gem_request *request; - request = rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); + request = i915_gem_active_raw(active, mutex); if (!request || i915_gem_request_completed(request)) return NULL; @@ -614,8 +628,7 @@ i915_gem_active_retire(struct i915_gem_active *active, struct drm_i915_gem_request *request; int ret; - request = rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); + request = i915_gem_active_raw(active, mutex); if (!request) return 0; -- cgit v0.10.2 From 87b723a16de9ff95e2b7d61dbd86bddd3c1716d9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Aug 2016 08:37:02 +0100 Subject: drm/i915: Don't check for idleness before retiring after a GPU hang When we force the cleanup after a GPU hang, we want to retire all requests, or else we may leak them if truly wedged (and the GPU never advances again). Converting to the active request helpers had the issue of doing the check against busyness before reporting the request, so if we claim the GPU had hung but this engine hadn't we could potential skip the request cleanup - triggering the self-check BUG. Fixes: dcff85c8443e ("drm/i915: Enable i915_gem_wait_for_idle() ...") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470728222-10243-3-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7a00678..bb83069 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2423,15 +2423,11 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) struct drm_i915_gem_request *request; struct intel_ring *ring; - request = i915_gem_active_peek(&engine->last_request, - &engine->i915->drm.struct_mutex); - /* Mark all pending requests as complete so that any concurrent * (lockless) lookup doesn't try and wait upon the request as we * reset it. */ - if (request) - intel_engine_init_seqno(engine, request->fence.seqno); + intel_engine_init_seqno(engine, engine->last_submitted_seqno); /* * Clear the execlists queue up before freeing the requests, as those @@ -2453,6 +2449,8 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) * implicit references on things like e.g. ppgtt address spaces through * the request. 
*/ + request = i915_gem_active_raw(&engine->last_request, + &engine->i915->drm.struct_mutex); if (request) i915_gem_request_retire_upto(request); GEM_BUG_ON(intel_engine_is_active(engine)); -- cgit v0.10.2 From edf6b76f64a2f62b81ed796fe2ce6dd664351d64 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Aug 2016 09:23:33 +0100 Subject: drm/i915: Add smp_rmb() to busy ioctl's RCU dance In the debate as to whether the second read of active->request is ordered after the dependent reads of the first read of active->request, just give in and throw a smp_rmb() in there so that ordering of loads is assured. v2: Explain the manual smp_rmb() Signed-off-by: Chris Wilson Cc: Daniel Vetter Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470731014-6894-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bb83069..373136f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3733,7 +3733,7 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view)); } -static __always_inline unsigned __busy_read_flag(unsigned int id) +static __always_inline unsigned int __busy_read_flag(unsigned int id) { /* Note that we could alias engines in the execbuf API, but * that would be very unwise as it prevents userspace from @@ -3751,7 +3751,7 @@ static __always_inline unsigned int __busy_write_id(unsigned int id) return id; } -static __always_inline unsigned +static __always_inline unsigned int __busy_set_if_active(const struct i915_gem_active *active, unsigned int (*flag)(unsigned int id)) { @@ -3768,19 +3768,45 @@ __busy_set_if_active(const struct i915_gem_active *active, id = request->engine->exec_id; - /* Check that the pointer wasn't reassigned and overwritten. */ + /* Check that the pointer wasn't reassigned and overwritten. + * + * In __i915_gem_active_get_rcu(), we enforce ordering between + * the first rcu pointer dereference (imposing a + * read-dependency only on access through the pointer) and + * the second lockless access through the memory barrier + * following a successful atomic_inc_not_zero(). Here there + * is no such barrier, and so we must manually insert an + * explicit read barrier to ensure that the following + * access occurs after all the loads through the first + * pointer. + * + * It is worth comparing this sequence with + * raw_write_seqcount_latch() which operates very similarly. + * The challenge here is the visibility of the other CPU + * writes to the reallocated request vs the local CPU ordering. + * Before the other CPU can overwrite the request, it will + * have updated our active->request and gone through a wmb. + * During the read here, we want to make sure that the values + * we see have not been overwritten as we do so - and we do + * that by serialising the second pointer check with the writes + * on other other CPUs. + * + * The corresponding write barrier is part of + * rcu_assign_pointer(). 
+ */ + smp_rmb(); if (request == rcu_access_pointer(active->request)) return flag(id); } while (1); } -static inline unsigned +static __always_inline unsigned int busy_check_reader(const struct i915_gem_active *active) { return __busy_set_if_active(active, __busy_read_flag); } -static inline unsigned +static __always_inline unsigned int busy_check_writer(const struct i915_gem_active *active) { return __busy_set_if_active(active, __busy_write_id); diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index f6661f3..6dd83b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -490,6 +490,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) * incremented) then the following read for rcu_access_pointer() * must occur after the atomic operation and so confirm * that this request is the one currently being tracked. + * + * The corresponding write barrier is part of + * rcu_assign_pointer(). */ if (!request || request == rcu_access_pointer(active->request)) return rcu_pointer_handoff(request); -- cgit v0.10.2 From 5a198b8c53e68b6e4737b6890995e676d856c9ed Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Aug 2016 09:23:34 +0100 Subject: drm/i915: Do not overwrite the request with zero on reallocation When using RCU lookup for the request, commit 0eafec6d3244 ("drm/i915: Enable lockless lookup of request tracking via RCU"), we acknowledge that we may race with another thread that could have reallocated the request. In order for the first thread not to blow up, the second thread must not clear the request completed before overwriting it. In the RCU lookup, we allow for the engine/seqno to be replaced but we do not allow for it to be zeroed. The choice we make is to either add extra checking to the RCU lookup, or embrace the inherent races (as intended). It is more complicated as we need to manually clear everything we depend upon being zero initialised, but we benefit from not emiting the memset() to clear the entire frequently allocated structure (that memset turns up in throughput profiles). And at the same time, the lookup remains flexible for future adjustments. v2: Old style LRC requires another variable to be initialize. (The danger inherent in not zeroing everything.) v3: request->batch also needs to be cleared v4: signaling.tsk is no long used unset, but pid still exists Fixes: 0eafec6d3244 ("drm/i915: Enable lockless lookup of request...") Signed-off-by: Chris Wilson Cc: "Goel, Akash" Cc: Daniel Vetter Cc: Joonas Lahtinen Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470731014-6894-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 6a16616..06d1267 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -355,7 +355,35 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (req && i915_gem_request_completed(req)) i915_gem_request_retire(req); - req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); + /* Beware: Dragons be flying overhead. + * + * We use RCU to look up requests in flight. The lookups may + * race with the request being allocated from the slab freelist. + * That is the request we are writing to here, may be in the process + * of being read by __i915_gem_active_get_request_rcu(). As such, + * we have to be very careful when overwriting the contents. 
During + * the RCU lookup, we change chase the request->engine pointer, + * read the request->fence.seqno and increment the reference count. + * + * The reference count is incremented atomically. If it is zero, + * the lookup knows the request is unallocated and complete. Otherwise, + * it is either still in use, or has been reallocated and reset + * with fence_init(). This increment is safe for release as we check + * that the request we have a reference to and matches the active + * request. + * + * Before we increment the refcount, we chase the request->engine + * pointer. We must not call kmem_cache_zalloc() or else we set + * that pointer to NULL and cause a crash during the lookup. If + * we see the request is completed (based on the value of the + * old engine and seqno), the lookup is complete and reports NULL. + * If we decide the request is not completed (new engine or seqno), + * then we grab a reference and double check that it is still the + * active request - which it won't be and restart the lookup. + * + * Do not use kmem_cache_zalloc() here! + */ + req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); if (!req) return ERR_PTR(-ENOMEM); @@ -375,6 +403,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->engine = engine; req->ctx = i915_gem_context_get(ctx); + /* No zalloc, must clear what we need by hand */ + req->previous_context = NULL; + req->file_priv = NULL; + req->batch_obj = NULL; + req->pid = NULL; + req->elsp_submitted = 0; + /* * Reserve space in the ring buffer for all the commands required to * eventually emit this request. This is to guarantee that the diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 6dd83b1..4e91d49 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -51,6 +51,13 @@ struct intel_signal_node { * emission time to be associated with the request for tracking how far ahead * of the GPU the submission is. * + * When modifying this structure be very aware that we perform a lockless + * RCU lookup of it that may race against reallocation of the struct + * from the slab freelist. We intentionally do not zero the structure on + * allocation so that the lookup can use the dangling pointers (and is + * cogniscent that those pointers may be wrong). Instead, everything that + * needs to be initialised must be done so explicitly. + * * The requests are reference counted. */ struct drm_i915_gem_request { @@ -458,6 +465,10 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) * just report the active tracker is idle. If the new request is * incomplete, then we acquire a reference on it and check that * it remained the active request. + * + * It is then imperative that we do not zero the request on + * reallocation, so that we can chase the dangling pointers! + * See i915_gem_request_alloc(). */ do { struct drm_i915_gem_request *request; -- cgit v0.10.2 From 09fa8bb9094569941661ed2a1dc0ff4bd37a0d7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Aug 2016 20:41:34 +0300 Subject: drm/i915: Add some curly braces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_enable_pipe() looks rather confusing when one side doesn't have the curly braces, and the other one does. And what's even worse, there's another if-else inside the braceless side. Let's put braces around it to make it clear which branch goes where. 
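For illustration, here is the pattern being fixed, with placeholder names (do_a/do_b/do_c and the conditions are not the driver's real code). The braceless form parses exactly as intended, but which branch pairs with which 'else' is easy to misread at a glance:

/* Placeholder helpers, purely illustrative. */
static void do_a(void);
static void do_b(void);
static void do_c(void);

static void braceless_version(int outer_cond, int inner_cond)
{
	if (outer_cond)
		if (inner_cond)
			do_a();
		else
			do_b();
	else {			/* binds to the outer if, but easy to misread */
		do_c();
	}
}

static void braced_version(int outer_cond, int inner_cond)
{
	if (outer_cond) {
		if (inner_cond)
			do_a();
		else
			do_b();
	} else {
		do_c();
	}
}

Both versions behave identically; the braced form is what the hunk below switches to.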
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470418894-1249-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9cbf543..ddae54a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1959,12 +1959,12 @@ static void intel_enable_pipe(struct intel_crtc *crtc) * a plane. On ILK+ the pipe PLLs are integrated, so we don't * need the check. */ - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH_DISPLAY(dev_priv)) { if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI)) assert_dsi_pll_enabled(dev_priv); else assert_pll_enabled(dev_priv, pipe); - else { + } else { if (crtc->config->has_pch_encoder) { /* if driving the PCH, we need FDI enabled */ assert_fdi_rx_pll_enabled(dev_priv, pch_transcoder); -- cgit v0.10.2 From a168f5b3f1797d6704d2edb36a3674e663154f80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Aug 2016 20:00:17 +0300 Subject: drm/i915: Don't mark PCH underrun reporting as disabled for transcoder B/C on LPT-H MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marking PCH transcoder FIFO underrun reporting as disabled for transcoder B/C on LPT-H will block us from enabling the south error interrupt. So let's only mark transcoder A underrun reporting as disabled initially. This is a little tricky to hit since you need a machine with LPT-H, and the BIOS must enable either pipe B or C at boot. Then i915 would mark the "transcoder B/C" underrun reporting as disabled and never enable it again, meaning south interrupts would never get enabled either. The only other interrupt in there is actually the poison interrupt which, if we could ever trigger it, would just result in a little error in dmesg. Here's the resulting change in SDEIMR on my HSW when I boot it with multiple displays attached: - (0x000c4004): 0xf115ffff + (0x000c4004): 0xf114ffff My previous attempt [1] tried to fix this a little differently, but Daniel requested I do this instead. [1] https://lists.freedesktop.org/archives/intel-gfx/2015-November/081420.html Cc: Daniel Vetter Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470416417-15021-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ddae54a..47ad6b3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15771,6 +15771,13 @@ static bool intel_encoder_has_connectors(struct intel_encoder *encoder) return false; } +static bool has_pch_trancoder(struct drm_i915_private *dev_priv, + enum transcoder pch_transcoder) +{ + return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || + (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A); +} + static void intel_sanitize_crtc(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; @@ -15849,7 +15856,17 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) * worst a fifo underrun happens which also sets this to false. */ crtc->cpu_fifo_underrun_disabled = true; - crtc->pch_fifo_underrun_disabled = true; + /* + * We track the PCH trancoder underrun reporting state + * within the crtc. With crtc for pipe A housing the underrun + * reporting state for PCH transcoder A, crtc for pipe B housing + * it for PCH transcoder B, etc. 
LPT-H has only PCH transcoder A, + * and marking underrun reporting as disabled for the non-existing + * PCH transcoders B and C would prevent enabling the south + * error interrupt (see cpt_can_enable_serr_int()). + */ + if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe)) + crtc->pch_fifo_underrun_disabled = true; } } -- cgit v0.10.2 From 739748939974791b84629a8790527a16f76873a4 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 5 Aug 2016 23:28:27 +0300 Subject: drm/i915: Fix modeset handling during gpu reset, v5. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function would call drm_modeset_lock_all, while the suspend/resume functions already have their own locking. Fix this by factoring out __intel_display_resume, and calling the atomic helpers for duplicating atomic state and disabling all crtc's during suspend. Changes since v1: - Deal with -EDEADLK right after lock_all and clean up calls to hw readout. - Always take all modeset locks so updates during gpu reset are blocked. Changes since v2: - Fix deadlock in intel_update_primary_planes. - Move WARN_ON(EDEADLK) to __intel_display_resume. - pctx -> ctx - only call __intel_display_resume on success in intel_display_resume. Changes since v3: - Rebase on top of dev_priv -> dev change. - Use drm_modeset_lock_all_ctx instead of drm_modeset_lock_all. Changes since v4 [by vsyrjala]: - Deal with skip_intermediate_wm - Update comment w.r.t. mode_config.mutex vs. ->detect() - Rebase due to INTEL_GEN() etc. Signed-off-by: Maarten Lankhorst Fixes: e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.") Cc: stable@vger.kernel.org Tested-by: Ville Syrjälä Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470428910-12125-2-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c36d176..54f789c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1840,6 +1840,7 @@ struct drm_i915_private { enum modeset_restore modeset_restore; struct mutex modeset_restore_lock; struct drm_atomic_state *modeset_restore_state; + struct drm_modeset_acquire_ctx reset_ctx; struct list_head vm_list; /* Global list of all address spaces */ struct i915_ggtt ggtt; /* VM representing the global address space */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 47ad6b3..a951395 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3093,40 +3093,110 @@ static void intel_update_primary_planes(struct drm_device *dev) for_each_crtc(dev, crtc) { struct intel_plane *plane = to_intel_plane(crtc->primary); - struct intel_plane_state *plane_state; - - drm_modeset_lock_crtc(crtc, &plane->base); - plane_state = to_intel_plane_state(plane->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); if (plane_state->visible) plane->update_plane(&plane->base, to_intel_crtc_state(crtc->state), plane_state); + } +} + +static int +__intel_display_resume(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_crtc_state *crtc_state; + struct drm_crtc *crtc; + int i, ret; - drm_modeset_unlock_crtc(crtc); + intel_modeset_setup_hw_state(dev); + i915_redisable_vga(dev); + + if (!state) + return 0; + + for_each_crtc_in_state(state, crtc, crtc_state, i) { + /* + * Force recalculation even if we restore + * current state. 
With fast modeset this may not result + * in a modeset when the state is compatible. + */ + crtc_state->mode_changed = true; } + + /* ignore any reset values/BIOS leftovers in the WM registers */ + to_intel_atomic_state(state)->skip_intermediate_wm = true; + + ret = drm_atomic_commit(state); + + WARN_ON(ret == -EDEADLK); + return ret; } void intel_prepare_reset(struct drm_i915_private *dev_priv) { + struct drm_device *dev = &dev_priv->drm; + struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; + struct drm_atomic_state *state; + int ret; + /* no reset support for gen2 */ if (IS_GEN2(dev_priv)) return; - /* reset doesn't touch the display */ + /* + * Need mode_config.mutex so that we don't + * trample ongoing ->detect() and whatnot. + */ + mutex_lock(&dev->mode_config.mutex); + drm_modeset_acquire_init(ctx, 0); + while (1) { + ret = drm_modeset_lock_all_ctx(dev, ctx); + if (ret != -EDEADLK) + break; + + drm_modeset_backoff(ctx); + } + + /* reset doesn't touch the display, but flips might get nuked anyway, */ if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) return; - drm_modeset_lock_all(&dev_priv->drm); /* * Disabling the crtcs gracefully seems nicer. Also the * g33 docs say we should at least disable all the planes. */ - intel_display_suspend(&dev_priv->drm); + state = drm_atomic_helper_duplicate_state(dev, ctx); + if (IS_ERR(state)) { + ret = PTR_ERR(state); + state = NULL; + DRM_ERROR("Duplicating state failed with %i\n", ret); + goto err; + } + + ret = drm_atomic_helper_disable_all(dev, ctx); + if (ret) { + DRM_ERROR("Suspending crtc's failed with %i\n", ret); + goto err; + } + + dev_priv->modeset_restore_state = state; + state->acquire_ctx = ctx; + return; + +err: + drm_atomic_state_free(state); } void intel_finish_reset(struct drm_i915_private *dev_priv) { + struct drm_device *dev = &dev_priv->drm; + struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; + struct drm_atomic_state *state = dev_priv->modeset_restore_state; + int ret; + /* * Flips in the rings will be nuked by the reset, * so complete all pending flips so that user space @@ -3138,6 +3208,8 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) if (IS_GEN2(dev_priv)) return; + dev_priv->modeset_restore_state = NULL; + /* reset doesn't touch the display */ if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { /* @@ -3149,29 +3221,32 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) * FIXME: Atomic will make this obsolete since we won't schedule * CS-based flips (which might get lost in gpu resets) any more. */ - intel_update_primary_planes(&dev_priv->drm); - return; - } - - /* - * The display has been reset as well, - * so need a full re-initialization. - */ - intel_runtime_pm_disable_interrupts(dev_priv); - intel_runtime_pm_enable_interrupts(dev_priv); + intel_update_primary_planes(dev); + } else { + /* + * The display has been reset as well, + * so need a full re-initialization. 
+ */ + intel_runtime_pm_disable_interrupts(dev_priv); + intel_runtime_pm_enable_interrupts(dev_priv); - intel_modeset_init_hw(&dev_priv->drm); + intel_modeset_init_hw(dev); - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display.hpd_irq_setup) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); - intel_display_resume(&dev_priv->drm); + ret = __intel_display_resume(dev, state); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); - intel_hpd_init(dev_priv); + intel_hpd_init(dev_priv); + } - drm_modeset_unlock_all(&dev_priv->drm); + drm_modeset_drop_locks(ctx); + drm_modeset_acquire_fini(ctx); + mutex_unlock(&dev->mode_config.mutex); } static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) @@ -16180,9 +16255,10 @@ void intel_display_resume(struct drm_device *dev) struct drm_atomic_state *state = dev_priv->modeset_restore_state; struct drm_modeset_acquire_ctx ctx; int ret; - bool setup = false; dev_priv->modeset_restore_state = NULL; + if (state) + state->acquire_ctx = &ctx; /* * This is a cludge because with real atomic modeset mode_config.mutex @@ -16193,43 +16269,17 @@ void intel_display_resume(struct drm_device *dev) mutex_lock(&dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, 0); -retry: - ret = drm_modeset_lock_all_ctx(dev, &ctx); - - if (ret == 0 && !setup) { - setup = true; - - intel_modeset_setup_hw_state(dev); - i915_redisable_vga(dev); - } - - if (ret == 0 && state) { - struct drm_crtc_state *crtc_state; - struct drm_crtc *crtc; - int i; - - state->acquire_ctx = &ctx; - - /* ignore any reset values/BIOS leftovers in the WM registers */ - to_intel_atomic_state(state)->skip_intermediate_wm = true; - - for_each_crtc_in_state(state, crtc, crtc_state, i) { - /* - * Force recalculation even if we restore - * current state. With fast modeset this may not result - * in a modeset when the state is compatible. - */ - crtc_state->mode_changed = true; - } - - ret = drm_atomic_commit(state); - } + while (1) { + ret = drm_modeset_lock_all_ctx(dev, &ctx); + if (ret != -EDEADLK) + break; - if (ret == -EDEADLK) { drm_modeset_backoff(&ctx); - goto retry; } + if (!ret) + ret = __intel_display_resume(dev, state); + drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); mutex_unlock(&dev->mode_config.mutex); -- cgit v0.10.2 From 522a63de18a12a3c74609323f984790e21d4a47e Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 5 Aug 2016 23:28:28 +0300 Subject: drm/i915: Add a way to test the modeset done during gpu reset, v3. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add force_reset_modeset_test as a parameter to force the modeset path during gpu reset. This allows a IGT test to set the knob and trigger a hang to force the gpu reset, even on platforms that wouldn't otherwise require it. Changes since v1: - Split out fix to separate commit. Changes since v2: - This commit is purely about force_reset_modeset_test now. 
Signed-off-by: Maarten Lankhorst Testcase: drv_hangman.reset-with-forced-modeset Tested-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470428910-12125-3-git-send-email-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index b6e404c..768ad89 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -45,6 +45,7 @@ struct i915_params i915 __read_mostly = { .fastboot = 0, .prefault_disable = 0, .load_detect_test = 0, + .force_reset_modeset_test = 0, .reset = true, .invert_brightness = 0, .disable_display = 0, @@ -161,6 +162,11 @@ MODULE_PARM_DESC(load_detect_test, "Force-enable the VGA load detect code for testing (default:false). " "For developers only."); +module_param_named_unsafe(force_reset_modeset_test, i915.force_reset_modeset_test, bool, 0600); +MODULE_PARM_DESC(force_reset_modeset_test, + "Force a modeset during gpu reset for testing (default:false). " + "For developers only."); + module_param_named_unsafe(invert_brightness, i915.invert_brightness, int, 0600); MODULE_PARM_DESC(invert_brightness, "Invert backlight brightness " diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 0ad020b..3a0dd78 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -57,6 +57,7 @@ struct i915_params { bool fastboot; bool prefault_disable; bool load_detect_test; + bool force_reset_modeset_test; bool reset; bool disable_display; bool verbose_state_checks; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a951395..8fa200bf 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3161,7 +3161,8 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) } /* reset doesn't touch the display, but flips might get nuked anyway, */ - if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) + if (!i915.force_reset_modeset_test && + (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))) return; /* @@ -3212,16 +3213,22 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) /* reset doesn't touch the display */ if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { - /* - * Flips in the rings have been nuked by the reset, - * so update the base address of all primary - * planes to the the last fb to make sure we're - * showing the correct fb after a reset. - * - * FIXME: Atomic will make this obsolete since we won't schedule - * CS-based flips (which might get lost in gpu resets) any more. - */ - intel_update_primary_planes(dev); + if (!state) { + /* + * Flips in the rings have been nuked by the reset, + * so update the base address of all primary + * planes to the the last fb to make sure we're + * showing the correct fb after a reset. + * + * FIXME: Atomic will make this obsolete since we won't schedule + * CS-based flips (which might get lost in gpu resets) any more. 
+ */ + intel_update_primary_planes(dev); + } else { + ret = __intel_display_resume(dev, state); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); + } } else { /* * The display has been reset as well, -- cgit v0.10.2 From 4ac2ba2f8c4b7bb33d32fc1be290daf25d39802a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Aug 2016 23:28:29 +0300 Subject: drm/i915: Introduce gpu_reset_clobbers_display() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out the "does the GPU reset clobber the display?" check into a small helper. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470428910-12125-4-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8fa200bf..5d37661 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3135,6 +3135,11 @@ __intel_display_resume(struct drm_device *dev, return ret; } +static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv) +{ + return INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv); +} + void intel_prepare_reset(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; @@ -3162,7 +3167,7 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) /* reset doesn't touch the display, but flips might get nuked anyway, */ if (!i915.force_reset_modeset_test && - (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))) + !gpu_reset_clobbers_display(dev_priv)) return; /* @@ -3212,7 +3217,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) dev_priv->modeset_restore_state = NULL; /* reset doesn't touch the display */ - if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { + if (!gpu_reset_clobbers_display(dev_priv)) { if (!state) { /* * Flips in the rings have been nuked by the reset, -- cgit v0.10.2 From ae98104bec5bbe9543764a78f37a421d45dc65af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Aug 2016 23:28:30 +0300 Subject: drm/i915: Use the g4x+ approach on gen2 for handling display stuff around GPU reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't have GPU reset support for gen2, which means the display hardware is unaffected when a GPU hang is handled. However as the ring has in fact stopped, any flips still in the ring will never complete, and thus the display base address updates will never happen. So we really need to fix that up manually just like we do on g4x+. In fact, let's just use intel_has_gpu_reset() instead of IS_GEN2() since that'll also handle cases where someone would disable the GPU reset support on gen3/4 for whatever reason. 
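Putting this together with the two preceding patches, a condensed paraphrase of how intel_finish_reset() ends up branching (illustrative only; the exact hunks are in the patches above and in the diff below):

	if (!gpu_reset_clobbers_display(dev_priv)) {
		/* The display survived the reset, but flips queued in the
		 * now-stopped rings are lost: fix up the primary plane base
		 * addresses, or replay the duplicated state if a modeset
		 * was forced for testing. */
		if (!state)
			intel_update_primary_planes(dev);
		else
			ret = __intel_display_resume(dev, state);
	} else {
		/* The reset clobbered the display as well: re-init the
		 * display hardware and restore the duplicated state. */
		intel_runtime_pm_disable_interrupts(dev_priv);
		intel_runtime_pm_enable_interrupts(dev_priv);
		intel_modeset_init_hw(dev);
		ret = __intel_display_resume(dev, state);
		intel_hpd_init(dev_priv);
	}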
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1470428910-12125-5-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5d37661..ecd1543 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3137,7 +3137,8 @@ __intel_display_resume(struct drm_device *dev, static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv) { - return INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv); + return intel_has_gpu_reset(dev_priv) && + INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv); } void intel_prepare_reset(struct drm_i915_private *dev_priv) @@ -3147,10 +3148,6 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) struct drm_atomic_state *state; int ret; - /* no reset support for gen2 */ - if (IS_GEN2(dev_priv)) - return; - /* * Need mode_config.mutex so that we don't * trample ongoing ->detect() and whatnot. @@ -3210,10 +3207,6 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) */ intel_complete_page_flips(dev_priv); - /* no reset support for gen2 */ - if (IS_GEN2(dev_priv)) - return; - dev_priv->modeset_restore_state = NULL; /* reset doesn't touch the display */ -- cgit v0.10.2 From 437c30874cb90537e6275f9ed6f6b88307a906a1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 5 Aug 2016 18:11:24 +0200 Subject: drm/i915: Update comment before i915_spin_request ~jiffie and a few usecs is 3 orders of magnitude different. A bit much. This was changed in commit ca5b721e238226af1d767103ac852aeb8e4c0764 Author: Chris Wilson Date: Fri Dec 11 11:32:58 2015 +0000 drm/i915: Limit the busy wait on requests to 5us not 10ms! But probably missed the comment since the change was non-local to the comment. v2: Polish comment more (Chris). Cc: Tvrtko Ursulin Cc: Chris Wilson Acked-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470413484-23775-1-git-send-email-daniel.vetter@ffwll.ch diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b317a67..6a16616 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -643,7 +643,7 @@ int i915_wait_request(struct drm_i915_gem_request *req, if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6) gen6_rps_boost(req->i915, rps, req->emitted_jiffies); - /* Optimistic spin for the next ~jiffie before touching IRQs */ + /* Optimistic short spin before touching IRQs */ if (i915_spin_request(req, state, 5)) goto complete; -- cgit v0.10.2 From cb7f27601c81a1e0454e9461e96f65b31fafbea0 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 5 Aug 2016 19:04:40 +0100 Subject: drm/i915: fix aliasing_ppgtt leak In i915_ggtt_cleanup_hw we need to remember to free aliasing_ppgtt. This fixes the following kmemleak message: unreferenced object 0xffff880213cca000 (size 8192): comm "modprobe", pid 1298, jiffies 4294745402 (age 703.930s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc_trace+0x142/0x1d0 [] i915_gem_init_ggtt+0x10f/0x210 [i915] [] i915_gem_init+0x5b/0xd0 [i915] [] i915_driver_load+0x97a/0x1460 [i915] [] i915_pci_probe+0x4f/0x70 [i915] [] local_pci_probe+0x45/0xa0 [] pci_device_probe+0x103/0x150 [] driver_probe_device+0x22c/0x440 [] __driver_attach+0xd1/0xf0 [] bus_for_each_dev+0x6c/0xc0 [] driver_attach+0x1e/0x20 [] bus_add_driver+0x1c3/0x280 [] driver_register+0x60/0xe0 [] __pci_register_driver+0x4c/0x50 [] 0xffffffffa013605b Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Fixes: b18b6bde300e ("drm/i915/bdw: Free PPGTT struct") Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470420280-21417-1-git-send-email-matthew.auld@intel.com diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8b4f2f3..18c7c96 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2797,6 +2797,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; ppgtt->base.cleanup(&ppgtt->base); + kfree(ppgtt); } i915_gem_cleanup_stolen(&dev_priv->drm); -- cgit v0.10.2 From c5b7e97b27db4f8a8ffe1072506620679043f006 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 8 Aug 2016 09:37:31 +0200 Subject: drm/i915: Update DRIVER_DATE to 20160808 Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index feec00f..c36d176 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -70,7 +70,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20160725" +#define DRIVER_DATE "20160808" #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ -- cgit v0.10.2 From 1d776851a6949abd85ba305514612fed16ff2459 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 3 Aug 2016 09:53:46 -0700 Subject: drm: Avoid printing negative values for unsigned variables. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was really strange to see negative vblank seqs on debug messages. It is rare to have that big number, but when it happens it is confusing and misleading. 
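For example (a standalone userspace sketch, not driver code), an unsigned vblank count close to 2^32 turns into a bogus negative number when printed with %d:

#include <stdio.h>

int main(void)
{
	unsigned int seq = 4294967000u;	/* e.g. a vblank count near wrap */

	printf("wanted %d\n", seq);	/* prints "wanted -296", misleading */
	printf("wanted %u\n", seq);	/* prints "wanted 4294967000" */
	return 0;
}

Switching the DRM_DEBUG format specifiers to %u, as below, keeps the printed value in the same domain as the variable.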
Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470243226-2750-1-git-send-email-rodrigo.vivi@intel.com diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 77f357b..01a5079 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1295,7 +1295,7 @@ void drm_vblank_off(struct drm_device *dev, unsigned int pipe) if (e->pipe != pipe) continue; DRM_DEBUG("Sending premature vblank event on disable: " - "wanted %d, current %d\n", + "wanted %u, current %u\n", e->event.sequence, seq); list_del(&e->base.link); drm_vblank_put(dev, pipe); @@ -1585,7 +1585,7 @@ static int drm_queue_vblank_event(struct drm_device *dev, unsigned int pipe, seq = drm_vblank_count_and_time(dev, pipe, &now); - DRM_DEBUG("event on vblank count %d, current %d, crtc %u\n", + DRM_DEBUG("event on vblank count %u, current %u, crtc %u\n", vblwait->request.sequence, seq, pipe); trace_drm_vblank_event_queued(current->pid, pipe, @@ -1693,7 +1693,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data, return drm_queue_vblank_event(dev, pipe, vblwait, file_priv); } - DRM_DEBUG("waiting on vblank count %d, crtc %u\n", + DRM_DEBUG("waiting on vblank count %u, crtc %u\n", vblwait->request.sequence, pipe); DRM_WAIT_ON(ret, vblank->queue, 3 * HZ, (((drm_vblank_count(dev, pipe) - @@ -1708,7 +1708,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data, vblwait->reply.tval_sec = now.tv_sec; vblwait->reply.tval_usec = now.tv_usec; - DRM_DEBUG("returning %d to client\n", + DRM_DEBUG("returning %u to client\n", vblwait->reply.sequence); } else { DRM_DEBUG("vblank wait interrupted by signal\n"); @@ -1735,7 +1735,7 @@ static void drm_handle_vblank_events(struct drm_device *dev, unsigned int pipe) if ((seq - e->event.sequence) > (1<<23)) continue; - DRM_DEBUG("vblank event on %d, current %d\n", + DRM_DEBUG("vblank event on %u, current %u\n", e->event.sequence, seq); list_del(&e->base.link); -- cgit v0.10.2 From 202b52b7fbf70858609ec20829c7d69a13ffa351 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Aug 2016 16:04:09 +0100 Subject: drm: Track drm_mm nodes with an interval tree In addition to the last-in/first-out stack for accessing drm_mm nodes, we occasionally and in the future often want to find a drm_mm_node by an address. To do so efficiently we need to track the nodes in an interval tree - lookups for a particular address will then be O(lg(N)), where N is the number of nodes in the range manager as opposed to O(N). Insertion however gains an extra O(lg(N)) step for all nodes irrespective of whether the interval tree is in use. For future i915 patches, eliminating the linear walk is a significant improvement. v2: Use generic interval-tree template for u64 and faster insertion. 
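As a usage sketch (hypothetical caller, not part of this patch), the two helpers exported here reduce an address-range query to a loop over only the overlapping nodes:

/* Visit every allocated node overlapping [start, last]; the caller is
 * assumed to hold whatever lock already protects this drm_mm. */
static void visit_range(struct drm_mm *mm, u64 start, u64 last)
{
	struct drm_mm_node *node;

	for (node = drm_mm_interval_first(mm, start, last);
	     node;
	     node = drm_mm_interval_next(node, start, last)) {
		/* node->start and node->size describe the allocation */
	}
}

The initial lookup is the O(lg(N)) step mentioned above; the previous alternative was walking the whole node list.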
Signed-off-by: Chris Wilson Cc: David Herrmann Cc: dri-devel@lists.freedesktop.org Reviewed-by: David Herrmann Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470236651-678-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index cb39f45..5c188c5 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -46,6 +46,7 @@ #include #include #include +#include /** * DOC: Overview @@ -103,6 +104,72 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_ u64 end, enum drm_mm_search_flags flags); +#define START(node) ((node)->start) +#define LAST(node) ((node)->start + (node)->size - 1) + +INTERVAL_TREE_DEFINE(struct drm_mm_node, rb, + u64, __subtree_last, + START, LAST, static inline, drm_mm_interval_tree) + +struct drm_mm_node * +drm_mm_interval_first(struct drm_mm *mm, u64 start, u64 last) +{ + return drm_mm_interval_tree_iter_first(&mm->interval_tree, + start, last); +} +EXPORT_SYMBOL(drm_mm_interval_first); + +struct drm_mm_node * +drm_mm_interval_next(struct drm_mm_node *node, u64 start, u64 last) +{ + return drm_mm_interval_tree_iter_next(node, start, last); +} +EXPORT_SYMBOL(drm_mm_interval_next); + +static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node, + struct drm_mm_node *node) +{ + struct drm_mm *mm = hole_node->mm; + struct rb_node **link, *rb; + struct drm_mm_node *parent; + + node->__subtree_last = LAST(node); + + if (hole_node->allocated) { + rb = &hole_node->rb; + while (rb) { + parent = rb_entry(rb, struct drm_mm_node, rb); + if (parent->__subtree_last >= node->__subtree_last) + break; + + parent->__subtree_last = node->__subtree_last; + rb = rb_parent(rb); + } + + rb = &hole_node->rb; + link = &hole_node->rb.rb_right; + } else { + rb = NULL; + link = &mm->interval_tree.rb_node; + } + + while (*link) { + rb = *link; + parent = rb_entry(rb, struct drm_mm_node, rb); + if (parent->__subtree_last < node->__subtree_last) + parent->__subtree_last = node->__subtree_last; + if (node->start < parent->start) + link = &parent->rb.rb_left; + else + link = &parent->rb.rb_right; + } + + rb_link_node(&node->rb, rb, link); + rb_insert_augmented(&node->rb, + &mm->interval_tree, + &drm_mm_interval_tree_augment); +} + static void drm_mm_insert_helper(struct drm_mm_node *hole_node, struct drm_mm_node *node, u64 size, unsigned alignment, @@ -153,6 +220,8 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node, INIT_LIST_HEAD(&node->hole_stack); list_add(&node->node_list, &hole_node->node_list); + drm_mm_interval_tree_add_node(hole_node, node); + BUG_ON(node->start + node->size > adj_end); node->hole_follows = 0; @@ -178,41 +247,52 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node, */ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node) { + u64 end = node->start + node->size; struct drm_mm_node *hole; - u64 end; - u64 hole_start; - u64 hole_end; - - BUG_ON(node == NULL); + u64 hole_start, hole_end; end = node->start + node->size; /* Find the relevant hole to add our node to */ - drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { - if (hole_start > node->start || hole_end < end) - continue; + hole = drm_mm_interval_tree_iter_first(&mm->interval_tree, + node->start, ~(u64)0); + if (hole) { + if (hole->start < end) + return -ENOSPC; + } else { + hole = list_entry(&mm->head_node.node_list, + typeof(*hole), node_list); + } - node->mm = mm; - node->allocated = 1; + hole = list_last_entry(&hole->node_list, 
typeof(*hole), node_list); + if (!hole->hole_follows) + return -ENOSPC; - INIT_LIST_HEAD(&node->hole_stack); - list_add(&node->node_list, &hole->node_list); + hole_start = __drm_mm_hole_node_start(hole); + hole_end = __drm_mm_hole_node_end(hole); + if (hole_start > node->start || hole_end < end) + return -ENOSPC; - if (node->start == hole_start) { - hole->hole_follows = 0; - list_del_init(&hole->hole_stack); - } + node->mm = mm; + node->allocated = 1; - node->hole_follows = 0; - if (end != hole_end) { - list_add(&node->hole_stack, &mm->hole_stack); - node->hole_follows = 1; - } + INIT_LIST_HEAD(&node->hole_stack); + list_add(&node->node_list, &hole->node_list); - return 0; + drm_mm_interval_tree_add_node(hole, node); + + if (node->start == hole_start) { + hole->hole_follows = 0; + list_del_init(&hole->hole_stack); + } + + node->hole_follows = 0; + if (end != hole_end) { + list_add(&node->hole_stack, &mm->hole_stack); + node->hole_follows = 1; } - return -ENOSPC; + return 0; } EXPORT_SYMBOL(drm_mm_reserve_node); @@ -302,6 +382,8 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node, INIT_LIST_HEAD(&node->hole_stack); list_add(&node->node_list, &hole_node->node_list); + drm_mm_interval_tree_add_node(hole_node, node); + BUG_ON(node->start < start); BUG_ON(node->start < adj_start); BUG_ON(node->start + node->size > adj_end); @@ -390,6 +472,7 @@ void drm_mm_remove_node(struct drm_mm_node *node) } else list_move(&prev_node->hole_stack, &mm->hole_stack); + drm_mm_interval_tree_remove(node, &mm->interval_tree); list_del(&node->node_list); node->allocated = 0; } @@ -516,11 +599,13 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new) { list_replace(&old->node_list, &new->node_list); list_replace(&old->hole_stack, &new->hole_stack); + rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree); new->hole_follows = old->hole_follows; new->mm = old->mm; new->start = old->start; new->size = old->size; new->color = old->color; + new->__subtree_last = old->__subtree_last; old->allocated = 0; new->allocated = 1; @@ -758,6 +843,8 @@ void drm_mm_init(struct drm_mm * mm, u64 start, u64 size) mm->head_node.size = start - mm->head_node.start; list_add_tail(&mm->head_node.hole_stack, &mm->hole_stack); + mm->interval_tree = RB_ROOT; + mm->color_adjust = NULL; } EXPORT_SYMBOL(drm_mm_init); diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index fc65118..205ddcf 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -37,6 +37,7 @@ * Generic range manager structs */ #include +#include #include #include #include @@ -61,6 +62,7 @@ enum drm_mm_allocator_flags { struct drm_mm_node { struct list_head node_list; struct list_head hole_stack; + struct rb_node rb; unsigned hole_follows : 1; unsigned scanned_block : 1; unsigned scanned_prev_free : 1; @@ -70,6 +72,7 @@ struct drm_mm_node { unsigned long color; u64 start; u64 size; + u64 __subtree_last; struct drm_mm *mm; }; @@ -79,6 +82,9 @@ struct drm_mm { /* head_node.node_list is the list of all memory nodes, ordered * according to the (increasing) start address of the memory node. */ struct drm_mm_node head_node; + /* Keep an interval_tree for fast lookup of drm_mm_nodes by address. 
*/ + struct rb_root interval_tree; + unsigned int scan_check_range : 1; unsigned scan_alignment; unsigned long scan_color; @@ -295,6 +301,12 @@ void drm_mm_init(struct drm_mm *mm, void drm_mm_takedown(struct drm_mm *mm); bool drm_mm_clean(struct drm_mm *mm); +struct drm_mm_node * +drm_mm_interval_first(struct drm_mm *mm, u64 start, u64 last); + +struct drm_mm_node * +drm_mm_interval_next(struct drm_mm_node *node, u64 start, u64 last); + void drm_mm_init_scan(struct drm_mm *mm, u64 size, unsigned alignment, -- cgit v0.10.2 From db2395eccf0829de5eb54625167f6cbc3faa1418 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Aug 2016 16:04:10 +0100 Subject: drm: Convert drm_vma_manager to embedded interval-tree in drm_mm Having added an interval-tree to struct drm_mm, we can replace the auxiliary rb-tree inside the drm_vma_manager with it. Signed-off-by: Chris Wilson Cc: David Herrmann Cc: dri-devel@lists.freedesktop.org Reviewed-by: David Herrmann Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470236651-678-2-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c index f306c88..0aef432 100644 --- a/drivers/gpu/drm/drm_vma_manager.c +++ b/drivers/gpu/drm/drm_vma_manager.c @@ -86,7 +86,6 @@ void drm_vma_offset_manager_init(struct drm_vma_offset_manager *mgr, unsigned long page_offset, unsigned long size) { rwlock_init(&mgr->vm_lock); - mgr->vm_addr_space_rb = RB_ROOT; drm_mm_init(&mgr->vm_addr_space_mm, page_offset, size); } EXPORT_SYMBOL(drm_vma_offset_manager_init); @@ -145,16 +144,16 @@ struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_m unsigned long start, unsigned long pages) { - struct drm_vma_offset_node *node, *best; + struct drm_mm_node *node, *best; struct rb_node *iter; unsigned long offset; - iter = mgr->vm_addr_space_rb.rb_node; + iter = mgr->vm_addr_space_mm.interval_tree.rb_node; best = NULL; while (likely(iter)) { - node = rb_entry(iter, struct drm_vma_offset_node, vm_rb); - offset = node->vm_node.start; + node = rb_entry(iter, struct drm_mm_node, rb); + offset = node->start; if (start >= offset) { iter = iter->rb_right; best = node; @@ -167,38 +166,17 @@ struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_m /* verify that the node spans the requested area */ if (best) { - offset = best->vm_node.start + best->vm_node.size; + offset = best->start + best->size; if (offset < start + pages) best = NULL; } - return best; -} -EXPORT_SYMBOL(drm_vma_offset_lookup_locked); - -/* internal helper to link @node into the rb-tree */ -static void _drm_vma_offset_add_rb(struct drm_vma_offset_manager *mgr, - struct drm_vma_offset_node *node) -{ - struct rb_node **iter = &mgr->vm_addr_space_rb.rb_node; - struct rb_node *parent = NULL; - struct drm_vma_offset_node *iter_node; - - while (likely(*iter)) { - parent = *iter; - iter_node = rb_entry(*iter, struct drm_vma_offset_node, vm_rb); + if (!best) + return NULL; - if (node->vm_node.start < iter_node->vm_node.start) - iter = &(*iter)->rb_left; - else if (node->vm_node.start > iter_node->vm_node.start) - iter = &(*iter)->rb_right; - else - BUG(); - } - - rb_link_node(&node->vm_rb, parent, iter); - rb_insert_color(&node->vm_rb, &mgr->vm_addr_space_rb); + return container_of(best, struct drm_vma_offset_node, vm_node); } +EXPORT_SYMBOL(drm_vma_offset_lookup_locked); /** * drm_vma_offset_add() - Add offset node to manager @@ -240,8 +218,6 @@ int drm_vma_offset_add(struct drm_vma_offset_manager 
*mgr, if (ret) goto out_unlock; - _drm_vma_offset_add_rb(mgr, node); - out_unlock: write_unlock(&mgr->vm_lock); return ret; @@ -265,7 +241,6 @@ void drm_vma_offset_remove(struct drm_vma_offset_manager *mgr, write_lock(&mgr->vm_lock); if (drm_mm_node_allocated(&node->vm_node)) { - rb_erase(&node->vm_rb, &mgr->vm_addr_space_rb); drm_mm_remove_node(&node->vm_node); memset(&node->vm_node, 0, sizeof(node->vm_node)); } diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h index 06ea8e07..afba6fc 100644 --- a/include/drm/drm_vma_manager.h +++ b/include/drm/drm_vma_manager.h @@ -40,13 +40,11 @@ struct drm_vma_offset_file { struct drm_vma_offset_node { rwlock_t vm_lock; struct drm_mm_node vm_node; - struct rb_node vm_rb; struct rb_root vm_files; }; struct drm_vma_offset_manager { rwlock_t vm_lock; - struct rb_root vm_addr_space_rb; struct drm_mm vm_addr_space_mm; }; -- cgit v0.10.2 From a787900564ffc6eebbcb7086b45e04a0bc3370f0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Aug 2016 16:04:11 +0100 Subject: drm: Skip initialising the drm_mm_node->hole_stack As we always add this to the drm_mm->hole_stack as our first operation, we do not need to initialise the list node. Signed-off-by: Chris Wilson Cc: David Herrmann Cc: dri-devel@lists.freedesktop.org Reviewed-by: David Herrmann Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470236651-678-3-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 5c188c5..3f56d4b 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -217,7 +217,6 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node, node->color = color; node->allocated = 1; - INIT_LIST_HEAD(&node->hole_stack); list_add(&node->node_list, &hole_node->node_list); drm_mm_interval_tree_add_node(hole_node, node); @@ -276,14 +275,13 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node) node->mm = mm; node->allocated = 1; - INIT_LIST_HEAD(&node->hole_stack); list_add(&node->node_list, &hole->node_list); drm_mm_interval_tree_add_node(hole, node); if (node->start == hole_start) { hole->hole_follows = 0; - list_del_init(&hole->hole_stack); + list_del(&hole->hole_stack); } node->hole_follows = 0; @@ -379,7 +377,6 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node, node->color = color; node->allocated = 1; - INIT_LIST_HEAD(&node->hole_stack); list_add(&node->node_list, &hole_node->node_list); drm_mm_interval_tree_add_node(hole_node, node); @@ -833,7 +830,6 @@ void drm_mm_init(struct drm_mm * mm, u64 start, u64 size) /* Clever trick to avoid a special case in the free hole tracking. */ INIT_LIST_HEAD(&mm->head_node.node_list); - INIT_LIST_HEAD(&mm->head_node.hole_stack); mm->head_node.hole_follows = 1; mm->head_node.scanned_block = 0; mm->head_node.scanned_prev_free = 0; -- cgit v0.10.2 From a3ccc461668951c14b7c656233b993f0dea326ce Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 3 Aug 2016 20:04:25 +0200 Subject: drm: rename DRM_MINOR_LEGACY to DRM_MINOR_PRIMARY The minor referred to by "DRM_MINOR_LEGACY" is called 'dev->primary' and gets 'cardX' as name assigned. Lets reduce this magnificent number of names for the same concept by one and rename DRM_MINOR_LEGACY to DRM_MINOR_PRIMARY (to match the actual struct-member name). Furthermore, this is in no way a legacy node, so lets not call it that. 
Signed-off-by: David Herrmann Reviewed-by: Frank Binns Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20160803180432.1341-2-dh.herrmann@gmail.com diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index be27ed3..57ce973 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -112,7 +112,7 @@ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev, unsigned int type) { switch (type) { - case DRM_MINOR_LEGACY: + case DRM_MINOR_PRIMARY: return &dev->primary; case DRM_MINOR_RENDER: return &dev->render; @@ -512,7 +512,7 @@ int drm_dev_init(struct drm_device *dev, goto err_minors; } - ret = drm_minor_alloc(dev, DRM_MINOR_LEGACY); + ret = drm_minor_alloc(dev, DRM_MINOR_PRIMARY); if (ret) goto err_minors; @@ -545,7 +545,7 @@ err_ctxbitmap: drm_legacy_ctxbitmap_cleanup(dev); drm_ht_remove(&dev->map_hash); err_minors: - drm_minor_free(dev, DRM_MINOR_LEGACY); + drm_minor_free(dev, DRM_MINOR_PRIMARY); drm_minor_free(dev, DRM_MINOR_RENDER); drm_minor_free(dev, DRM_MINOR_CONTROL); drm_fs_inode_free(dev->anon_inode); @@ -608,7 +608,7 @@ static void drm_dev_release(struct kref *ref) drm_ht_remove(&dev->map_hash); drm_fs_inode_free(dev->anon_inode); - drm_minor_free(dev, DRM_MINOR_LEGACY); + drm_minor_free(dev, DRM_MINOR_PRIMARY); drm_minor_free(dev, DRM_MINOR_RENDER); drm_minor_free(dev, DRM_MINOR_CONTROL); @@ -684,7 +684,7 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) if (ret) goto err_minors; - ret = drm_minor_register(dev, DRM_MINOR_LEGACY); + ret = drm_minor_register(dev, DRM_MINOR_PRIMARY); if (ret) goto err_minors; @@ -701,7 +701,7 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) goto out_unlock; err_minors: - drm_minor_unregister(dev, DRM_MINOR_LEGACY); + drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); drm_minor_unregister(dev, DRM_MINOR_CONTROL); out_unlock: @@ -741,7 +741,7 @@ void drm_dev_unregister(struct drm_device *dev) list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head) drm_legacy_rmmap(dev, r_list->map); - drm_minor_unregister(dev, DRM_MINOR_LEGACY); + drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); drm_minor_unregister(dev, DRM_MINOR_CONTROL); } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d377865..d488a72 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -642,7 +642,7 @@ struct drm_driver { }; enum drm_minor_type { - DRM_MINOR_LEGACY, + DRM_MINOR_PRIMARY, DRM_MINOR_CONTROL, DRM_MINOR_RENDER, DRM_MINOR_CNT, @@ -856,7 +856,7 @@ static inline bool drm_is_control_client(const struct drm_file *file_priv) static inline bool drm_is_primary_client(const struct drm_file *file_priv) { - return file_priv->minor->type == DRM_MINOR_LEGACY; + return file_priv->minor->type == DRM_MINOR_PRIMARY; } /******************************************************************/ -- cgit v0.10.2 From aafdcfd3f9aa3c9f77ae4e9385f21bf9ae120d3e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Aug 2016 19:26:28 +0100 Subject: drm: Declare that create drm_mm nodes with size 0 is illegal At a higher level, all objects are created with definite size i.e. 0 is illegal. In forthcoming patches, this assumption is dependent upon in the drm_mm range manager, i.e. trying to create a drm_mm node with size 0 will have undefined behaviour. Add a couple of WARNs upon creating the drm_mm node to prevent later bugs. 
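From the caller's side the new contract looks like this (hypothetical snippet; names invented for illustration):

static int reserve_empty_node(struct drm_mm *mm)
{
	struct drm_mm_node node = {};

	node.start = 0x10000;
	node.size  = 0;		/* illegal from now on */

	/* Hits the new WARN_ON and fails with -EINVAL, rather than
	 * creating a node whose behaviour is later undefined. */
	return drm_mm_reserve_node(mm, &node);
}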
Signed-off-by: Chris Wilson Reviewed-by: David Herrmann Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470248788-30873-1-git-send-email-chris@chris-wilson.co.uk diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 3f56d4b..11d44a1 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -250,6 +250,9 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node) struct drm_mm_node *hole; u64 hole_start, hole_end; + if (WARN_ON(node->size == 0)) + return -EINVAL; + end = node->start + node->size; /* Find the relevant hole to add our node to */ @@ -317,6 +320,9 @@ int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node, { struct drm_mm_node *hole_node; + if (WARN_ON(size == 0)) + return -EINVAL; + hole_node = drm_mm_search_free_generic(mm, size, alignment, color, sflags); if (!hole_node) @@ -419,6 +425,9 @@ int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *n { struct drm_mm_node *hole_node; + if (WARN_ON(size == 0)) + return -EINVAL; + hole_node = drm_mm_search_free_in_range_generic(mm, size, alignment, color, start, end, sflags); -- cgit v0.10.2 From 3cbf6a5deb2f4a469de7a90a3cc169e8fcba95e2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 3 Aug 2016 21:11:09 +0200 Subject: drm: Mark up legacy/dri1 drivers with DRM_LEGACY It's super confusing that new drivers need to be marked with DRIVER_MODESET when really it means DRIVER_MODERN. Much better to invert the meaning and rename it to something that's suitably off-putting. Since there's over 100 places using DRIVER_MODESET we need to roll out this change without a flag day. v2: Update docs. Reviewed-by: Frank Binns Reviewed-by: David Herrmann Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470251470-30830-1-git-send-email-daniel.vetter@ffwll.ch diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst index 3bb2613..37284bc 100644 --- a/Documentation/gpu/drm-internals.rst +++ b/Documentation/gpu/drm-internals.rst @@ -53,9 +53,12 @@ u32 driver_features; DRIVER_USE_AGP Driver uses AGP interface, the DRM core will manage AGP resources. -DRIVER_REQUIRE_AGP - Driver needs AGP interface to function. AGP initialization failure - will become a fatal error. +DRIVER_LEGACY + Denote a legacy driver using shadow attach. Don't use. + +DRIVER_KMS_LEGACY_CONTEXT + Used only by nouveau for backwards compatibility with existing userspace. + Don't use. 
DRIVER_PCI_DMA Driver is capable of PCI DMA, mapping of PCI DMA buffers to diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c index 44f4a13..0be55dc 100644 --- a/drivers/gpu/drm/i810/i810_drv.c +++ b/drivers/gpu/drm/i810/i810_drv.c @@ -56,9 +56,7 @@ static const struct file_operations i810_driver_fops = { }; static struct drm_driver driver = { - .driver_features = - DRIVER_USE_AGP | - DRIVER_HAVE_DMA, + .driver_features = DRIVER_USE_AGP | DRIVER_HAVE_DMA | DRIVER_LEGACY, .dev_priv_size = sizeof(drm_i810_buf_priv_t), .load = i810_driver_load, .lastclose = i810_driver_lastclose, diff --git a/drivers/gpu/drm/mga/mga_drv.c b/drivers/gpu/drm/mga/mga_drv.c index 5e2f131..25b2a1a 100644 --- a/drivers/gpu/drm/mga/mga_drv.c +++ b/drivers/gpu/drm/mga/mga_drv.c @@ -58,7 +58,7 @@ static const struct file_operations mga_driver_fops = { static struct drm_driver driver = { .driver_features = - DRIVER_USE_AGP | DRIVER_PCI_DMA | + DRIVER_USE_AGP | DRIVER_PCI_DMA | DRIVER_LEGACY | DRIVER_HAVE_DMA | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED, .dev_priv_size = sizeof(drm_mga_buf_priv_t), .load = mga_driver_load, diff --git a/drivers/gpu/drm/r128/r128_drv.c b/drivers/gpu/drm/r128/r128_drv.c index c57b4de..a982be5 100644 --- a/drivers/gpu/drm/r128/r128_drv.c +++ b/drivers/gpu/drm/r128/r128_drv.c @@ -56,7 +56,7 @@ static const struct file_operations r128_driver_fops = { static struct drm_driver driver = { .driver_features = - DRIVER_USE_AGP | DRIVER_PCI_DMA | DRIVER_SG | + DRIVER_USE_AGP | DRIVER_PCI_DMA | DRIVER_SG | DRIVER_LEGACY | DRIVER_HAVE_DMA | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED, .dev_priv_size = sizeof(drm_r128_buf_priv_t), .load = r128_driver_load, diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c index 21aed1f..3b80713 100644 --- a/drivers/gpu/drm/savage/savage_drv.c +++ b/drivers/gpu/drm/savage/savage_drv.c @@ -50,7 +50,7 @@ static const struct file_operations savage_driver_fops = { static struct drm_driver driver = { .driver_features = - DRIVER_USE_AGP | DRIVER_HAVE_DMA | DRIVER_PCI_DMA, + DRIVER_USE_AGP | DRIVER_HAVE_DMA | DRIVER_PCI_DMA | DRIVER_LEGACY, .dev_priv_size = sizeof(drm_savage_buf_priv_t), .load = savage_driver_load, .firstopen = savage_driver_firstopen, diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 79bce76..ae98398 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -102,7 +102,7 @@ static void sis_driver_postclose(struct drm_device *dev, struct drm_file *file) } static struct drm_driver driver = { - .driver_features = DRIVER_USE_AGP, + .driver_features = DRIVER_USE_AGP | DRIVER_LEGACY, .load = sis_driver_load, .unload = sis_driver_unload, .open = sis_driver_open, diff --git a/drivers/gpu/drm/tdfx/tdfx_drv.c b/drivers/gpu/drm/tdfx/tdfx_drv.c index fab5ebc..f418892 100644 --- a/drivers/gpu/drm/tdfx/tdfx_drv.c +++ b/drivers/gpu/drm/tdfx/tdfx_drv.c @@ -56,6 +56,7 @@ static const struct file_operations tdfx_driver_fops = { }; static struct drm_driver driver = { + .driver_features = DRIVER_LEGACY, .set_busid = drm_pci_set_busid, .fops = &tdfx_driver_fops, .name = DRIVER_NAME, diff --git a/drivers/gpu/drm/via/via_drv.c b/drivers/gpu/drm/via/via_drv.c index ed8aa8f..e5582ba 100644 --- a/drivers/gpu/drm/via/via_drv.c +++ b/drivers/gpu/drm/via/via_drv.c @@ -72,7 +72,7 @@ static const struct file_operations via_driver_fops = { static struct drm_driver driver = { .driver_features = - DRIVER_USE_AGP | DRIVER_HAVE_IRQ | + DRIVER_USE_AGP | DRIVER_HAVE_IRQ | DRIVER_LEGACY | 
DRIVER_IRQ_SHARED, .load = via_driver_load, .unload = via_driver_unload, diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d488a72..856c174 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -146,6 +146,7 @@ void drm_err(const char *format, ...); /* driver capabilities and requirements mask */ #define DRIVER_USE_AGP 0x1 +#define DRIVER_LEGACY 0x2 #define DRIVER_PCI_DMA 0x8 #define DRIVER_SG 0x10 #define DRIVER_HAVE_DMA 0x20 -- cgit v0.10.2 From fa5386459f06dc3b9181d4c954f980b127d1a32f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 3 Aug 2016 21:11:10 +0200 Subject: drm: Used DRM_LEGACY for all legacy functions Except for nouveau, only legacy drivers need this really. And nouveau is already marked up with DRIVER_KMS_LEGACY_CONTEXT as the special case. I've tried to be careful to leave everything related to modeset still using the DRIVER_MODESET flag. Otherwise it's a direct replacement of !DRIVER_MODESET with DRIVER_LEGACY checks. Also helps readability since fewer negative checks overall. Reviewed-by: David Herrmann Reviewed-by: Frank Binns Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1470251470-30830-2-git-send-email-daniel.vetter@ffwll.ch diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c index 605bd24..d621c8a 100644 --- a/drivers/gpu/drm/drm_agpsupport.c +++ b/drivers/gpu/drm/drm_agpsupport.c @@ -430,9 +430,7 @@ struct drm_agp_head *drm_agp_init(struct drm_device *dev) * intact so it can still be used. It is safe to call this if AGP is disabled or * was already removed. * - * If DRIVER_MODESET is active, nothing is done to protect the modesetting - * resources from getting destroyed. Drivers are responsible of cleaning them up - * during device shutdown. + * Cleanup is only done for drivers who have DRIVER_LEGACY set. */ void drm_legacy_agp_clear(struct drm_device *dev) { @@ -440,7 +438,7 @@ void drm_legacy_agp_clear(struct drm_device *dev) if (!dev->agp) return; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return; list_for_each_entry_safe(entry, tempe, &dev->agp->memory, head) { diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 4153e8a..6b14351 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -251,7 +251,7 @@ void drm_master_release(struct drm_file *file_priv) if (!drm_is_current_master(file_priv)) goto out; - if (!drm_core_check_feature(dev, DRIVER_MODESET)) { + if (drm_core_check_feature(dev, DRIVER_LEGACY)) { /* * Since the master is disappearing, so is the * possibility to lock. 
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c index c3a12cd..3219151 100644 --- a/drivers/gpu/drm/drm_bufs.c +++ b/drivers/gpu/drm/drm_bufs.c @@ -397,7 +397,7 @@ int drm_legacy_addmap_ioctl(struct drm_device *dev, void *data, return -EPERM; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; err = drm_addmap_core(dev, map->offset, map->size, map->type, @@ -443,7 +443,7 @@ int drm_legacy_getmap_ioctl(struct drm_device *dev, void *data, int i; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; idx = map->offset; @@ -545,7 +545,7 @@ EXPORT_SYMBOL(drm_legacy_rmmap_locked); void drm_legacy_rmmap(struct drm_device *dev, struct drm_local_map *map) { if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return; mutex_lock(&dev->struct_mutex); @@ -558,7 +558,7 @@ void drm_legacy_master_rmmaps(struct drm_device *dev, struct drm_master *master) { struct drm_map_list *r_list, *list_temp; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return; mutex_lock(&dev->struct_mutex); @@ -595,7 +595,7 @@ int drm_legacy_rmmap_ioctl(struct drm_device *dev, void *data, int ret; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; mutex_lock(&dev->struct_mutex); @@ -1220,7 +1220,7 @@ int drm_legacy_addbufs(struct drm_device *dev, void *data, struct drm_buf_desc *request = data; int ret; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) @@ -1266,7 +1266,7 @@ int drm_legacy_infobufs(struct drm_device *dev, void *data, int i; int count; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) @@ -1347,7 +1347,7 @@ int drm_legacy_markbufs(struct drm_device *dev, void *data, int order; struct drm_buf_entry *entry; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) @@ -1395,7 +1395,7 @@ int drm_legacy_freebufs(struct drm_device *dev, void *data, int idx; struct drm_buf *buf; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) @@ -1450,7 +1450,7 @@ int drm_legacy_mapbufs(struct drm_device *dev, void *data, struct drm_buf_map *request = data; int i; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA)) @@ -1530,7 +1530,7 @@ int drm_legacy_mapbufs(struct drm_device *dev, void *data, int drm_legacy_dma_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; if (dev->driver->dma_ioctl) diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c index 192a5f9..3c4000f 100644 --- 
a/drivers/gpu/drm/drm_context.c +++ b/drivers/gpu/drm/drm_context.c @@ -54,7 +54,7 @@ struct drm_ctx_list { void drm_legacy_ctxbitmap_free(struct drm_device * dev, int ctx_handle) { if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return; mutex_lock(&dev->struct_mutex); @@ -92,7 +92,7 @@ static int drm_legacy_ctxbitmap_next(struct drm_device * dev) void drm_legacy_ctxbitmap_init(struct drm_device * dev) { if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return; idr_init(&dev->ctx_idr); @@ -109,7 +109,7 @@ void drm_legacy_ctxbitmap_init(struct drm_device * dev) void drm_legacy_ctxbitmap_cleanup(struct drm_device * dev) { if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return; mutex_lock(&dev->struct_mutex); @@ -131,7 +131,7 @@ void drm_legacy_ctxbitmap_flush(struct drm_device *dev, struct drm_file *file) struct drm_ctx_list *pos, *tmp; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return; mutex_lock(&dev->ctxlist_mutex); @@ -177,7 +177,7 @@ int drm_legacy_getsareactx(struct drm_device *dev, void *data, struct drm_map_list *_entry; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; mutex_lock(&dev->struct_mutex); @@ -225,7 +225,7 @@ int drm_legacy_setsareactx(struct drm_device *dev, void *data, struct drm_map_list *r_list = NULL; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; mutex_lock(&dev->struct_mutex); @@ -329,7 +329,7 @@ int drm_legacy_resctx(struct drm_device *dev, void *data, int i; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; if (res->count >= DRM_RESERVED_CONTEXTS) { @@ -363,7 +363,7 @@ int drm_legacy_addctx(struct drm_device *dev, void *data, struct drm_ctx *ctx = data; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; ctx->handle = drm_legacy_ctxbitmap_next(dev); @@ -410,7 +410,7 @@ int drm_legacy_getctx(struct drm_device *dev, void *data, struct drm_ctx *ctx = data; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; /* This is 0, because we don't handle any context flags */ @@ -436,7 +436,7 @@ int drm_legacy_switchctx(struct drm_device *dev, void *data, struct drm_ctx *ctx = data; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; DRM_DEBUG("%d\n", ctx->handle); @@ -460,7 +460,7 @@ int drm_legacy_newctx(struct drm_device *dev, void *data, struct drm_ctx *ctx = data; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) 
return -EINVAL; DRM_DEBUG("%d\n", ctx->handle); @@ -486,7 +486,7 @@ int drm_legacy_rmctx(struct drm_device *dev, void *data, struct drm_ctx *ctx = data; if (!drm_core_check_feature(dev, DRIVER_KMS_LEGACY_CONTEXT) && - drm_core_check_feature(dev, DRIVER_MODESET)) + !drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; DRM_DEBUG("%d\n", ctx->handle); diff --git a/drivers/gpu/drm/drm_dma.c b/drivers/gpu/drm/drm_dma.c index ea48180..3f83e2c 100644 --- a/drivers/gpu/drm/drm_dma.c +++ b/drivers/gpu/drm/drm_dma.c @@ -50,9 +50,8 @@ int drm_legacy_dma_setup(struct drm_device *dev) int i; if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA) || - drm_core_check_feature(dev, DRIVER_MODESET)) { + !drm_core_check_feature(dev, DRIVER_LEGACY)) return 0; - } dev->buf_use = 0; atomic_set(&dev->buf_alloc, 0); @@ -81,9 +80,8 @@ void drm_legacy_dma_takedown(struct drm_device *dev) int i, j; if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA) || - drm_core_check_feature(dev, DRIVER_MODESET)) { + !drm_core_check_feature(dev, DRIVER_LEGACY)) return; - } if (!dma) return; diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c index 323c238..036cd27 100644 --- a/drivers/gpu/drm/drm_fops.c +++ b/drivers/gpu/drm/drm_fops.c @@ -92,7 +92,7 @@ static int drm_setup(struct drm_device * dev) int ret; if (dev->driver->firstopen && - !drm_core_check_feature(dev, DRIVER_MODESET)) { + drm_core_check_feature(dev, DRIVER_LEGACY)) { ret = dev->driver->firstopen(dev); if (ret != 0) return ret; @@ -346,7 +346,7 @@ void drm_lastclose(struct drm_device * dev) dev->driver->lastclose(dev); DRM_DEBUG("driver lastclose completed\n"); - if (!drm_core_check_feature(dev, DRIVER_MODESET)) + if (drm_core_check_feature(dev, DRIVER_LEGACY)) drm_legacy_dev_reinit(dev); } @@ -389,7 +389,7 @@ int drm_release(struct inode *inode, struct file *filp) (long)old_encode_dev(file_priv->minor->kdev->devt), dev->open_count); - if (!drm_core_check_feature(dev, DRIVER_MODESET)) + if (drm_core_check_feature(dev, DRIVER_LEGACY)) drm_legacy_lock_release(dev, filp); if (drm_core_check_feature(dev, DRIVER_HAVE_DMA)) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 33af4a5..bb51ee9 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -714,9 +714,9 @@ long drm_ioctl(struct file *filp, if (ksize > in_size) memset(kdata + in_size, 0, ksize - in_size); - /* Enforce sane locking for kms driver ioctls. Core ioctls are + /* Enforce sane locking for modern driver ioctls. Core ioctls are * too messy still. 
*/ - if ((drm_core_check_feature(dev, DRIVER_MODESET) && is_driver_ioctl) || + if ((!drm_core_check_feature(dev, DRIVER_LEGACY) && is_driver_ioctl) || (ioctl->flags & DRM_UNLOCKED)) retcode = func(dev, kdata, file_priv); else { diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 01a5079..97c7064 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -482,7 +482,7 @@ int drm_irq_install(struct drm_device *dev, int irq) return ret; } - if (!drm_core_check_feature(dev, DRIVER_MODESET)) + if (drm_core_check_feature(dev, DRIVER_LEGACY)) vga_client_register(dev->pdev, (void *)dev, drm_irq_vgaarb_nokms, NULL); /* After installing handler */ @@ -491,7 +491,7 @@ int drm_irq_install(struct drm_device *dev, int irq) if (ret < 0) { dev->irq_enabled = false; - if (!drm_core_check_feature(dev, DRIVER_MODESET)) + if (drm_core_check_feature(dev, DRIVER_LEGACY)) vga_client_register(dev->pdev, NULL, NULL, NULL); free_irq(irq, dev); } else { @@ -557,7 +557,7 @@ int drm_irq_uninstall(struct drm_device *dev) DRM_DEBUG("irq=%d\n", dev->irq); - if (!drm_core_check_feature(dev, DRIVER_MODESET)) + if (drm_core_check_feature(dev, DRIVER_LEGACY)) vga_client_register(dev->pdev, NULL, NULL, NULL); if (dev->driver->irq_uninstall) @@ -592,7 +592,7 @@ int drm_control(struct drm_device *dev, void *data, if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) return 0; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return 0; /* UMS was only ever supported on pci devices. */ if (WARN_ON(!dev->pdev)) @@ -1519,7 +1519,7 @@ int drm_modeset_ctl(struct drm_device *dev, void *data, return 0; /* KMS drivers handle this internally */ - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return 0; pipe = modeset->crtc; diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 48ac0eb..c901f3c 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -163,7 +163,7 @@ int drm_legacy_lock(struct drm_device *dev, void *data, struct drm_master *master = file_priv->master; int ret = 0; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; ++file_priv->lock_count; @@ -252,7 +252,7 @@ int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_ struct drm_lock *lock = data; struct drm_master *master = file_priv->master; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; if (lock->context == DRM_KERNEL_CONTEXT) { diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index b2f8f10..d86362f 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -175,7 +175,7 @@ int drm_irq_by_busid(struct drm_device *dev, void *data, { struct drm_irq_busid *p = data; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; /* UMS was only ever support on PCI devices. */ @@ -263,7 +263,7 @@ int drm_get_pci_dev(struct pci_dev *pdev, const struct pci_device_id *ent, /* No locking needed since shadow-attach is single-threaded since it may * only be called from the per-driver module init hook. 
*/ - if (!drm_core_check_feature(dev, DRIVER_MODESET)) + if (drm_core_check_feature(dev, DRIVER_LEGACY)) list_add_tail(&dev->legacy_dev_list, &driver->legacy_dev_list); return 0; @@ -299,7 +299,7 @@ int drm_pci_init(struct drm_driver *driver, struct pci_driver *pdriver) DRM_DEBUG("\n"); - if (driver->driver_features & DRIVER_MODESET) + if (!(driver->driver_features & DRIVER_LEGACY)) return pci_register_driver(pdriver); /* If not using KMS, fall back to stealth mode manual scanning. */ @@ -421,7 +421,7 @@ void drm_pci_exit(struct drm_driver *driver, struct pci_driver *pdriver) struct drm_device *dev, *tmp; DRM_DEBUG("\n"); - if (driver->driver_features & DRIVER_MODESET) { + if (!(driver->driver_features & DRIVER_LEGACY)) { pci_unregister_driver(pdriver); } else { list_for_each_entry_safe(dev, tmp, &driver->legacy_dev_list, diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c index bf70431..275bca4 100644 --- a/drivers/gpu/drm/drm_scatter.c +++ b/drivers/gpu/drm/drm_scatter.c @@ -68,7 +68,7 @@ static void drm_sg_cleanup(struct drm_sg_mem * entry) void drm_legacy_sg_cleanup(struct drm_device *dev) { if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg && - !drm_core_check_feature(dev, DRIVER_MODESET)) { + drm_core_check_feature(dev, DRIVER_LEGACY)) { drm_sg_cleanup(dev->sg); dev->sg = NULL; } @@ -88,7 +88,7 @@ int drm_legacy_sg_alloc(struct drm_device *dev, void *data, DRM_DEBUG("\n"); - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; if (!drm_core_check_feature(dev, DRIVER_SG)) @@ -201,7 +201,7 @@ int drm_legacy_sg_free(struct drm_device *dev, void *data, struct drm_scatter_gather *request = data; struct drm_sg_mem *entry; - if (drm_core_check_feature(dev, DRIVER_MODESET)) + if (!drm_core_check_feature(dev, DRIVER_LEGACY)) return -EINVAL; if (!drm_core_check_feature(dev, DRIVER_SG)) -- cgit v0.10.2 From fcc60b413d14dd06ddbd79ec50e83c4fb2a097ba Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 4 Jun 2016 01:16:22 -0700 Subject: drm: Don't prepare or cleanup unchanging frame buffers [v3] When reconfiguring a plane position (as in moving the cursor), the frame buffer for the cursor isn't changing, so don't call the prepare or cleanup driver functions. This avoids making cursor position updates block on all pending rendering. v3: use drm_atomic_helper_framebuffer_changed in both prepare and cleanup phases instead of keeping state in the plane. 
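The stall being avoided comes from the driver's prepare_fb hook, which typically pins the new framebuffer and waits for outstanding rendering before the flip. A rough, hypothetical example of such a hook (foo_pin_and_sync() is a placeholder, not a real helper):

    static int foo_prepare_fb(struct drm_plane *plane,
                              const struct drm_plane_state *new_state)
    {
            if (!new_state->fb)
                    return 0;

            /* pin the backing storage and wait for pending GPU writes;
             * skipping this when the fb is unchanged keeps cursor moves cheap */
            return foo_pin_and_sync(new_state->fb);
    }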
cc: dri-devel@lists.freedesktop.org cc: David Airlie cc: Daniel Vetter Signed-off-by: Keith Packard [danvet: Rebase onto 4.8] Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 20be86d..813821e 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1631,6 +1631,9 @@ int drm_atomic_helper_prepare_planes(struct drm_device *dev, funcs = plane->helper_private; + if (!drm_atomic_helper_framebuffer_changed(dev, state, plane_state->crtc)) + continue; + if (funcs->prepare_fb) { ret = funcs->prepare_fb(plane, plane_state); if (ret) @@ -1647,11 +1650,13 @@ fail: if (j >= i) continue; + if (!drm_atomic_helper_framebuffer_changed(dev, state, plane_state->crtc)) + continue; + funcs = plane->helper_private; if (funcs->cleanup_fb) funcs->cleanup_fb(plane, plane_state); - } return ret; @@ -1894,6 +1899,9 @@ void drm_atomic_helper_cleanup_planes(struct drm_device *dev, for_each_plane_in_state(old_state, plane, plane_state, i) { const struct drm_plane_helper_funcs *funcs; + if (!drm_atomic_helper_framebuffer_changed(dev, old_state, plane_state->crtc)) + continue; + funcs = plane->helper_private; if (funcs->cleanup_fb) -- cgit v0.10.2 From d72daa0d75e8fe71368113350254b9da2c64b235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lothar=20Wa=C3=9Fmann?= Date: Tue, 12 Jul 2016 15:30:02 +0200 Subject: drm: add a helper function to extract 'de-active' and 'pixelclk-active' from DT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit add a helper function to extract information about pixel clock and DE polarity from DT for use by of_get_drm_display_mode(). While at it, convert spaces to tabs in indentation in drm_modes.h. Signed-off-by: Lothar Waßmann Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index fc5040a..51804e5 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -657,6 +657,21 @@ void drm_display_mode_to_videomode(const struct drm_display_mode *dmode, } EXPORT_SYMBOL_GPL(drm_display_mode_to_videomode); +void drm_bus_flags_from_videomode(const struct videomode *vm, u32 *bus_flags) +{ + *bus_flags = 0; + if (vm->flags & DISPLAY_FLAGS_PIXDATA_POSEDGE) + *bus_flags |= DRM_BUS_FLAG_PIXDATA_POSEDGE; + if (vm->flags & DISPLAY_FLAGS_PIXDATA_NEGEDGE) + *bus_flags |= DRM_BUS_FLAG_PIXDATA_NEGEDGE; + + if (vm->flags & DISPLAY_FLAGS_DE_LOW) + *bus_flags |= DRM_BUS_FLAG_DE_LOW; + if (vm->flags & DISPLAY_FLAGS_DE_HIGH) + *bus_flags |= DRM_BUS_FLAG_DE_HIGH; +} +EXPORT_SYMBOL_GPL(drm_bus_flags_from_videomode); + #ifdef CONFIG_OF /** * of_get_drm_display_mode - get a drm_display_mode from devicetree diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h index ff48177..a8164d2 100644 --- a/include/drm/drm_modes.h +++ b/include/drm/drm_modes.h @@ -434,7 +434,7 @@ struct drm_cmdline_mode; struct drm_display_mode *drm_mode_create(struct drm_device *dev); void drm_mode_destroy(struct drm_device *dev, struct drm_display_mode *mode); void drm_mode_convert_to_umode(struct drm_mode_modeinfo *out, - const struct drm_display_mode *in); + const struct drm_display_mode *in); int drm_mode_convert_umode(struct drm_display_mode *out, const struct drm_mode_modeinfo *in); void drm_mode_probed_add(struct drm_connector *connector, struct drm_display_mode *mode); @@ -457,6 +457,7 @@ void drm_display_mode_from_videomode(const struct videomode *vm, struct drm_display_mode *dmode); void drm_display_mode_to_videomode(const 
struct drm_display_mode *dmode, struct videomode *vm); +void drm_bus_flags_from_videomode(const struct videomode *vm, u32 *bus_flags); int of_get_drm_display_mode(struct device_node *np, struct drm_display_mode *dmode, int index); -- cgit v0.10.2 From fafc79ef2e9148d0085585b6b71bc7154c14ebb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lothar=20Wa=C3=9Fmann?= Date: Tue, 12 Jul 2016 15:30:03 +0200 Subject: drm/imx: convey the pixelclk-active and de-active flags from DT to the ipu-di driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'de-active' and 'pixelclk-active' DT properties are evaluated by of_parse_display_timing() called from of_get_drm_display_mode(), but later lost in the conversion from videomode.flags to drm_display_mode.flags. Enhance of_get_drm_display_mode() to also return the bus flags in a separate variable, so that they can be passed on to the ipu-di driver. Signed-off-by: Lothar Waßmann Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 51804e5..1570487 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -687,7 +687,8 @@ EXPORT_SYMBOL_GPL(drm_bus_flags_from_videomode); * 0 on success, a negative errno code when no of videomode node was found. */ int of_get_drm_display_mode(struct device_node *np, - struct drm_display_mode *dmode, int index) + struct drm_display_mode *dmode, u32 *bus_flags, + int index) { struct videomode vm; int ret; @@ -697,6 +698,8 @@ int of_get_drm_display_mode(struct device_node *np, return ret; drm_display_mode_from_videomode(&vm, dmode); + if (bus_flags) + drm_bus_flags_from_videomode(&vm, bus_flags); pr_debug("%s: got %dx%d display mode from %s\n", of_node_full_name(np), vm.hactive, vm.vactive, np->name); diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index b03919e..3ed2d50 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -66,6 +66,7 @@ struct imx_ldb_channel { struct drm_display_mode mode; int mode_valid; u32 bus_format; + u32 bus_flags; }; static inline struct imx_ldb_channel *con_to_imx_ldb_ch(struct drm_connector *c) @@ -379,8 +380,13 @@ static int imx_ldb_encoder_atomic_check(struct drm_encoder *encoder, u32 bus_format = imx_ldb_ch->bus_format; /* Bus format description in DT overrides connector display info. 
*/ - if (!bus_format && di->num_bus_formats) + if (!bus_format && di->num_bus_formats) { bus_format = di->bus_formats[0]; + imx_crtc_state->bus_flags = di->bus_flags; + } else { + bus_format = imx_ldb_ch->bus_format; + imx_crtc_state->bus_flags = imx_ldb_ch->bus_flags; + } switch (bus_format) { case MEDIA_BUS_FMT_RGB666_1X7X3_SPWG: imx_crtc_state->bus_format = MEDIA_BUS_FMT_RGB666_1X18; @@ -674,6 +680,7 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) /* fallback to display-timings node */ ret = of_get_drm_display_mode(child, &channel->mode, + &channel->bus_flags, OF_USE_NATIVE_MODE); if (!ret) channel->mode_valid = 1; diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index 1dad297..74b0ac0 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -33,6 +33,7 @@ struct imx_parallel_display { void *edid; int edid_len; u32 bus_format; + u32 bus_flags; struct drm_display_mode mode; struct drm_panel *panel; struct drm_bridge *bridge; @@ -80,6 +81,7 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector) return -EINVAL; ret = of_get_drm_display_mode(np, &imxpd->mode, + &imxpd->bus_flags, OF_USE_NATIVE_MODE); if (ret) return ret; @@ -125,11 +127,13 @@ static int imx_pd_encoder_atomic_check(struct drm_encoder *encoder, struct drm_display_info *di = &conn_state->connector->display_info; struct imx_parallel_display *imxpd = enc_to_imxpd(encoder); - imx_crtc_state->bus_flags = di->bus_flags; - if (!imxpd->bus_format && di->num_bus_formats) + if (!imxpd->bus_format && di->num_bus_formats) { + imx_crtc_state->bus_flags = di->bus_flags; imx_crtc_state->bus_format = di->bus_formats[0]; - else + } else { + imx_crtc_state->bus_flags = imxpd->bus_flags; imx_crtc_state->bus_format = imxpd->bus_format; + } imx_crtc_state->di_hsync_pin = 2; imx_crtc_state->di_vsync_pin = 3; diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h index a8164d2..48e1a56 100644 --- a/include/drm/drm_modes.h +++ b/include/drm/drm_modes.h @@ -459,7 +459,7 @@ void drm_display_mode_to_videomode(const struct drm_display_mode *dmode, struct videomode *vm); void drm_bus_flags_from_videomode(const struct videomode *vm, u32 *bus_flags); int of_get_drm_display_mode(struct device_node *np, - struct drm_display_mode *dmode, + struct drm_display_mode *dmode, u32 *bus_flags, int index); void drm_mode_set_name(struct drm_display_mode *mode); -- cgit v0.10.2 From 2fd911bc5b5e647902fd319cbf8c75f98555e188 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 18 Jul 2016 15:44:23 +0800 Subject: drm/imx: Remove imx_drm_crtc_vblank_get/_put() There is no one calling imx_drm_crtc_vblank_get/_put() and they are just two simple wrappers of drm_crtc_vblank_get/_put() without doing any thing fancy - the drivers may call drm_crtc_vblank_get/_put() directly. So, let's remove the two wrappers. 
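Any future caller can use the core helpers on the underlying struct drm_crtc directly, for example (illustrative only):

    if (drm_crtc_vblank_get(crtc) == 0) {
            /* ... queue the flip event or wait for the vblank ... */
            drm_crtc_vblank_put(crtc);
    }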
Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 9f7dafc..1fd1900 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -90,18 +90,6 @@ static int imx_drm_driver_unload(struct drm_device *drm) return 0; } -int imx_drm_crtc_vblank_get(struct imx_drm_crtc *imx_drm_crtc) -{ - return drm_crtc_vblank_get(imx_drm_crtc->crtc); -} -EXPORT_SYMBOL_GPL(imx_drm_crtc_vblank_get); - -void imx_drm_crtc_vblank_put(struct imx_drm_crtc *imx_drm_crtc) -{ - drm_crtc_vblank_put(imx_drm_crtc->crtc); -} -EXPORT_SYMBOL_GPL(imx_drm_crtc_vblank_put); - void imx_drm_handle_vblank(struct imx_drm_crtc *imx_drm_crtc) { drm_crtc_handle_vblank(imx_drm_crtc->crtc); diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h index 07d33e4..0049b77f 100644 --- a/drivers/gpu/drm/imx/imx-drm.h +++ b/drivers/gpu/drm/imx/imx-drm.h @@ -44,8 +44,6 @@ int imx_drm_init_drm(struct platform_device *pdev, int preferred_bpp); int imx_drm_exit_drm(void); -int imx_drm_crtc_vblank_get(struct imx_drm_crtc *imx_drm_crtc); -void imx_drm_crtc_vblank_put(struct imx_drm_crtc *imx_drm_crtc); void imx_drm_handle_vblank(struct imx_drm_crtc *imx_drm_crtc); void imx_drm_mode_config_init(struct drm_device *drm); -- cgit v0.10.2 From 8892cc899e15fb896231539c902b813c3f7ab915 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 18 Jul 2016 15:44:24 +0800 Subject: drm/imx: Remove imx_drm_crtc_id() There is no one calling imx_drm_crtc_id() and it is just a simple wrapper of drm_crtc_index() without doing any thing fancy - the drivers may call drm_crtc_index() directly. So, let's remove the wrapper. Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 1fd1900..1aefced 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -58,12 +58,6 @@ static int legacyfb_depth = 16; module_param(legacyfb_depth, int, 0444); #endif -unsigned int imx_drm_crtc_id(struct imx_drm_crtc *crtc) -{ - return drm_crtc_index(crtc->crtc); -} -EXPORT_SYMBOL_GPL(imx_drm_crtc_id); - static void imx_drm_driver_lastclose(struct drm_device *drm) { struct imx_drm_device *imxdrm = drm->dev_private; diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h index 0049b77f..bdaa381 100644 --- a/drivers/gpu/drm/imx/imx-drm.h +++ b/drivers/gpu/drm/imx/imx-drm.h @@ -13,8 +13,6 @@ struct drm_plane; struct imx_drm_crtc; struct platform_device; -unsigned int imx_drm_crtc_id(struct imx_drm_crtc *crtc); - struct imx_crtc_state { struct drm_crtc_state base; u32 bus_format; -- cgit v0.10.2 From e5e8690ff0cd88e42153fbefcbfcbdb726a1a283 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 19 Jul 2016 18:11:00 -0700 Subject: gpu: ipu-cpmem: Add ipu_cpmem_set_uv_offset() Adds ipu_cpmem_set_uv_offset(), to set planar U/V offsets. 
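The offsets are byte offsets from the start of the luma plane and end up in the 8-byte-granular UBO/VBO fields. A hypothetical caller programming an NV12 buffer might do (values illustrative):

    /* NV12: one interleaved UV plane immediately after the Y plane */
    u32 uv_off = stride * height;

    ipu_cpmem_set_uv_offset(ch, uv_off, 0);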
Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index 6494a4d..a36c35e 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -253,6 +253,13 @@ void ipu_cpmem_set_buffer(struct ipuv3_channel *ch, int bufnum, dma_addr_t buf) } EXPORT_SYMBOL_GPL(ipu_cpmem_set_buffer); +void ipu_cpmem_set_uv_offset(struct ipuv3_channel *ch, u32 u_off, u32 v_off) +{ + ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_off / 8); + ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_off / 8); +} +EXPORT_SYMBOL_GPL(ipu_cpmem_set_uv_offset); + void ipu_cpmem_interlaced_scan(struct ipuv3_channel *ch, int stride) { ipu_ch_param_write_field(ch, IPU_FIELD_SO, 1); diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 7adeaae0..69c8658 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -184,6 +184,7 @@ void ipu_cpmem_set_resolution(struct ipuv3_channel *ch, int xres, int yres); void ipu_cpmem_set_stride(struct ipuv3_channel *ch, int stride); void ipu_cpmem_set_high_priority(struct ipuv3_channel *ch); void ipu_cpmem_set_buffer(struct ipuv3_channel *ch, int bufnum, dma_addr_t buf); +void ipu_cpmem_set_uv_offset(struct ipuv3_channel *ch, u32 u_off, u32 v_off); void ipu_cpmem_interlaced_scan(struct ipuv3_channel *ch, int stride); void ipu_cpmem_set_axi_id(struct ipuv3_channel *ch, u32 id); void ipu_cpmem_set_burstsize(struct ipuv3_channel *ch, int burstsize); -- cgit v0.10.2 From 03085911d7bbe1132977302fe45ec4efea29cd3d Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 19 Jul 2016 18:11:01 -0700 Subject: gpu: ipu-cpmem: Add ipu_cpmem_get_burstsize() Adds ipu_cpmem_get_burstsize(). Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index a36c35e..fcb7dc8 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -275,6 +275,12 @@ void ipu_cpmem_set_axi_id(struct ipuv3_channel *ch, u32 id) } EXPORT_SYMBOL_GPL(ipu_cpmem_set_axi_id); +int ipu_cpmem_get_burstsize(struct ipuv3_channel *ch) +{ + return ipu_ch_param_read_field(ch, IPU_FIELD_NPB) + 1; +} +EXPORT_SYMBOL_GPL(ipu_cpmem_get_burstsize); + void ipu_cpmem_set_burstsize(struct ipuv3_channel *ch, int burstsize) { ipu_ch_param_write_field(ch, IPU_FIELD_NPB, burstsize - 1); diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 69c8658..8c4312d 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -187,6 +187,7 @@ void ipu_cpmem_set_buffer(struct ipuv3_channel *ch, int bufnum, dma_addr_t buf); void ipu_cpmem_set_uv_offset(struct ipuv3_channel *ch, u32 u_off, u32 v_off); void ipu_cpmem_interlaced_scan(struct ipuv3_channel *ch, int stride); void ipu_cpmem_set_axi_id(struct ipuv3_channel *ch, u32 id); +int ipu_cpmem_get_burstsize(struct ipuv3_channel *ch); void ipu_cpmem_set_burstsize(struct ipuv3_channel *ch, int burstsize); void ipu_cpmem_set_block_mode(struct ipuv3_channel *ch); void ipu_cpmem_set_rotation(struct ipuv3_channel *ch, -- cgit v0.10.2 From 572a7615aeddc881057a4653658731c8bfac7d86 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 19 Jul 2016 18:11:02 -0700 Subject: gpu: ipu-v3: Add ipu_get_num() Adds of-alias id to ipu_soc and retrieve with ipu_get_num(). 
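The id is taken from the standard "ipu" alias stem in the device tree, so on a dual-IPU SoC clients can tell the two instances apart. Illustrative use (the fallback policy is the caller's choice, not part of this patch):

    int id = ipu_get_num(ipu);

    if (id < 0)             /* no "ipuN" alias found */
            id = 0;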
Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 99dcacf..d697cd5 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -45,6 +45,12 @@ static inline void ipu_cm_write(struct ipu_soc *ipu, u32 value, unsigned offset) writel(value, ipu->cm_reg + offset); } +int ipu_get_num(struct ipu_soc *ipu) +{ + return ipu->id; +} +EXPORT_SYMBOL_GPL(ipu_get_num); + void ipu_srm_dp_sync_update(struct ipu_soc *ipu) { u32 val; @@ -1209,6 +1215,7 @@ static int ipu_probe(struct platform_device *pdev) { const struct of_device_id *of_id = of_match_device(imx_ipu_dt_ids, &pdev->dev); + struct device_node *np = pdev->dev.of_node; struct ipu_soc *ipu; struct resource *res; unsigned long ipu_base; @@ -1237,6 +1244,7 @@ static int ipu_probe(struct platform_device *pdev) ipu->channel[i].ipu = ipu; ipu->devtype = devtype; ipu->ipu_type = devtype->type; + ipu->id = of_alias_get_id(np, "ipu"); spin_lock_init(&ipu->lock); mutex_init(&ipu->channel_lock); diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h index bfb1e8a..fd47f8f 100644 --- a/drivers/gpu/ipu-v3/ipu-prv.h +++ b/drivers/gpu/ipu-v3/ipu-prv.h @@ -152,6 +152,7 @@ struct ipu_soc { void __iomem *cm_reg; void __iomem *idmac_reg; + int id; int usecount; struct clk *clk; diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 8c4312d..ef54634 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -138,6 +138,7 @@ int ipu_idmac_channel_irq(struct ipu_soc *ipu, struct ipuv3_channel *channel, /* * IPU Common functions */ +int ipu_get_num(struct ipu_soc *ipu); void ipu_set_csi_src_mux(struct ipu_soc *ipu, int csi_id, bool mipi_csi2); void ipu_set_ic_src_mux(struct ipu_soc *ipu, int csi_id, bool vdi); void ipu_dump(struct ipu_soc *ipu); -- cgit v0.10.2 From 97afc25c4392105ea031e6f643f659058161c7dc Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 19 Jul 2016 18:11:05 -0700 Subject: gpu: ipu-v3: Add VDI input IDMAC channels Adds the VDIC field input IDMAC channels. These channels transfer fields F(n-1), F(n), and F(N+1) from memory to the VDIC (channels 8, 9, 10 respectively). Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index ef54634..c4ccc79 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -97,6 +97,9 @@ enum ipu_channel_irq { #define IPUV3_CHANNEL_CSI2 2 #define IPUV3_CHANNEL_CSI3 3 #define IPUV3_CHANNEL_VDI_MEM_IC_VF 5 +#define IPUV3_CHANNEL_MEM_VDI_PREV 8 +#define IPUV3_CHANNEL_MEM_VDI_CUR 9 +#define IPUV3_CHANNEL_MEM_VDI_NEXT 10 #define IPUV3_CHANNEL_MEM_IC_PP 11 #define IPUV3_CHANNEL_MEM_IC_PRP_VF 12 #define IPUV3_CHANNEL_G_MEM_IC_PRP_VF 14 -- cgit v0.10.2 From aede45b2f6cd995f9949297b0e3234f495ec93f1 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 19 Jul 2016 18:11:06 -0700 Subject: gpu: ipu-v3: set correct full sensor frame for PAL/NTSC Set the sensor full frame based on whether the passed in mbus_fmt is 720x480 (NTSC) or 720x576 (PAL). 
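The CSI_SENS_FRM_SIZE encoding itself is unchanged; for a BT.656 PAL source the register now ends up programmed with the full 625 framelines even though only 576 lines are active, roughly:

    /* CSI_SENS_FRM_SIZE holds (width - 1) | ((height - 1) << 16) */
    ipu_csi_write(csi, (720 - 1) | ((625 - 1) << 16), CSI_SENS_FRM_SIZE);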
Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c index 06631ac..641ed76 100644 --- a/drivers/gpu/ipu-v3/ipu-csi.c +++ b/drivers/gpu/ipu-v3/ipu-csi.c @@ -365,10 +365,14 @@ int ipu_csi_init_interface(struct ipu_csi *csi, { struct ipu_csi_bus_config cfg; unsigned long flags; - u32 data = 0; + u32 width, height, data = 0; fill_csi_bus_cfg(&cfg, mbus_cfg, mbus_fmt); + /* set default sensor frame width and height */ + width = mbus_fmt->width; + height = mbus_fmt->height; + /* Set the CSI_SENS_CONF register remaining fields */ data |= cfg.data_width << CSI_SENS_CONF_DATA_WIDTH_SHIFT | cfg.data_fmt << CSI_SENS_CONF_DATA_FMT_SHIFT | @@ -386,11 +390,6 @@ int ipu_csi_init_interface(struct ipu_csi *csi, ipu_csi_write(csi, data, CSI_SENS_CONF); - /* Setup sensor frame size */ - ipu_csi_write(csi, - (mbus_fmt->width - 1) | ((mbus_fmt->height - 1) << 16), - CSI_SENS_FRM_SIZE); - /* Set CCIR registers */ switch (cfg.clk_mode) { @@ -408,11 +407,12 @@ int ipu_csi_init_interface(struct ipu_csi *csi, * Field1BlankEnd = 0x7, Field1BlankStart = 0x3, * Field1ActiveEnd = 0x5, Field1ActiveStart = 0x1 */ + height = 625; /* framelines for PAL */ + ipu_csi_write(csi, 0x40596 | CSI_CCIR_ERR_DET_EN, CSI_CCIR_CODE_1); ipu_csi_write(csi, 0xD07DF, CSI_CCIR_CODE_2); ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3); - } else if (mbus_fmt->width == 720 && mbus_fmt->height == 480) { /* * NTSC case @@ -422,6 +422,8 @@ int ipu_csi_init_interface(struct ipu_csi *csi, * Field1BlankEnd = 0x6, Field1BlankStart = 0x2, * Field1ActiveEnd = 0x4, Field1ActiveStart = 0 */ + height = 525; /* framelines for NTSC */ + ipu_csi_write(csi, 0xD07DF | CSI_CCIR_ERR_DET_EN, CSI_CCIR_CODE_1); ipu_csi_write(csi, 0x40596, CSI_CCIR_CODE_2); @@ -447,6 +449,10 @@ int ipu_csi_init_interface(struct ipu_csi *csi, break; } + /* Setup sensor frame size */ + ipu_csi_write(csi, (width - 1) | ((height - 1) << 16), + CSI_SENS_FRM_SIZE); + dev_dbg(csi->ipu->dev, "CSI_SENS_CONF = 0x%08X\n", ipu_csi_read(csi, CSI_SENS_CONF)); dev_dbg(csi->ipu->dev, "CSI_ACT_FRM_SIZE = 0x%08X\n", -- cgit v0.10.2 From ea6bead7ebd99bab1fcc5728beb4c3a791886d77 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Sun, 31 Jul 2016 12:42:21 -0700 Subject: gpu: ipu-v3: Fix CSI data format for 16-bit media bus formats The CSI data format was being programmed incorrectly for the 1x16 media bus formats. The CSI data format for 16-bit must be bayer/generic (CSI_SENS_CONF_DATA_FMT_BAYER). 
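With the generic/bayer CSI format the 16-bit YUV samples are captured untranslated, so the capture channel would then be configured for 16-bit passthrough as well, e.g. (illustrative, assuming the existing cpmem helper is used):

    ipu_cpmem_set_format_passthrough(ch, 16);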
Suggested-by: Carsten Resch Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c index 641ed76..d6e5ded 100644 --- a/drivers/gpu/ipu-v3/ipu-csi.c +++ b/drivers/gpu/ipu-v3/ipu-csi.c @@ -258,12 +258,8 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code) cfg->data_width = IPU_CSI_DATA_WIDTH_8; break; case MEDIA_BUS_FMT_UYVY8_1X16: - cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_YUV422_UYVY; - cfg->mipi_dt = MIPI_DT_YUV422; - cfg->data_width = IPU_CSI_DATA_WIDTH_16; - break; case MEDIA_BUS_FMT_YUYV8_1X16: - cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_YUV422_YUYV; + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; cfg->mipi_dt = MIPI_DT_YUV422; cfg->data_width = IPU_CSI_DATA_WIDTH_16; break; -- cgit v0.10.2 From 58e366eb86e9d9e5ba4bc369daa04a8d8418d515 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 19 Jul 2016 18:11:08 -0700 Subject: gpu: ipu-v3: Fix IRT usage There can be multiple IC tasks using the IRT, so the IRT needs a separate use counter. Create a private ipu_irt_enable() to enable the IRT module when any IC task requires rotation, and ipu_irt_disable() when a task no longer needs the IRT. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c index 1dcb96c..1a37afc 100644 --- a/drivers/gpu/ipu-v3/ipu-ic.c +++ b/drivers/gpu/ipu-v3/ipu-ic.c @@ -160,6 +160,7 @@ struct ipu_ic_priv { spinlock_t lock; struct ipu_soc *ipu; int use_count; + int irt_use_count; struct ipu_ic task[IC_NUM_TASKS]; }; @@ -379,8 +380,6 @@ void ipu_ic_task_disable(struct ipu_ic *ic) ipu_ic_write(ic, ic_conf, IC_CONF); - ic->rotation = ic->graphics = false; - spin_unlock_irqrestore(&priv->lock, flags); } EXPORT_SYMBOL_GPL(ipu_ic_task_disable); @@ -629,22 +628,41 @@ unlock: } EXPORT_SYMBOL_GPL(ipu_ic_task_idma_init); +static void ipu_irt_enable(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + + if (!priv->irt_use_count) + ipu_module_enable(priv->ipu, IPU_CONF_ROT_EN); + + priv->irt_use_count++; +} + +static void ipu_irt_disable(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + + if (priv->irt_use_count) { + if (!--priv->irt_use_count) + ipu_module_disable(priv->ipu, IPU_CONF_ROT_EN); + } +} + int ipu_ic_enable(struct ipu_ic *ic) { struct ipu_ic_priv *priv = ic->priv; unsigned long flags; - u32 module = IPU_CONF_IC_EN; spin_lock_irqsave(&priv->lock, flags); - if (ic->rotation) - module |= IPU_CONF_ROT_EN; - if (!priv->use_count) - ipu_module_enable(priv->ipu, module); + ipu_module_enable(priv->ipu, IPU_CONF_IC_EN); priv->use_count++; + if (ic->rotation) + ipu_irt_enable(ic); + spin_unlock_irqrestore(&priv->lock, flags); return 0; @@ -655,18 +673,22 @@ int ipu_ic_disable(struct ipu_ic *ic) { struct ipu_ic_priv *priv = ic->priv; unsigned long flags; - u32 module = IPU_CONF_IC_EN | IPU_CONF_ROT_EN; spin_lock_irqsave(&priv->lock, flags); priv->use_count--; if (!priv->use_count) - ipu_module_disable(priv->ipu, module); + ipu_module_disable(priv->ipu, IPU_CONF_IC_EN); if (priv->use_count < 0) priv->use_count = 0; + if (ic->rotation) + ipu_irt_disable(ic); + + ic->rotation = ic->graphics = false; + spin_unlock_irqrestore(&priv->lock, flags); return 0; -- cgit v0.10.2 From 88287ec3f88eb28df2c2db73659b859ff645fc42 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 19 Jul 2016 18:11:11 -0700 Subject: gpu: ipu-v3: rename CSI client device Rename the CSI client device in the client_reg[] table to "imx-ipuv3-csi". 
Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index d697cd5..d230988 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -1010,14 +1010,14 @@ static struct ipu_platform_reg client_reg[] = { .dma[0] = IPUV3_CHANNEL_CSI0, .dma[1] = -EINVAL, }, - .name = "imx-ipuv3-camera", + .name = "imx-ipuv3-csi", }, { .pdata = { .csi = 1, .dma[0] = IPUV3_CHANNEL_CSI1, .dma[1] = -EINVAL, }, - .name = "imx-ipuv3-camera", + .name = "imx-ipuv3-csi", }, { .pdata = { .di = 0, -- cgit v0.10.2 From bc0a338750bb3940bf274c728a32f96da542dafc Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 30 Jul 2014 14:10:51 +0200 Subject: gpu: ipu-v3: Add missing IDMAC channel names This patch adds the remaining missing IDMAC channel names: VDIC channels for combining, the separate alpha channels for the MEM->IC and MEM->DC ASYNC channels, and the DC read, command, and output mask channels. Signed-off-by: Philipp Zabel diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index c4ccc79..c3de740 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -102,18 +102,29 @@ enum ipu_channel_irq { #define IPUV3_CHANNEL_MEM_VDI_NEXT 10 #define IPUV3_CHANNEL_MEM_IC_PP 11 #define IPUV3_CHANNEL_MEM_IC_PRP_VF 12 +#define IPUV3_CHANNEL_VDI_MEM_RECENT 13 #define IPUV3_CHANNEL_G_MEM_IC_PRP_VF 14 #define IPUV3_CHANNEL_G_MEM_IC_PP 15 +#define IPUV3_CHANNEL_G_MEM_IC_PRP_VF_ALPHA 17 +#define IPUV3_CHANNEL_G_MEM_IC_PP_ALPHA 18 +#define IPUV3_CHANNEL_MEM_VDI_PLANE1_COMB_ALPHA 19 #define IPUV3_CHANNEL_IC_PRP_ENC_MEM 20 #define IPUV3_CHANNEL_IC_PRP_VF_MEM 21 #define IPUV3_CHANNEL_IC_PP_MEM 22 #define IPUV3_CHANNEL_MEM_BG_SYNC 23 #define IPUV3_CHANNEL_MEM_BG_ASYNC 24 +#define IPUV3_CHANNEL_MEM_VDI_PLANE1_COMB 25 +#define IPUV3_CHANNEL_MEM_VDI_PLANE3_COMB 26 #define IPUV3_CHANNEL_MEM_FG_SYNC 27 #define IPUV3_CHANNEL_MEM_DC_SYNC 28 #define IPUV3_CHANNEL_MEM_FG_ASYNC 29 #define IPUV3_CHANNEL_MEM_FG_SYNC_ALPHA 31 +#define IPUV3_CHANNEL_MEM_FG_ASYNC_ALPHA 33 +#define IPUV3_CHANNEL_DC_MEM_READ 40 #define IPUV3_CHANNEL_MEM_DC_ASYNC 41 +#define IPUV3_CHANNEL_MEM_DC_COMMAND 42 +#define IPUV3_CHANNEL_MEM_DC_COMMAND2 43 +#define IPUV3_CHANNEL_MEM_DC_OUTPUT_MASK 44 #define IPUV3_CHANNEL_MEM_ROT_ENC 45 #define IPUV3_CHANNEL_MEM_ROT_VF 46 #define IPUV3_CHANNEL_MEM_ROT_PP 47 @@ -121,6 +132,7 @@ enum ipu_channel_irq { #define IPUV3_CHANNEL_ROT_VF_MEM 49 #define IPUV3_CHANNEL_ROT_PP_MEM 50 #define IPUV3_CHANNEL_MEM_BG_SYNC_ALPHA 51 +#define IPUV3_CHANNEL_MEM_BG_ASYNC_ALPHA 52 int ipu_map_irq(struct ipu_soc *ipu, int irq); int ipu_idmac_channel_irq(struct ipu_soc *ipu, struct ipuv3_channel *channel, -- cgit v0.10.2 From 3ec2e506f98f8464798d11217617421368711e34 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Fri, 29 Jul 2016 14:00:21 +0800 Subject: drm/imx: Remove imx_drm_handle_vblank() imx_drm_handle_vblank() is just a simple wrapper of drm_crtc_handle_vblank() without doing any thing fancy - drm_crtc_handle_vblank() can be called directly. So, let's remove the wrapper. 
Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 1aefced..6dc0ef4 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -84,12 +84,6 @@ static int imx_drm_driver_unload(struct drm_device *drm) return 0; } -void imx_drm_handle_vblank(struct imx_drm_crtc *imx_drm_crtc) -{ - drm_crtc_handle_vblank(imx_drm_crtc->crtc); -} -EXPORT_SYMBOL_GPL(imx_drm_handle_vblank); - static int imx_drm_enable_vblank(struct drm_device *drm, unsigned int pipe) { struct imx_drm_device *imxdrm = drm->dev_private; diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h index bdaa381..5a91cb1 100644 --- a/drivers/gpu/drm/imx/imx-drm.h +++ b/drivers/gpu/drm/imx/imx-drm.h @@ -42,8 +42,6 @@ int imx_drm_init_drm(struct platform_device *pdev, int preferred_bpp); int imx_drm_exit_drm(void); -void imx_drm_handle_vblank(struct imx_drm_crtc *imx_drm_crtc); - void imx_drm_mode_config_init(struct drm_device *drm); struct drm_gem_cma_object *imx_drm_fb_get_obj(struct drm_framebuffer *fb); diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 08e188b..5950b12 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -134,7 +134,7 @@ static irqreturn_t ipu_irq_handler(int irq, void *dev_id) { struct ipu_crtc *ipu_crtc = dev_id; - imx_drm_handle_vblank(ipu_crtc->imx_crtc); + drm_crtc_handle_vblank(&ipu_crtc->base); return IRQ_HANDLED; } -- cgit v0.10.2 From fe4a11c935ca675021b30051f08f3c5ab066f5bb Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 22 Jul 2016 12:20:47 +0200 Subject: drm/atomic-helper: Add atomic_mode_set helper callback Some encoders need more information from crtc and connector state or connector display info than just the mode during mode setting. This patch adds an atomic encoder mode setting variant that passes the crtc state (which contains the modes) and the connector state. atomic_enable/disable variants that additionally pass crtc and connector state don't seem to be necessary for any current driver. mode_fixup already has an atomic equivalent in atomic_check. Signed-off-by: Philipp Zabel Reviewed-by: Daniel Vetter diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 20be86d..9936508 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -886,8 +886,12 @@ crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *old_state) * Each encoder has at most one connector (since we always steal * it away), so we won't call mode_set hooks twice. */ - if (funcs && funcs->mode_set) + if (funcs && funcs->atomic_mode_set) { + funcs->atomic_mode_set(encoder, new_crtc_state, + connector->state); + } else if (funcs && funcs->mode_set) { funcs->mode_set(encoder, mode, adjusted_mode); + } drm_bridge_mode_set(encoder->bridge, mode, adjusted_mode); } diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index b55f218..686feec 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -523,12 +523,41 @@ struct drm_encoder_helper_funcs { * * This callback is used both by the legacy CRTC helpers and the atomic * modeset helpers. It is optional in the atomic helpers. 
+ * + * NOTE: + * + * If the driver uses the atomic modeset helpers and needs to inspect + * the connector state or connector display info during mode setting, + * @atomic_mode_set can be used instead. */ void (*mode_set)(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); /** + * @atomic_mode_set: + * + * This callback is used to update the display mode of an encoder. + * + * Note that the display pipe is completely off when this function is + * called. Drivers which need hardware to be running before they program + * the new display mode (because they implement runtime PM) should not + * use this hook, because the helper library calls it only once and not + * every time the display pipeline is suspended using either DPMS or the + * new "ACTIVE" property. Such drivers should instead move all their + * encoder setup into the ->enable() callback. + * + * This callback is used by the atomic modeset helpers in place of the + * @mode_set callback, if set by the driver. It is optional and should + * be used instead of @mode_set if the driver needs to inspect the + * connector state or display info, since there is no direct way to + * go from the encoder to the current connector. + */ + void (*atomic_mode_set)(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state); + + /** * @get_crtc: * * This callback is used by the legacy CRTC helpers to work around -- cgit v0.10.2 From 3a2ad5028cf2cc3067c7d8bf7fab68d9c1c3c0e8 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 22 Jul 2016 12:43:04 +0200 Subject: drm/imx: imx-ldb: use encoder atomic_mode_set callback Using atomic_mode_set instead of mode_set allows to access crtc and connector states in addition to the modes. This allows to remove the connector list walk. 
Signed-off-by: Philipp Zabel diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index 3ed2d50..7b588b4 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -252,11 +252,13 @@ static void imx_ldb_encoder_enable(struct drm_encoder *encoder) drm_panel_enable(imx_ldb_ch->panel); } -static void imx_ldb_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *orig_mode, - struct drm_display_mode *mode) +static void +imx_ldb_encoder_atomic_mode_set(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *connector_state) { struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder); + struct drm_display_mode *mode = &crtc_state->adjusted_mode; struct imx_ldb *ldb = imx_ldb_ch->ldb; int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN; unsigned long serial_clk; @@ -298,17 +300,11 @@ static void imx_ldb_encoder_mode_set(struct drm_encoder *encoder, } if (!bus_format) { - struct drm_connector *connector; + struct drm_connector *connector = connector_state->connector; + struct drm_display_info *di = &connector->display_info; - drm_for_each_connector(connector, encoder->dev) { - struct drm_display_info *di = &connector->display_info; - - if (connector->encoder == encoder && - di->num_bus_formats) { - bus_format = di->bus_formats[0]; - break; - } - } + if (di->num_bus_formats) + bus_format = di->bus_formats[0]; } imx_ldb_ch_set_bus_format(imx_ldb_ch, bus_format); } @@ -426,7 +422,7 @@ static const struct drm_encoder_funcs imx_ldb_encoder_funcs = { }; static const struct drm_encoder_helper_funcs imx_ldb_encoder_helper_funcs = { - .mode_set = imx_ldb_encoder_mode_set, + .atomic_mode_set = imx_ldb_encoder_atomic_mode_set, .enable = imx_ldb_encoder_enable, .disable = imx_ldb_encoder_disable, .atomic_check = imx_ldb_encoder_atomic_check, -- cgit v0.10.2 From 63fbf42f7307a5911237fed3285e669d9d4d0d1a Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 15 Jul 2016 11:19:20 +0800 Subject: drm/amdgpu: add check_soft_reset ip func MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is used to identify if the ip block is hang. 
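A minimal implementation of the new hook only latches the per-block hang state from the hardware status registers; a hypothetical sketch (the FOO register, mask and block type are placeholders):

    static int foo_check_soft_reset(void *handle)
    {
            struct amdgpu_device *adev = (struct amdgpu_device *)handle;
            u32 status = RREG32(mmFOO_STATUS);              /* placeholder */

            adev->ip_block_status[AMD_IP_BLOCK_TYPE_FOO].hang =
                    !!(status & FOO_STATUS__BUSY_MASK);     /* placeholder */

            return 0;
    }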
Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8ebc5f1..a21b342 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1956,6 +1956,7 @@ struct amdgpu_ip_block_status { bool valid; bool sw; bool hw; + bool hang; }; struct amdgpu_device { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index df7ab245..fd7698a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1937,6 +1937,24 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon) return 0; } +static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) +{ + int i; + bool asic_hang = false; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_block_status[i].valid) + continue; + if (adev->ip_blocks[i].funcs->check_soft_reset) + adev->ip_blocks[i].funcs->check_soft_reset(adev); + if (adev->ip_block_status[i].hang) { + DRM_INFO("IP block:%d is hang!\n", i); + asic_hang = true; + } + } + return asic_hang; +} + /** * amdgpu_gpu_reset - reset the asic * @@ -1950,6 +1968,11 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) int i, r; int resched; + if (!amdgpu_check_soft_reset(adev)) { + DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); + return 0; + } + atomic_inc(&adev->gpu_reset_counter); /* block TTM */ diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index a74a0d2..fe4aa09 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -159,6 +159,8 @@ struct amd_ip_funcs { bool (*is_idle)(void *handle); /* poll for idle */ int (*wait_for_idle)(void *handle); + /* check soft reset the IP block */ + int (*check_soft_reset)(void *handle); /* soft reset the IP block */ int (*soft_reset)(void *handle); /* enable/disable cg for the IP block */ -- cgit v0.10.2 From 3d7c63849072747eaba6b5d35671bd9cd2d002c1 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 15 Jul 2016 11:28:30 +0800 Subject: drm/amdgpu: implement gfx8 check_soft_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a21b342..b6e8e7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1195,6 +1195,10 @@ struct amdgpu_gfx { unsigned ce_ram_size; struct amdgpu_cu_info cu_info; const struct amdgpu_gfx_funcs *funcs; + + /* reset mask */ + uint32_t grbm_soft_reset; + uint32_t srbm_soft_reset; }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index bff8668..8723239 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5047,11 +5047,11 @@ static int gfx_v8_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int gfx_v8_0_soft_reset(void *handle) +static int gfx_v8_0_check_soft_reset(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 grbm_soft_reset = 0, srbm_soft_reset = 0; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* GRBM_STATUS */ tmp = RREG32(mmGRBM_STATUS); @@ -5060,16 +5060,12 @@ static int 
gfx_v8_0_soft_reset(void *handle) GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | - GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { + GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | + GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1); grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); - } - - if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { - grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, - GRBM_SOFT_RESET, SOFT_RESET_CP, 1); srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); } @@ -5080,73 +5076,99 @@ static int gfx_v8_0_soft_reset(void *handle) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || + REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || + REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPF, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPC, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPG, 1); + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, + SOFT_RESET_GRBM, 1); + } + /* SRBM_STATUS */ tmp = RREG32(mmSRBM_STATUS); if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); + if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, + SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); if (grbm_soft_reset || srbm_soft_reset) { - /* stop the rlc */ - gfx_v8_0_rlc_stop(adev); + adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = true; + adev->gfx.grbm_soft_reset = grbm_soft_reset; + adev->gfx.srbm_soft_reset = srbm_soft_reset; + } else { + adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = false; + adev->gfx.grbm_soft_reset = 0; + adev->gfx.srbm_soft_reset = 0; + } - /* Disable GFX parsing/prefetching */ - gfx_v8_0_cp_gfx_enable(adev, false); + return 0; +} - /* Disable MEC parsing/prefetching */ - gfx_v8_0_cp_compute_enable(adev, false); +static int gfx_v8_0_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + u32 tmp; - if (grbm_soft_reset || srbm_soft_reset) { - tmp = RREG32(mmGMCON_DEBUG); - tmp = REG_SET_FIELD(tmp, - GMCON_DEBUG, GFX_STALL, 1); - tmp = REG_SET_FIELD(tmp, - GMCON_DEBUG, GFX_CLEAR, 1); - WREG32(mmGMCON_DEBUG, tmp); + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang) + return 0; - udelay(50); - } + grbm_soft_reset = adev->gfx.grbm_soft_reset; + srbm_soft_reset = adev->gfx.srbm_soft_reset; - if (grbm_soft_reset) { - tmp = RREG32(mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmGRBM_SOFT_RESET, tmp); - tmp = RREG32(mmGRBM_SOFT_RESET); + if (grbm_soft_reset || srbm_soft_reset) { + tmp = RREG32(mmGMCON_DEBUG); + tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); + tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); + WREG32(mmGMCON_DEBUG, tmp); + udelay(50); + } - udelay(50); + if (grbm_soft_reset) { + tmp = RREG32(mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, 
"GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(mmGRBM_SOFT_RESET, tmp); + tmp = RREG32(mmGRBM_SOFT_RESET); - tmp &= ~grbm_soft_reset; - WREG32(mmGRBM_SOFT_RESET, tmp); - tmp = RREG32(mmGRBM_SOFT_RESET); - } + udelay(50); - if (srbm_soft_reset) { - tmp = RREG32(mmSRBM_SOFT_RESET); - tmp |= srbm_soft_reset; - dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); + tmp &= ~grbm_soft_reset; + WREG32(mmGRBM_SOFT_RESET, tmp); + tmp = RREG32(mmGRBM_SOFT_RESET); + } - udelay(50); + if (srbm_soft_reset) { + tmp = RREG32(mmSRBM_SOFT_RESET); + tmp |= srbm_soft_reset; + dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); - tmp &= ~srbm_soft_reset; - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - } + udelay(50); - if (grbm_soft_reset || srbm_soft_reset) { - tmp = RREG32(mmGMCON_DEBUG); - tmp = REG_SET_FIELD(tmp, - GMCON_DEBUG, GFX_STALL, 0); - tmp = REG_SET_FIELD(tmp, - GMCON_DEBUG, GFX_CLEAR, 0); - WREG32(mmGMCON_DEBUG, tmp); - } + tmp &= ~srbm_soft_reset; + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); + } - /* Wait a little for things to settle down */ - udelay(50); + if (grbm_soft_reset || srbm_soft_reset) { + tmp = RREG32(mmGMCON_DEBUG); + tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); + tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); + WREG32(mmGMCON_DEBUG, tmp); } + + /* Wait a little for things to settle down */ + udelay(50); + return 0; } @@ -6334,6 +6356,7 @@ const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .resume = gfx_v8_0_resume, .is_idle = gfx_v8_0_is_idle, .wait_for_idle = gfx_v8_0_wait_for_idle, + .check_soft_reset = gfx_v8_0_check_soft_reset, .soft_reset = gfx_v8_0_soft_reset, .set_clockgating_state = gfx_v8_0_set_clockgating_state, .set_powergating_state = gfx_v8_0_set_powergating_state, -- cgit v0.10.2 From d31a501ead7fb21b85c6f34a9dd79b07c6417711 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 18 Jul 2016 10:04:34 +0800 Subject: drm/amdgpu: add pre_soft_reset ip func MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be used before soft_reset to do some preparing work for reset. 
Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fd7698a..b886205 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1955,6 +1955,23 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) return asic_hang; } +int amdgpu_pre_soft_reset(struct amdgpu_device *adev) +{ + int i, r = 0; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_block_status[i].valid) + continue; + if (adev->ip_blocks[i].funcs->pre_soft_reset) { + r = adev->ip_blocks[i].funcs->pre_soft_reset(adev); + if (r) + return r; + } + } + + return 0; +} + /** * amdgpu_gpu_reset - reset the asic * diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index fe4aa09..2a2a5aa 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -161,6 +161,8 @@ struct amd_ip_funcs { int (*wait_for_idle)(void *handle); /* check soft reset the IP block */ int (*check_soft_reset)(void *handle); + /* pre soft reset the IP block */ + int (*pre_soft_reset)(void *handle); /* soft reset the IP block */ int (*soft_reset)(void *handle); /* enable/disable cg for the IP block */ -- cgit v0.10.2 From 1057f20c2252c5eba17e4a9bbf74ea990411f46c Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 15 Jul 2016 15:18:25 +0800 Subject: drm/amdgpu: add gfx8 pre soft reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 8723239..62ba7e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5111,6 +5111,63 @@ static int gfx_v8_0_check_soft_reset(void *handle) return 0; } +static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + int i; + + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { + u32 tmp; + tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); + tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST, + DEQUEUE_REQ, 2); + WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + udelay(1); + } + } +} + +static int gfx_v8_0_pre_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang) + return 0; + + grbm_soft_reset = adev->gfx.grbm_soft_reset; + srbm_soft_reset = adev->gfx.srbm_soft_reset; + + /* stop the rlc */ + gfx_v8_0_rlc_stop(adev); + + if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || + REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) + /* Disable GFX parsing/prefetching */ + gfx_v8_0_cp_gfx_enable(adev, false); + + if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || + REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || + REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || + REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { + int i; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + + 
gfx_v8_0_inactive_hqd(adev, ring); + } + /* Disable MEC parsing/prefetching */ + gfx_v8_0_cp_compute_enable(adev, false); + } + + return 0; +} + static int gfx_v8_0_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -6357,6 +6414,7 @@ const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .is_idle = gfx_v8_0_is_idle, .wait_for_idle = gfx_v8_0_wait_for_idle, .check_soft_reset = gfx_v8_0_check_soft_reset, + .pre_soft_reset = gfx_v8_0_pre_soft_reset, .soft_reset = gfx_v8_0_soft_reset, .set_clockgating_state = gfx_v8_0_set_clockgating_state, .set_powergating_state = gfx_v8_0_set_powergating_state, -- cgit v0.10.2 From 35d782feae7f0b817016315d8718a82c61968894 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 15 Jul 2016 15:57:13 +0800 Subject: drm/amdgpu: add amdgpu soft reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check gpu status first, if MC/VMC/DISPLAY hang, directly triger full reset. If engine hangs, then triger engine soft reset, if soft reset fails, will fallback to full reset. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b886205..2bd2b19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1962,7 +1962,8 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_block_status[i].valid) continue; - if (adev->ip_blocks[i].funcs->pre_soft_reset) { + if (adev->ip_block_status[i].hang && + adev->ip_blocks[i].funcs->pre_soft_reset) { r = adev->ip_blocks[i].funcs->pre_soft_reset(adev); if (r) return r; @@ -1972,6 +1973,58 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev) return 0; } +static bool amdgpu_need_full_reset(struct amdgpu_device *adev) +{ + if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_ACP].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) { + DRM_INFO("Some block need full reset!\n"); + return true; + } + return false; +} + +static int amdgpu_soft_reset(struct amdgpu_device *adev) +{ + int i, r = 0; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_block_status[i].valid) + continue; + if (adev->ip_block_status[i].hang && + adev->ip_blocks[i].funcs->soft_reset) { + r = adev->ip_blocks[i].funcs->soft_reset(adev); + if (r) + return r; + } + } + + return 0; +} + +static int amdgpu_post_soft_reset(struct amdgpu_device *adev) +{ + int i, r = 0; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_block_status[i].valid) + continue; + if (adev->ip_block_status[i].hang && + adev->ip_blocks[i].funcs->post_soft_reset) + r = adev->ip_blocks[i].funcs->post_soft_reset(adev); + if (r) + return r; + } + + return 0; +} + /** * amdgpu_gpu_reset - reset the asic * @@ -1984,6 +2037,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) { int i, r; int resched; + bool need_full_reset; if (!amdgpu_check_soft_reset(adev)) { DRM_INFO("No hardware hang detected. 
Did some blocks stall?\n"); @@ -2007,28 +2061,42 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(adev); - /* save scratch */ - amdgpu_atombios_scratch_regs_save(adev); - r = amdgpu_suspend(adev); + need_full_reset = amdgpu_need_full_reset(adev); -retry: - /* Disable fb access */ - if (adev->mode_info.num_crtc) { - struct amdgpu_mode_mc_save save; - amdgpu_display_stop_mc_access(adev, &save); - amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC); + if (!need_full_reset) { + amdgpu_pre_soft_reset(adev); + r = amdgpu_soft_reset(adev); + amdgpu_post_soft_reset(adev); + if (r || amdgpu_check_soft_reset(adev)) { + DRM_INFO("soft reset failed, will fallback to full reset!\n"); + need_full_reset = true; + } } - r = amdgpu_asic_reset(adev); - /* post card */ - amdgpu_atom_asic_init(adev->mode_info.atom_context); + if (need_full_reset) { + /* save scratch */ + amdgpu_atombios_scratch_regs_save(adev); + r = amdgpu_suspend(adev); - if (!r) { - dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); - r = amdgpu_resume(adev); +retry: + /* Disable fb access */ + if (adev->mode_info.num_crtc) { + struct amdgpu_mode_mc_save save; + amdgpu_display_stop_mc_access(adev, &save); + amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC); + } + + r = amdgpu_asic_reset(adev); + /* post card */ + amdgpu_atom_asic_init(adev->mode_info.atom_context); + + if (!r) { + dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); + r = amdgpu_resume(adev); + } + /* restore scratch */ + amdgpu_atombios_scratch_regs_restore(adev); } - /* restore scratch */ - amdgpu_atombios_scratch_regs_restore(adev); if (!r) { r = amdgpu_ib_ring_tests(adev); if (r) { diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 2a2a5aa..db71041 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -165,6 +165,8 @@ struct amd_ip_funcs { int (*pre_soft_reset)(void *handle); /* soft reset the IP block */ int (*soft_reset)(void *handle); + /* post soft reset the IP block */ + int (*post_soft_reset)(void *handle); /* enable/disable cg for the IP block */ int (*set_clockgating_state)(void *handle, enum amd_clockgating_state state); -- cgit v0.10.2 From e4ae0fc3363191f31fb9627fff9f88d43523aac7 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 15 Jul 2016 16:24:25 +0800 Subject: drm/amdgpu: implement gfx8 post_soft_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2bd2b19..1e55366 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1978,7 +1978,6 @@ static bool amdgpu_need_full_reset(struct amdgpu_device *adev) if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang || - adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang || diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 62ba7e5..af0efa2 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5229,6 +5229,49 @@ static int gfx_v8_0_soft_reset(void *handle) return 0; } +static void gfx_v8_0_init_hqd(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); + WREG32(mmCP_HQD_PQ_RPTR, 0); + WREG32(mmCP_HQD_PQ_WPTR, 0); + vi_srbm_select(adev, 0, 0, 0, 0); +} + +static int gfx_v8_0_post_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang) + return 0; + + grbm_soft_reset = adev->gfx.grbm_soft_reset; + srbm_soft_reset = adev->gfx.srbm_soft_reset; + + if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || + REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) + gfx_v8_0_cp_gfx_resume(adev); + + if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || + REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || + REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || + REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { + int i; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + + gfx_v8_0_init_hqd(adev, ring); + } + gfx_v8_0_cp_compute_resume(adev); + } + gfx_v8_0_rlc_start(adev); + + return 0; +} + /** * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot * @@ -6416,6 +6459,7 @@ const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .check_soft_reset = gfx_v8_0_check_soft_reset, .pre_soft_reset = gfx_v8_0_pre_soft_reset, .soft_reset = gfx_v8_0_soft_reset, + .post_soft_reset = gfx_v8_0_post_soft_reset, .set_clockgating_state = gfx_v8_0_set_clockgating_state, .set_powergating_state = gfx_v8_0_set_powergating_state, }; -- cgit v0.10.2 From e702a68051b174e87df62bbc0204809b5c1ba3e5 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 13 Jul 2016 10:28:56 +0800 Subject: drm/amdgpu: implement sdma3 check/pre/post_soft_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b6e8e7a..17600b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1733,6 +1733,7 @@ struct amdgpu_sdma { struct amdgpu_irq_src trap_irq; struct amdgpu_irq_src illegal_inst_irq; int num_instances; + uint32_t srbm_soft_reset; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1e55366..47f29f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1978,7 +1978,6 @@ static bool amdgpu_need_full_reset(struct amdgpu_device *adev) if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang || - adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_ACP].hang || diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 653ce5e..3a63a4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1320,28 +1320,79 @@ static int sdma_v3_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int sdma_v3_0_soft_reset(void *handle) +static int sdma_v3_0_check_soft_reset(void *handle) { - u32 srbm_soft_reset = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS2); - if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) { - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0); - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); + if ((tmp & SRBM_STATUS2__SDMA_BUSY_MASK) || + (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK)) { srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; - } - if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) { - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0); - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; } if (srbm_soft_reset) { + adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang = true; + adev->sdma.srbm_soft_reset = srbm_soft_reset; + } else { + adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang = false; + adev->sdma.srbm_soft_reset = 0; + } + + return 0; +} + +static int sdma_v3_0_pre_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset = 0; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang) + return 0; + + srbm_soft_reset = adev->sdma.srbm_soft_reset; + + if (REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA) || + REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1)) { + sdma_v3_0_ctx_switch_enable(adev, false); + sdma_v3_0_enable(adev, false); + } + + return 0; +} + +static int sdma_v3_0_post_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset = 0; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang) + return 0; + + srbm_soft_reset = adev->sdma.srbm_soft_reset; + + if (REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA) || + REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1)) { + sdma_v3_0_gfx_resume(adev); + sdma_v3_0_rlc_resume(adev); + } + + return 0; +} + +static int sdma_v3_0_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset = 0; + u32 tmp; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang) + return 0; + + srbm_soft_reset = adev->sdma.srbm_soft_reset; + + if (srbm_soft_reset) { tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -1559,6 +1610,9 @@ const struct amd_ip_funcs sdma_v3_0_ip_funcs = { .resume = sdma_v3_0_resume, .is_idle = sdma_v3_0_is_idle, .wait_for_idle = sdma_v3_0_wait_for_idle, + .check_soft_reset = sdma_v3_0_check_soft_reset, + .pre_soft_reset = sdma_v3_0_pre_soft_reset, + .post_soft_reset = sdma_v3_0_post_soft_reset, .soft_reset = sdma_v3_0_soft_reset, .set_clockgating_state = sdma_v3_0_set_clockgating_state, .set_powergating_state = sdma_v3_0_set_powergating_state, -- cgit v0.10.2 From 50b0197abfa062d05f5f14a94e04ed7fd45cb003 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 18 Jul 2016 16:59:24 +0800 Subject: drm/amdgpu: implement gmc8 check/pre/post_soft_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian 
König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 17600b4..e4a731b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -677,6 +677,8 @@ struct amdgpu_mc { uint32_t fw_version; struct amdgpu_irq_src vm_fault; uint32_t vram_type; + uint32_t srbm_soft_reset; + struct amdgpu_mode_mc_save save; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 717359d..0a23b83 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1092,9 +1092,8 @@ static int gmc_v8_0_wait_for_idle(void *handle) } -static int gmc_v8_0_soft_reset(void *handle) +static int gmc_v8_0_check_soft_reset(void *handle) { - struct amdgpu_mode_mc_save save; u32 srbm_soft_reset = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 tmp = RREG32(mmSRBM_STATUS); @@ -1109,13 +1108,42 @@ static int gmc_v8_0_soft_reset(void *handle) srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_MC, 1); } - if (srbm_soft_reset) { - gmc_v8_0_mc_stop(adev, &save); - if (gmc_v8_0_wait_for_idle((void *)adev)) { - dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); - } + adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang = true; + adev->mc.srbm_soft_reset = srbm_soft_reset; + } else { + adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang = false; + adev->mc.srbm_soft_reset = 0; + } + return 0; +} + +static int gmc_v8_0_pre_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang) + return 0; + + gmc_v8_0_mc_stop(adev, &adev->mc.save); + if (gmc_v8_0_wait_for_idle(adev)) { + dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); + } + + return 0; +} +static int gmc_v8_0_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang) + return 0; + srbm_soft_reset = adev->mc.srbm_soft_reset; + + if (srbm_soft_reset) { + u32 tmp; tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; @@ -1131,14 +1159,22 @@ static int gmc_v8_0_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - - gmc_v8_0_mc_resume(adev, &save); - udelay(50); } return 0; } +static int gmc_v8_0_post_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang) + return 0; + + gmc_v8_0_mc_resume(adev, &adev->mc.save); + return 0; +} + static int gmc_v8_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -1406,7 +1442,10 @@ const struct amd_ip_funcs gmc_v8_0_ip_funcs = { .resume = gmc_v8_0_resume, .is_idle = gmc_v8_0_is_idle, .wait_for_idle = gmc_v8_0_wait_for_idle, + .check_soft_reset = gmc_v8_0_check_soft_reset, + .pre_soft_reset = gmc_v8_0_pre_soft_reset, .soft_reset = gmc_v8_0_soft_reset, + .post_soft_reset = gmc_v8_0_post_soft_reset, .set_clockgating_state = gmc_v8_0_set_clockgating_state, .set_powergating_state = gmc_v8_0_set_powergating_state, }; -- cgit v0.10.2 From 1015a1b1750e578868a96d812d388d3c65d7faaf Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 18 Jul 2016 17:02:57 +0800 Subject: drm/amdgpu: implement vi ih check/pre/post_soft_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou 
Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 47f29f9..3640b12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1976,7 +1976,6 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev) static bool amdgpu_need_full_reset(struct amdgpu_device *adev) { if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang || - adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang || diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 7ef0935..f016464 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -70,6 +70,7 @@ struct amdgpu_irq { /* gen irq stuff */ struct irq_domain *domain; /* GPU irq controller domain */ unsigned virq[AMDGPU_MAX_IRQ_SRC_ID]; + uint32_t srbm_soft_reset; }; void amdgpu_irq_preinstall(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index c920558..d127d59 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -373,10 +373,10 @@ static int tonga_ih_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int tonga_ih_soft_reset(void *handle) +static int tonga_ih_check_soft_reset(void *handle) { - u32 srbm_soft_reset = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) @@ -384,6 +384,48 @@ static int tonga_ih_soft_reset(void *handle) SOFT_RESET_IH, 1); if (srbm_soft_reset) { + adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang = true; + adev->irq.srbm_soft_reset = srbm_soft_reset; + } else { + adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang = false; + adev->irq.srbm_soft_reset = 0; + } + + return 0; +} + +static int tonga_ih_pre_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang) + return 0; + + return tonga_ih_hw_fini(adev); +} + +static int tonga_ih_post_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang) + return 0; + + return tonga_ih_hw_init(adev); +} + +static int tonga_ih_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang) + return 0; + srbm_soft_reset = adev->irq.srbm_soft_reset; + + if (srbm_soft_reset) { + u32 tmp; + tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -427,7 +469,10 @@ const struct amd_ip_funcs tonga_ih_ip_funcs = { .resume = tonga_ih_resume, .is_idle = tonga_ih_is_idle, .wait_for_idle = tonga_ih_wait_for_idle, + .check_soft_reset = tonga_ih_check_soft_reset, + .pre_soft_reset = tonga_ih_pre_soft_reset, .soft_reset = tonga_ih_soft_reset, + .post_soft_reset = tonga_ih_post_soft_reset, .set_clockgating_state = tonga_ih_set_clockgating_state, .set_powergating_state = tonga_ih_set_powergating_state, }; -- cgit v0.10.2 From fc0b3b90b7e8c761372edc9d5661d0c4421ac116 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 18 Jul 2016 17:18:01 +0800 Subject: 
drm/amdgpu: implement UVD6 check/pre/post_soft_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e4a731b..108e04f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1689,6 +1689,7 @@ struct amdgpu_uvd { bool address_64_bit; bool use_ctx_buf; struct amd_sched_entity entity; + uint32_t srbm_soft_reset; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3640b12..309e58c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1977,7 +1977,6 @@ static bool amdgpu_need_full_reset(struct amdgpu_device *adev) { if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang || - adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_ACP].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 7f21102..4fa5091 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -754,14 +754,76 @@ static int uvd_v6_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int uvd_v6_0_soft_reset(void *handle) +#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd +static int uvd_v6_0_check_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset = 0; + u32 tmp = RREG32(mmSRBM_STATUS); + + if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || + REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || + (RREG32(mmUVD_STATUS) & AMDGPU_UVD_STATUS_BUSY_MASK)) + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); + + if (srbm_soft_reset) { + adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang = true; + adev->uvd.srbm_soft_reset = srbm_soft_reset; + } else { + adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang = false; + adev->uvd.srbm_soft_reset = 0; + } + return 0; +} +static int uvd_v6_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang) + return 0; + uvd_v6_0_stop(adev); + return 0; +} + +static int uvd_v6_0_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang) + return 0; + srbm_soft_reset = adev->uvd.srbm_soft_reset; + + if (srbm_soft_reset) { + u32 tmp; + + tmp = RREG32(mmSRBM_SOFT_RESET); + tmp |= srbm_soft_reset; + dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~srbm_soft_reset; + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); + + /* Wait a little for things to settle down */ + udelay(50); + } + + return 0; +} + +static int uvd_v6_0_post_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang) + return 0; - WREG32_P(mmSRBM_SOFT_RESET, SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK, - ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK); mdelay(5); return uvd_v6_0_start(adev); @@ -966,7 +1028,10 
@@ const struct amd_ip_funcs uvd_v6_0_ip_funcs = { .resume = uvd_v6_0_resume, .is_idle = uvd_v6_0_is_idle, .wait_for_idle = uvd_v6_0_wait_for_idle, + .check_soft_reset = uvd_v6_0_check_soft_reset, + .pre_soft_reset = uvd_v6_0_pre_soft_reset, .soft_reset = uvd_v6_0_soft_reset, + .post_soft_reset = uvd_v6_0_post_soft_reset, .set_clockgating_state = uvd_v6_0_set_clockgating_state, .set_powergating_state = uvd_v6_0_set_powergating_state, }; -- cgit v0.10.2 From 115933a57779ec453a629331d08bb8e8d124f0d5 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 18 Jul 2016 17:38:50 +0800 Subject: drm/amdgpu: implement vce3 check/post_soft_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 108e04f..e936468 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1716,6 +1716,7 @@ struct amdgpu_vce { struct amdgpu_irq_src irq; unsigned harvest_config; struct amd_sched_entity entity; + uint32_t srbm_soft_reset; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index c271abf..e5b18ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -37,6 +37,9 @@ #include "gca/gfx_8_0_d.h" #include "smu/smu_7_1_2_d.h" #include "smu/smu_7_1_2_sh_mask.h" +#include "gca/gfx_8_0_d.h" +#include "gca/gfx_8_0_sh_mask.h" + #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04 #define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10 @@ -601,20 +604,115 @@ static int vce_v3_0_wait_for_idle(void *handle) return -ETIMEDOUT; } +#define AMDGPU_VCE_STATUS_BUSY_MASK 0x78 + +static int vce_v3_0_check_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset = 0; + u32 tmp; + + /* VCE BUG: it is always busy, so skip its checking now */ + return 0; + + /* According to VCE team , we should use VCE_STATUS instead + * SRBM_STATUS.VCE_BUSY bit for busy status checking. + * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE + * instance's registers are accessed + * (0 for 1st instance, 10 for 2nd instance). 
+ * + *VCE_STATUS + *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB | + *|----+----+-----------+----+----+----+----------+---------+----| + *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0| + * + * VCE team suggest use bit 3--bit 6 for busy status check + */ + tmp = RREG32(mmGRBM_GFX_INDEX); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0); + WREG32(mmGRBM_GFX_INDEX, tmp); + if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) { + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); + } + tmp = RREG32(mmGRBM_GFX_INDEX); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10); + WREG32(mmGRBM_GFX_INDEX, tmp); + if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) { + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); + } + tmp = RREG32(mmGRBM_GFX_INDEX); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0); + WREG32(mmGRBM_GFX_INDEX, tmp); + + if (adev->vce.harvest_config & (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) + srbm_soft_reset = 0; + + if (srbm_soft_reset) { + adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = true; + adev->vce.srbm_soft_reset = srbm_soft_reset; + } else { + adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = false; + adev->vce.srbm_soft_reset = 0; + } + return 0; +} + static int vce_v3_0_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 mask = 0; + u32 srbm_soft_reset; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang) + return 0; + srbm_soft_reset = adev->vce.srbm_soft_reset; + + if (srbm_soft_reset) { + u32 tmp; - mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_SOFT_RESET__SOFT_RESET_VCE0_MASK; - mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 
0 : SRBM_SOFT_RESET__SOFT_RESET_VCE1_MASK; + tmp = RREG32(mmSRBM_SOFT_RESET); + tmp |= srbm_soft_reset; + dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~srbm_soft_reset; + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); + + /* Wait a little for things to settle down */ + udelay(50); + } + + return 0; +} + +static int vce_v3_0_pre_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang) + return 0; + + mdelay(5); + + return vce_v3_0_suspend(adev); +} + + +static int vce_v3_0_post_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang) + return 0; - WREG32_P(mmSRBM_SOFT_RESET, mask, - ~(SRBM_SOFT_RESET__SOFT_RESET_VCE0_MASK | - SRBM_SOFT_RESET__SOFT_RESET_VCE1_MASK)); mdelay(5); - return vce_v3_0_start(adev); + return vce_v3_0_resume(adev); } static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev, @@ -751,7 +849,10 @@ const struct amd_ip_funcs vce_v3_0_ip_funcs = { .resume = vce_v3_0_resume, .is_idle = vce_v3_0_is_idle, .wait_for_idle = vce_v3_0_wait_for_idle, + .check_soft_reset = vce_v3_0_check_soft_reset, + .pre_soft_reset = vce_v3_0_pre_soft_reset, .soft_reset = vce_v3_0_soft_reset, + .post_soft_reset = vce_v3_0_post_soft_reset, .set_clockgating_state = vce_v3_0_set_clockgating_state, .set_powergating_state = vce_v3_0_set_powergating_state, }; -- cgit v0.10.2 From 81e04e1809057128abe1b996ca7bc0cbb38e56c9 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 20 Jul 2016 13:42:25 +0800 Subject: drm/amdgpu: add dce10 check_soft_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index c1b04e9..7f642b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3141,11 +3141,26 @@ static int dce_v10_0_wait_for_idle(void *handle) return 0; } +static int dce_v10_0_check_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (dce_v10_0_is_display_hung(adev)) + adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang = true; + else + adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang = false; + + return 0; +} + static int dce_v10_0_soft_reset(void *handle) { u32 srbm_soft_reset = 0, tmp; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) + return 0; + if (dce_v10_0_is_display_hung(adev)) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; @@ -3512,6 +3527,7 @@ const struct amd_ip_funcs dce_v10_0_ip_funcs = { .resume = dce_v10_0_resume, .is_idle = dce_v10_0_is_idle, .wait_for_idle = dce_v10_0_wait_for_idle, + .check_soft_reset = dce_v10_0_check_soft_reset, .soft_reset = dce_v10_0_soft_reset, .set_clockgating_state = dce_v10_0_set_clockgating_state, .set_powergating_state = dce_v10_0_set_powergating_state, -- cgit v0.10.2 From 7c204889dec5e745cdcc174388fa3aa824e75160 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 14 Dec 2015 13:18:01 +0100 Subject: drm/amdgpu: add new helper for in kernel allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit We often allocate, pin and map things at the same time in the kernel. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6f0873c..9357358 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -211,6 +211,69 @@ static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo, bo->placement.busy_placement = bo->placements; } +/** + * amdgpu_bo_create_kernel - create BO for kernel use + * + * @adev: amdgpu device object + * @size: size for the new BO + * @align: alignment for the new BO + * @domain: where to place it + * @bo_ptr: resulting BO + * @gpu_addr: GPU addr of the pinned BO + * @cpu_addr: optional CPU address mapping + * + * Allocates and pins a BO for kernel internal use. + * + * Returns 0 on success, negative error code otherwise. + */ +int amdgpu_bo_create_kernel(struct amdgpu_device *adev, + unsigned long size, int align, + u32 domain, struct amdgpu_bo **bo_ptr, + u64 *gpu_addr, void **cpu_addr) +{ + int r; + + r = amdgpu_bo_create(adev, size, align, true, domain, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, + NULL, NULL, bo_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); + return r; + } + + r = amdgpu_bo_reserve(*bo_ptr, false); + if (r) { + dev_err(adev->dev, "(%d) failed to reserve kernel bo\n", r); + goto error_free; + } + + r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr); + if (r) { + dev_err(adev->dev, "(%d) kernel bo pin failed\n", r); + goto error_unreserve; + } + + if (cpu_addr) { + r = amdgpu_bo_kmap(*bo_ptr, cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) kernel bo map failed\n", r); + goto error_unreserve; + } + } + + amdgpu_bo_unreserve(*bo_ptr); + + return 0; + +error_unreserve: + amdgpu_bo_unreserve(*bo_ptr); + +error_free: + amdgpu_bo_unref(bo_ptr); + + return r; +} + int amdgpu_bo_create_restricted(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index bdb01d9..ae188a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -139,6 +139,10 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, struct ttm_placement *placement, struct reservation_object *resv, struct amdgpu_bo **bo_ptr); +int amdgpu_bo_create_kernel(struct amdgpu_device *adev, + unsigned long size, int align, + u32 domain, struct amdgpu_bo **bo_ptr, + u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo); -- cgit v0.10.2 From 78bbbd9c0fcde51b56c5bec99dd286cbad698712 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 14 Dec 2015 13:23:57 +0100 Subject: drm/amdgpu: pin shared GWS, GDS and OA resources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They can't move anyway, but just to be clean here. 
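As a quick illustration of the helper these conversions rely on, a rough usage sketch of amdgpu_bo_create_kernel(); the buffer purpose, size and variable names here are arbitrary examples, not taken from the patches:

	struct amdgpu_bo *scratch_bo;
	u64 scratch_gpu_addr;
	void *scratch_cpu_addr;
	int r;

	/* allocate, pin and CPU-map a page-sized buffer in GTT in one call */
	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &scratch_bo,
				    &scratch_gpu_addr, &scratch_cpu_addr);
	if (r)
		return r;

	/* scratch_gpu_addr now holds the pinned GPU address and
	 * scratch_cpu_addr the kernel mapping; either pointer may be
	 * NULL if the address is not needed, as the GDS/GWS/OA
	 * conversions below do */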
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d869d05..f4fbec3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4465,24 +4465,21 @@ static int gfx_v7_0_sw_init(void *handle) } /* reserve GDS, GWS and OA resource for gfx */ - r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GDS, 0, - NULL, NULL, &adev->gds.gds_gfx_bo); + r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, + &adev->gds.gds_gfx_bo, NULL, NULL); if (r) return r; - r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GWS, 0, - NULL, NULL, &adev->gds.gws_gfx_bo); + r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, + &adev->gds.gws_gfx_bo, NULL, NULL); if (r) return r; - r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_OA, 0, - NULL, NULL, &adev->gds.oa_gfx_bo); + r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, + &adev->gds.oa_gfx_bo, NULL, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index af0efa2..ce0929b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2078,24 +2078,21 @@ static int gfx_v8_0_sw_init(void *handle) } /* reserve GDS, GWS and OA resource for gfx */ - r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GDS, 0, NULL, - NULL, &adev->gds.gds_gfx_bo); + r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, + &adev->gds.gds_gfx_bo, NULL, NULL); if (r) return r; - r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GWS, 0, NULL, - NULL, &adev->gds.gws_gfx_bo); + r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, + &adev->gds.gws_gfx_bo, NULL, NULL); if (r) return r; - r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_OA, 0, NULL, - NULL, &adev->gds.oa_gfx_bo); + r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, + &adev->gds.oa_gfx_bo, NULL, NULL); if (r) return r; -- cgit v0.10.2 From cdb7e8f2733579e35ec2883bde7b594d07ad67e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 25 Jul 2016 17:56:18 +0200 Subject: drm/amdgpu: add more warning to amdgpu_bo_offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Warn when we try to get the address and the BO isn't locked or reserved. 
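An illustrative sketch of the calling convention these warnings enforce (fragment only; "bo" stands for any placed amdgpu_bo, which must either be reserved as shown or already pinned, and must not still sit in the CPU/system domain):

	u64 gpu_addr;
	int r;

	r = amdgpu_bo_reserve(bo, false);
	if (r)
		return r;
	gpu_addr = amdgpu_bo_gpu_offset(bo);	/* no warning: reservation is held */
	amdgpu_bo_unreserve(bo);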
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 9357358..67de19c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -737,3 +737,21 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence, else reservation_object_add_excl_fence(resv, fence); } + +/** + * amdgpu_bo_gpu_offset - return GPU offset of bo + * @bo: amdgpu object for which we query the offset + * + * Returns current GPU offset of the object. + * + * Note: object should either be pinned or reserved when calling this + * function, it might be useful to add check for this for debugging. + */ +u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) +{ + WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); + WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) && + !bo->pin_count); + + return bo->tbo.offset; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index ae188a8..d650b42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -85,21 +85,6 @@ static inline void amdgpu_bo_unreserve(struct amdgpu_bo *bo) ttm_bo_unreserve(&bo->tbo); } -/** - * amdgpu_bo_gpu_offset - return GPU offset of bo - * @bo: amdgpu object for which we query the offset - * - * Returns current GPU offset of the object. - * - * Note: object should either be pinned or reserved when calling this - * function, it might be useful to add check for this for debugging. - */ -static inline u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) -{ - WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); - return bo->tbo.offset; -} - static inline unsigned long amdgpu_bo_size(struct amdgpu_bo *bo) { return bo->tbo.num_pages << PAGE_SHIFT; @@ -169,6 +154,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence, bool shared); +u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); /* * sub allocation -- cgit v0.10.2 From 4b62e697777d79c88c61a2cdb826a6c413052a09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 25 Jul 2016 17:37:38 +0200 Subject: drm/amdgpu: user amdgpu_bo_create_kernel for the UVD BO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Saves us some code. 
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b11f4e8..c22b64e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -201,39 +201,14 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE + AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles; - r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, NULL, &adev->uvd.vcpu_bo); + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo, + &adev->uvd.gpu_addr, &adev->uvd.cpu_addr); if (r) { dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); return r; } - r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false); - if (r) { - amdgpu_bo_unref(&adev->uvd.vcpu_bo); - dev_err(adev->dev, "(%d) failed to reserve UVD bo\n", r); - return r; - } - - r = amdgpu_bo_pin(adev->uvd.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM, - &adev->uvd.gpu_addr); - if (r) { - amdgpu_bo_unreserve(adev->uvd.vcpu_bo); - amdgpu_bo_unref(&adev->uvd.vcpu_bo); - dev_err(adev->dev, "(%d) UVD bo pin failed\n", r); - return r; - } - - r = amdgpu_bo_kmap(adev->uvd.vcpu_bo, &adev->uvd.cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) UVD map failed\n", r); - return r; - } - - amdgpu_bo_unreserve(adev->uvd.vcpu_bo); - ring = &adev->uvd.ring; rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity, -- cgit v0.10.2 From c6f4439ce669f4ffaf5ac5254ad477befc03458a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 25 Jul 2016 18:06:30 +0200 Subject: drm/amdgpu: use amdgpu_bo_create_kernel in amdgpu_ih.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Saves us quite a bunch of code. 
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 534fc04..5ebb3f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -40,32 +40,15 @@ static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev) /* Allocate ring buffer */ if (adev->irq.ih.ring_obj == NULL) { - r = amdgpu_bo_create(adev, adev->irq.ih.ring_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, - NULL, NULL, &adev->irq.ih.ring_obj); + r = amdgpu_bo_create_kernel(adev, adev->irq.ih.ring_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->irq.ih.ring_obj, + &adev->irq.ih.gpu_addr, + (void **)&adev->irq.ih.ring); if (r) { DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r); return r; } - r = amdgpu_bo_reserve(adev->irq.ih.ring_obj, false); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_pin(adev->irq.ih.ring_obj, - AMDGPU_GEM_DOMAIN_GTT, - &adev->irq.ih.gpu_addr); - if (r) { - amdgpu_bo_unreserve(adev->irq.ih.ring_obj); - DRM_ERROR("amdgpu: failed to pin ih ring buffer (%d).\n", r); - return r; - } - r = amdgpu_bo_kmap(adev->irq.ih.ring_obj, - (void **)&adev->irq.ih.ring); - amdgpu_bo_unreserve(adev->irq.ih.ring_obj); - if (r) { - DRM_ERROR("amdgpu: failed to map ih ring buffer (%d).\n", r); - return r; - } } return 0; } -- cgit v0.10.2 From 37ac235bf8c8a44c6c5aa8b03ab3d5ad23f64cef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 26 Jul 2016 09:58:45 +0200 Subject: drm/amdgpu: use amdgpu_bo_create_kernel in amdgpu_ring.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Saves us quite a bunch of code. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 85aeb0a..242ba04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -222,33 +222,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, /* Allocate ring buffer */ if (ring->ring_obj == NULL) { - r = amdgpu_bo_create(adev, ring->ring_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, - NULL, NULL, &ring->ring_obj); + r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &ring->ring_obj, + &ring->gpu_addr, + (void **)&ring->ring); if (r) { dev_err(adev->dev, "(%d) ring create failed\n", r); return r; } - r = amdgpu_bo_reserve(ring->ring_obj, false); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_pin(ring->ring_obj, AMDGPU_GEM_DOMAIN_GTT, - &ring->gpu_addr); - if (r) { - amdgpu_bo_unreserve(ring->ring_obj); - dev_err(adev->dev, "(%d) ring pin failed\n", r); - return r; - } - r = amdgpu_bo_kmap(ring->ring_obj, - (void **)&ring->ring); - memset((void *)ring->ring, 0, ring->ring_size); - - amdgpu_bo_unreserve(ring->ring_obj); - if (r) { - dev_err(adev->dev, "(%d) ring map failed\n", r); - return r; - } } ring->ptr_mask = (ring->ring_size / 4) - 1; ring->max_dw = max_dw; -- cgit v0.10.2 From d31d3c28c77e25990ea295c1536253043c7b255c Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 21 Jun 2016 10:26:26 +0800 Subject: drm/amd/powerplay: add SMU71 header files for iceland (v2) v2: cleanup headers, add copyright Signed-off-by: Huang Rui Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu71.h b/drivers/gpu/drm/amd/powerplay/inc/smu71.h new file mode 
100644 index 0000000..71c9b2d --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/inc/smu71.h @@ -0,0 +1,510 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef SMU71_H +#define SMU71_H + +#if !defined(SMC_MICROCODE) +#pragma pack(push, 1) +#endif + +#define SMU__NUM_PCIE_DPM_LEVELS 8 +#define SMU__NUM_SCLK_DPM_STATE 8 +#define SMU__NUM_MCLK_DPM_LEVELS 4 +#define SMU__VARIANT__ICELAND 1 +#define SMU__DGPU_ONLY 1 +#define SMU__DYNAMIC_MCARB_SETTINGS 1 + +enum SID_OPTION { + SID_OPTION_HI, + SID_OPTION_LO, + SID_OPTION_COUNT +}; + +typedef struct { + uint32_t high; + uint32_t low; +} data_64_t; + +typedef struct { + data_64_t high; + data_64_t low; +} data_128_t; + +#define SMU7_CONTEXT_ID_SMC 1 +#define SMU7_CONTEXT_ID_VBIOS 2 + +#define SMU71_MAX_LEVELS_VDDC 8 +#define SMU71_MAX_LEVELS_VDDCI 4 +#define SMU71_MAX_LEVELS_MVDD 4 +#define SMU71_MAX_LEVELS_VDDNB 8 + +#define SMU71_MAX_LEVELS_GRAPHICS SMU__NUM_SCLK_DPM_STATE +#define SMU71_MAX_LEVELS_MEMORY SMU__NUM_MCLK_DPM_LEVELS +#define SMU71_MAX_LEVELS_GIO SMU__NUM_LCLK_DPM_LEVELS +#define SMU71_MAX_LEVELS_LINK SMU__NUM_PCIE_DPM_LEVELS +#define SMU71_MAX_ENTRIES_SMIO 32 + +#define DPM_NO_LIMIT 0 +#define DPM_NO_UP 1 +#define DPM_GO_DOWN 2 +#define DPM_GO_UP 3 + +#define SMU7_FIRST_DPM_GRAPHICS_LEVEL 0 +#define SMU7_FIRST_DPM_MEMORY_LEVEL 0 + +#define GPIO_CLAMP_MODE_VRHOT 1 +#define GPIO_CLAMP_MODE_THERM 2 +#define GPIO_CLAMP_MODE_DC 4 + +#define SCRATCH_B_TARG_PCIE_INDEX_SHIFT 0 +#define SCRATCH_B_TARG_PCIE_INDEX_MASK (0x7< Date: Mon, 20 Jun 2016 00:42:01 +0800 Subject: drm/amd/powerplay: add iceland SMU mananger The system management unit (SMU) is a subcomponent of the northbridge that is responsible for a variety of system and power management tasks during boot and runtime for GPU. In powerplay, it will be used on firmware loading and power task management. This patch adds SMU mananger for iceland. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile index f10fb64..19e7946 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile +++ b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile @@ -2,7 +2,8 @@ # Makefile for the 'smu manager' sub-component of powerplay. # It provides the smu management services for the driver. 
-SMU_MGR = smumgr.o cz_smumgr.o tonga_smumgr.o fiji_smumgr.o polaris10_smumgr.o +SMU_MGR = smumgr.o cz_smumgr.o tonga_smumgr.o fiji_smumgr.o \ + polaris10_smumgr.o iceland_smumgr.o AMD_PP_SMUMGR = $(addprefix $(AMD_PP_PATH)/smumgr/,$(SMU_MGR)) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c new file mode 100644 index 0000000..f506583 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c @@ -0,0 +1,713 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ +#include +#include +#include +#include + +#include "smumgr.h" +#include "iceland_smumgr.h" +#include "pp_debug.h" +#include "smu_ucode_xfer_vi.h" +#include "ppsmc.h" +#include "smu/smu_7_1_1_d.h" +#include "smu/smu_7_1_1_sh_mask.h" +#include "cgs_common.h" + +#define ICELAND_SMC_SIZE 0x20000 +#define BUFFER_SIZE 80000 +#define MAX_STRING_SIZE 15 +#define BUFFER_SIZETWO 131072 /*128 *1024*/ + +/** + * Set the address for reading/writing the SMC SRAM space. + * @param smumgr the address of the powerplay hardware manager. + * @param smcAddress the address in the SMC RAM to access. + */ +static int iceland_set_smc_sram_address(struct pp_smumgr *smumgr, + uint32_t smcAddress, uint32_t limit) +{ + if (smumgr == NULL || smumgr->device == NULL) + return -EINVAL; + PP_ASSERT_WITH_CODE((0 == (3 & smcAddress)), + "SMC address must be 4 byte aligned.", + return -1;); + + PP_ASSERT_WITH_CODE((limit > (smcAddress + 3)), + "SMC address is beyond the SMC RAM area.", + return -1;); + + cgs_write_register(smumgr->device, mmSMC_IND_INDEX_0, smcAddress); + SMUM_WRITE_FIELD(smumgr->device, SMC_IND_ACCESS_CNTL, AUTO_INCREMENT_IND_0, 0); + + return 0; +} + +/** + * Copy bytes from an array into the SMC RAM space. + * + * @param smumgr the address of the powerplay SMU manager. + * @param smcStartAddress the start address in the SMC RAM to copy bytes to. + * @param src the byte array to copy the bytes from. + * @param byteCount the number of bytes to copy. 
+ */ +int iceland_copy_bytes_to_smc(struct pp_smumgr *smumgr, + uint32_t smcStartAddress, const uint8_t *src, + uint32_t byteCount, uint32_t limit) +{ + uint32_t addr; + uint32_t data, orig_data; + int result = 0; + uint32_t extra_shift; + + if (smumgr == NULL || smumgr->device == NULL) + return -EINVAL; + PP_ASSERT_WITH_CODE((0 == (3 & smcStartAddress)), + "SMC address must be 4 byte aligned.", + return 0;); + + PP_ASSERT_WITH_CODE((limit > (smcStartAddress + byteCount)), + "SMC address is beyond the SMC RAM area.", + return 0;); + + addr = smcStartAddress; + + while (byteCount >= 4) { + /* + * Bytes are written into the + * SMC address space with the MSB first + */ + data = (src[0] << 24) + (src[1] << 16) + (src[2] << 8) + src[3]; + + result = iceland_set_smc_sram_address(smumgr, addr, limit); + + if (result) + goto out; + + cgs_write_register(smumgr->device, mmSMC_IND_DATA_0, data); + + src += 4; + byteCount -= 4; + addr += 4; + } + + if (0 != byteCount) { + /* Now write odd bytes left, do a read modify write cycle */ + data = 0; + + result = iceland_set_smc_sram_address(smumgr, addr, limit); + if (result) + goto out; + + orig_data = cgs_read_register(smumgr->device, + mmSMC_IND_DATA_0); + extra_shift = 8 * (4 - byteCount); + + while (byteCount > 0) { + data = (data << 8) + *src++; + byteCount--; + } + + data <<= extra_shift; + data |= (orig_data & ~((~0UL) << extra_shift)); + + result = iceland_set_smc_sram_address(smumgr, addr, limit); + if (result) + goto out; + + cgs_write_register(smumgr->device, mmSMC_IND_DATA_0, data); + } + +out: + return result; +} + +/** + * Deassert the reset'pin' (set it to high). + * + * @param smumgr the address of the powerplay hardware manager. + */ +static int iceland_start_smc(struct pp_smumgr *smumgr) +{ + SMUM_WRITE_INDIRECT_FIELD(smumgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, rst_reg, 0); + + return 0; +} + +static void iceland_pp_reset_smc(struct pp_smumgr *smumgr) +{ + SMUM_WRITE_INDIRECT_FIELD(smumgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, + rst_reg, 1); +} + +int iceland_program_jump_on_start(struct pp_smumgr *smumgr) +{ + static const unsigned char pData[] = { 0xE0, 0x00, 0x80, 0x40 }; + + iceland_copy_bytes_to_smc(smumgr, 0x0, pData, 4, sizeof(pData)+1); + + return 0; +} + +/** + * Return if the SMC is currently running. + * + * @param smumgr the address of the powerplay hardware manager. + */ +bool iceland_is_smc_ram_running(struct pp_smumgr *smumgr) +{ + uint32_t val1, val2; + + val1 = SMUM_READ_INDIRECT_FIELD(smumgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_CLOCK_CNTL_0, ck_disable); + val2 = cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, + ixSMC_PC_C); + + return ((0 == val1) && (0x20100 <= val2)); +} + +/** + * Send a message to the SMC, and wait for its response. + * + * @param smumgr the address of the powerplay hardware manager. + * @param msg the message to send. + * @return The response that came from the SMC. 
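+ * The function waits for SMC_RESP_0 to report a non-zero response both
+ * before and after writing SMC_MESSAGE_0, so a previously stuck message
+ * is detected first.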
+ */ +static int iceland_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg) +{ + if (smumgr == NULL || smumgr->device == NULL) + return -EINVAL; + + if (!iceland_is_smc_ram_running(smumgr)) + return -EINVAL; + + SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0); + PP_ASSERT_WITH_CODE( + 1 == SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP), + "Failed to send Previous Message.", + ); + + cgs_write_register(smumgr->device, mmSMC_MESSAGE_0, msg); + + SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0); + PP_ASSERT_WITH_CODE( + 1 == SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP), + "Failed to send Message.", + ); + + return 0; +} + +/** + * Send a message to the SMC with parameter + * + * @param smumgr: the address of the powerplay hardware manager. + * @param msg: the message to send. + * @param parameter: the parameter to send + * @return The response that came from the SMC. + */ +static int iceland_send_msg_to_smc_with_parameter(struct pp_smumgr *smumgr, + uint16_t msg, uint32_t parameter) +{ + if (smumgr == NULL || smumgr->device == NULL) + return -EINVAL; + + cgs_write_register(smumgr->device, mmSMC_MSG_ARG_0, parameter); + + return iceland_send_msg_to_smc(smumgr, msg); +} + +/* + * Read a 32bit value from the SMC SRAM space. + * ALL PARAMETERS ARE IN HOST BYTE ORDER. + * @param smumgr the address of the powerplay hardware manager. + * @param smcAddress the address in the SMC RAM to access. + * @param value and output parameter for the data read from the SMC SRAM. + */ +int iceland_read_smc_sram_dword(struct pp_smumgr *smumgr, + uint32_t smcAddress, uint32_t *value, + uint32_t limit) +{ + int result; + + result = iceland_set_smc_sram_address(smumgr, smcAddress, limit); + + if (0 != result) + return result; + + *value = cgs_read_register(smumgr->device, mmSMC_IND_DATA_0); + + return 0; +} + +/* + * Write a 32bit value to the SMC SRAM space. + * ALL PARAMETERS ARE IN HOST BYTE ORDER. + * @param smumgr the address of the powerplay hardware manager. + * @param smcAddress the address in the SMC RAM to access. + * @param value to write to the SMC SRAM. 
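+ * @param limit the end of the addressable SMC RAM, used by the bounds
+ * check in iceland_set_smc_sram_address.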
+ */ +int iceland_write_smc_sram_dword(struct pp_smumgr *smumgr, + uint32_t smcAddress, uint32_t value, + uint32_t limit) +{ + int result; + + result = iceland_set_smc_sram_address(smumgr, smcAddress, limit); + + if (0 != result) + return result; + + cgs_write_register(smumgr->device, mmSMC_IND_DATA_0, value); + + return 0; +} + +static int iceland_smu_fini(struct pp_smumgr *smumgr) +{ + struct iceland_smumgr *priv = (struct iceland_smumgr *)(smumgr->backend); + + smu_free_memory(smumgr->device, (void *)priv->header_buffer.handle); + + if (smumgr->backend != NULL) { + kfree(smumgr->backend); + smumgr->backend = NULL; + } + + cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU); + return 0; +} + +static enum cgs_ucode_id iceland_convert_fw_type_to_cgs(uint32_t fw_type) +{ + enum cgs_ucode_id result = CGS_UCODE_ID_MAXIMUM; + + switch (fw_type) { + case UCODE_ID_SMU: + result = CGS_UCODE_ID_SMU; + break; + case UCODE_ID_SDMA0: + result = CGS_UCODE_ID_SDMA0; + break; + case UCODE_ID_SDMA1: + result = CGS_UCODE_ID_SDMA1; + break; + case UCODE_ID_CP_CE: + result = CGS_UCODE_ID_CP_CE; + break; + case UCODE_ID_CP_PFP: + result = CGS_UCODE_ID_CP_PFP; + break; + case UCODE_ID_CP_ME: + result = CGS_UCODE_ID_CP_ME; + break; + case UCODE_ID_CP_MEC: + result = CGS_UCODE_ID_CP_MEC; + break; + case UCODE_ID_CP_MEC_JT1: + result = CGS_UCODE_ID_CP_MEC_JT1; + break; + case UCODE_ID_CP_MEC_JT2: + result = CGS_UCODE_ID_CP_MEC_JT2; + break; + case UCODE_ID_RLC_G: + result = CGS_UCODE_ID_RLC_G; + break; + default: + break; + } + + return result; +} + +/** + * Convert the PPIRI firmware type to SMU type mask. + * For MEC, we need to check all MEC related type + */ +static uint16_t iceland_get_mask_for_firmware_type(uint16_t firmwareType) +{ + uint16_t result = 0; + + switch (firmwareType) { + case UCODE_ID_SDMA0: + result = UCODE_ID_SDMA0_MASK; + break; + case UCODE_ID_SDMA1: + result = UCODE_ID_SDMA1_MASK; + break; + case UCODE_ID_CP_CE: + result = UCODE_ID_CP_CE_MASK; + break; + case UCODE_ID_CP_PFP: + result = UCODE_ID_CP_PFP_MASK; + break; + case UCODE_ID_CP_ME: + result = UCODE_ID_CP_ME_MASK; + break; + case UCODE_ID_CP_MEC: + case UCODE_ID_CP_MEC_JT1: + case UCODE_ID_CP_MEC_JT2: + result = UCODE_ID_CP_MEC_MASK; + break; + case UCODE_ID_RLC_G: + result = UCODE_ID_RLC_G_MASK; + break; + default: + break; + } + + return result; +} + +/** + * Check if the FW has been loaded, + * SMU will not return if loading has not finished. 
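+ * The check polls SOFT_REGISTERS_TABLE_27 until the mask bits for the given
+ * firmware type are set, and returns -EINVAL if the wait times out.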
+*/ +static int iceland_check_fw_load_finish(struct pp_smumgr *smumgr, uint32_t fwType) +{ + uint16_t fwMask = iceland_get_mask_for_firmware_type(fwType); + + if (0 != SMUM_WAIT_VFPF_INDIRECT_REGISTER(smumgr, SMC_IND, + SOFT_REGISTERS_TABLE_27, fwMask, fwMask)) { + pr_err("[ powerplay ] check firmware loading failed\n"); + return -EINVAL; + } + + return 0; +} + +/* Populate one firmware image to the data structure */ +static int iceland_populate_single_firmware_entry(struct pp_smumgr *smumgr, + uint16_t firmware_type, + struct SMU_Entry *pentry) +{ + int result; + struct cgs_firmware_info info = {0}; + + result = cgs_get_firmware_info( + smumgr->device, + iceland_convert_fw_type_to_cgs(firmware_type), + &info); + + if (result == 0) { + pentry->version = 0; + pentry->id = (uint16_t)firmware_type; + pentry->image_addr_high = smu_upper_32_bits(info.mc_addr); + pentry->image_addr_low = smu_lower_32_bits(info.mc_addr); + pentry->meta_data_addr_high = 0; + pentry->meta_data_addr_low = 0; + pentry->data_size_byte = info.image_size; + pentry->num_register_entries = 0; + + if (firmware_type == UCODE_ID_RLC_G) + pentry->flags = 1; + else + pentry->flags = 0; + } else { + return result; + } + + return result; +} + +static void iceland_pp_stop_smc_clock(struct pp_smumgr *smumgr) +{ + SMUM_WRITE_INDIRECT_FIELD(smumgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_CLOCK_CNTL_0, + ck_disable, 1); +} + +static void iceland_start_smc_clock(struct pp_smumgr *smumgr) +{ + SMUM_WRITE_INDIRECT_FIELD(smumgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_CLOCK_CNTL_0, + ck_disable, 0); +} + +int iceland_smu_start_smc(struct pp_smumgr *smumgr) +{ + /* set smc instruct start point at 0x0 */ + iceland_program_jump_on_start(smumgr); + + /* enable smc clock */ + iceland_start_smc_clock(smumgr); + + /* de-assert reset */ + iceland_start_smc(smumgr); + + SMUM_WAIT_INDIRECT_FIELD(smumgr, SMC_IND, FIRMWARE_FLAGS, + INTERRUPTS_ENABLED, 1); + + return 0; +} + +/** + * Upload the SMC firmware to the SMC microcontroller. + * + * @param smumgr the address of the powerplay hardware manager. + * @param pFirmware the data structure containing the various sections of the firmware. 
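+ * The image itself is fetched with cgs_get_firmware_info() and streamed
+ * through the auto-incrementing SMC_IND_DATA_0 window while the SMC is held
+ * in reset with its clock stopped.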
+ */ +int iceland_smu_upload_firmware_image(struct pp_smumgr *smumgr) +{ + const uint8_t *src; + uint32_t byte_count, val; + uint32_t data; + struct cgs_firmware_info info = {0}; + + if (smumgr == NULL || smumgr->device == NULL) + return -EINVAL; + + /* load SMC firmware */ + cgs_get_firmware_info(smumgr->device, + iceland_convert_fw_type_to_cgs(UCODE_ID_SMU), &info); + + if (info.image_size & 3) { + pr_err("[ powerplay ] SMC ucode is not 4 bytes aligned\n"); + return -EINVAL; + } + + if (info.image_size > ICELAND_SMC_SIZE) { + pr_err("[ powerplay ] SMC address is beyond the SMC RAM area\n"); + return -EINVAL; + } + + /* wait for smc boot up */ + SMUM_WAIT_INDIRECT_FIELD_UNEQUAL(smumgr, SMC_IND, + RCU_UC_EVENTS, boot_seq_done, 0); + + /* clear firmware interrupt enable flag */ + val = cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, + ixSMC_SYSCON_MISC_CNTL); + cgs_write_ind_register(smumgr->device, CGS_IND_REG__SMC, + ixSMC_SYSCON_MISC_CNTL, val | 1); + + /* stop smc clock */ + iceland_pp_stop_smc_clock(smumgr); + + /* reset smc */ + iceland_pp_reset_smc(smumgr); + + cgs_write_register(smumgr->device, mmSMC_IND_INDEX_0, + info.ucode_start_address); + + SMUM_WRITE_FIELD(smumgr->device, SMC_IND_ACCESS_CNTL, + AUTO_INCREMENT_IND_0, 1); + + byte_count = info.image_size; + src = (const uint8_t *)info.kptr; + + while (byte_count >= 4) { + data = (src[0] << 24) + (src[1] << 16) + (src[2] << 8) + src[3]; + cgs_write_register(smumgr->device, mmSMC_IND_DATA_0, data); + src += 4; + byte_count -= 4; + } + + SMUM_WRITE_FIELD(smumgr->device, SMC_IND_ACCESS_CNTL, + AUTO_INCREMENT_IND_0, 0); + + return 0; +} + +static int iceland_request_smu_reload_fw(struct pp_smumgr *smumgr) +{ + struct iceland_smumgr *iceland_smu = + (struct iceland_smumgr *)(smumgr->backend); + uint16_t fw_to_load; + int result = 0; + struct SMU_DRAMData_TOC *toc; + + toc = (struct SMU_DRAMData_TOC *)iceland_smu->pHeader; + toc->num_entries = 0; + toc->structure_version = 1; + + PP_ASSERT_WITH_CODE( + 0 == iceland_populate_single_firmware_entry(smumgr, + UCODE_ID_RLC_G, + &toc->entry[toc->num_entries++]), + "Failed to Get Firmware Entry.\n", + return -1); + PP_ASSERT_WITH_CODE( + 0 == iceland_populate_single_firmware_entry(smumgr, + UCODE_ID_CP_CE, + &toc->entry[toc->num_entries++]), + "Failed to Get Firmware Entry.\n", + return -1); + PP_ASSERT_WITH_CODE( + 0 == iceland_populate_single_firmware_entry + (smumgr, UCODE_ID_CP_PFP, &toc->entry[toc->num_entries++]), + "Failed to Get Firmware Entry.\n", return -1); + PP_ASSERT_WITH_CODE( + 0 == iceland_populate_single_firmware_entry + (smumgr, UCODE_ID_CP_ME, &toc->entry[toc->num_entries++]), + "Failed to Get Firmware Entry.\n", return -1); + PP_ASSERT_WITH_CODE( + 0 == iceland_populate_single_firmware_entry + (smumgr, UCODE_ID_CP_MEC, &toc->entry[toc->num_entries++]), + "Failed to Get Firmware Entry.\n", return -1); + PP_ASSERT_WITH_CODE( + 0 == iceland_populate_single_firmware_entry + (smumgr, UCODE_ID_CP_MEC_JT1, &toc->entry[toc->num_entries++]), + "Failed to Get Firmware Entry.\n", return -1); + PP_ASSERT_WITH_CODE( + 0 == iceland_populate_single_firmware_entry + (smumgr, UCODE_ID_CP_MEC_JT2, &toc->entry[toc->num_entries++]), + "Failed to Get Firmware Entry.\n", return -1); + PP_ASSERT_WITH_CODE( + 0 == iceland_populate_single_firmware_entry + (smumgr, UCODE_ID_SDMA0, &toc->entry[toc->num_entries++]), + "Failed to Get Firmware Entry.\n", return -1); + PP_ASSERT_WITH_CODE( + 0 == iceland_populate_single_firmware_entry + (smumgr, UCODE_ID_SDMA1, &toc->entry[toc->num_entries++]), 
+ "Failed to Get Firmware Entry.\n", return -1); + + if (!iceland_is_smc_ram_running(smumgr)) { + result = iceland_smu_upload_firmware_image(smumgr); + if (result) + return result; + + result = iceland_smu_start_smc(smumgr); + if (result) + return result; + } + + iceland_send_msg_to_smc_with_parameter(smumgr, + PPSMC_MSG_DRV_DRAM_ADDR_HI, + iceland_smu->header_buffer.mc_addr_high); + + iceland_send_msg_to_smc_with_parameter(smumgr, + PPSMC_MSG_DRV_DRAM_ADDR_LO, + iceland_smu->header_buffer.mc_addr_low); + + fw_to_load = UCODE_ID_RLC_G_MASK + + UCODE_ID_SDMA0_MASK + + UCODE_ID_SDMA1_MASK + + UCODE_ID_CP_CE_MASK + + UCODE_ID_CP_ME_MASK + + UCODE_ID_CP_PFP_MASK + + UCODE_ID_CP_MEC_MASK + + UCODE_ID_CP_MEC_JT1_MASK + + UCODE_ID_CP_MEC_JT2_MASK; + + PP_ASSERT_WITH_CODE( + 0 == iceland_send_msg_to_smc_with_parameter( + smumgr, PPSMC_MSG_LoadUcodes, fw_to_load), + "Fail to Request SMU Load uCode", return 0); + + return result; +} + +static int iceland_request_smu_load_specific_fw(struct pp_smumgr *smumgr, + uint32_t firmwareType) +{ + return 0; +} + +static int iceland_start_smu(struct pp_smumgr *smumgr) +{ + int result; + + result = iceland_smu_upload_firmware_image(smumgr); + if (result) + return result; + + result = iceland_smu_start_smc(smumgr); + if (result) + return result; + + result = iceland_request_smu_reload_fw(smumgr); + + return result; +} + +/** + * Write a 32bit value to the SMC SRAM space. + * ALL PARAMETERS ARE IN HOST BYTE ORDER. + * @param smumgr the address of the powerplay hardware manager. + * @param smcAddress the address in the SMC RAM to access. + * @param value to write to the SMC SRAM. + */ +static int iceland_smu_init(struct pp_smumgr *smumgr) +{ + struct iceland_smumgr *iceland_smu; + uint64_t mc_addr = 0; + + /* Allocate memory for backend private data */ + iceland_smu = (struct iceland_smumgr *)(smumgr->backend); + iceland_smu->header_buffer.data_size = + ((sizeof(struct SMU_DRAMData_TOC) / 4096) + 1) * 4096; + + smu_allocate_memory(smumgr->device, + iceland_smu->header_buffer.data_size, + CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB, + PAGE_SIZE, + &mc_addr, + &iceland_smu->header_buffer.kaddr, + &iceland_smu->header_buffer.handle); + + iceland_smu->pHeader = iceland_smu->header_buffer.kaddr; + iceland_smu->header_buffer.mc_addr_high = smu_upper_32_bits(mc_addr); + iceland_smu->header_buffer.mc_addr_low = smu_lower_32_bits(mc_addr); + + PP_ASSERT_WITH_CODE((NULL != iceland_smu->pHeader), + "Out of memory.", + kfree(smumgr->backend); + cgs_free_gpu_mem(smumgr->device, + (cgs_handle_t)iceland_smu->header_buffer.handle); + return -1); + + return 0; +} + +static const struct pp_smumgr_func iceland_smu_funcs = { + .smu_init = &iceland_smu_init, + .smu_fini = &iceland_smu_fini, + .start_smu = &iceland_start_smu, + .check_fw_load_finish = &iceland_check_fw_load_finish, + .request_smu_load_fw = &iceland_request_smu_reload_fw, + .request_smu_load_specific_fw = &iceland_request_smu_load_specific_fw, + .send_msg_to_smc = &iceland_send_msg_to_smc, + .send_msg_to_smc_with_parameter = &iceland_send_msg_to_smc_with_parameter, + .download_pptable_settings = NULL, + .upload_pptable_settings = NULL, +}; + +int iceland_smum_init(struct pp_smumgr *smumgr) +{ + struct iceland_smumgr *iceland_smu = NULL; + + iceland_smu = kzalloc(sizeof(struct iceland_smumgr), GFP_KERNEL); + + if (iceland_smu == NULL) + return -ENOMEM; + + smumgr->backend = iceland_smu; + smumgr->smumgr_funcs = &iceland_smu_funcs; + + return 0; +} diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.h 
b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.h new file mode 100644 index 0000000..62009a7 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.h @@ -0,0 +1,64 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ + +#ifndef _ICELAND_SMUMGR_H_ +#define _ICELAND_SMUMGR_H_ + +struct iceland_buffer_entry { + uint32_t data_size; + uint32_t mc_addr_low; + uint32_t mc_addr_high; + void *kaddr; + unsigned long handle; +}; + +/* Iceland only has header_buffer, don't have smu buffer. */ +struct iceland_smumgr { + uint8_t *pHeader; + uint8_t *pMecImage; + uint32_t ulSoftRegsStart; + + struct iceland_buffer_entry header_buffer; +}; + +extern int iceland_smum_init(struct pp_smumgr *smumgr); +extern int iceland_copy_bytes_to_smc(struct pp_smumgr *smumgr, + uint32_t smcStartAddress, + const uint8_t *src, + uint32_t byteCount, uint32_t limit); + +extern int iceland_smu_start_smc(struct pp_smumgr *smumgr); + +extern int iceland_read_smc_sram_dword(struct pp_smumgr *smumgr, + uint32_t smcAddress, + uint32_t *value, uint32_t limit); +extern int iceland_write_smc_sram_dword(struct pp_smumgr *smumgr, + uint32_t smcAddress, + uint32_t value, uint32_t limit); + +extern bool iceland_is_smc_ram_running(struct pp_smumgr *smumgr); +extern int iceland_smu_upload_firmware_image(struct pp_smumgr *smumgr); + +#endif diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c index 7723473..cf3cabe 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c @@ -30,6 +30,7 @@ #include "linux/delay.h" #include "cz_smumgr.h" #include "tonga_smumgr.h" +#include "iceland_smumgr.h" #include "fiji_smumgr.h" #include "polaris10_smumgr.h" @@ -58,6 +59,9 @@ int smum_init(struct amd_pp_init *pp_init, struct pp_instance *handle) break; case AMDGPU_FAMILY_VI: switch (smumgr->chip_id) { + case CHIP_TOPAZ: + iceland_smum_init(smumgr); + break; case CHIP_TONGA: tonga_smum_init(smumgr); break; -- cgit v0.10.2 From d550df0b60d444e446afba00c41894bf350de4c0 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 12 Jul 2016 00:52:24 +0800 Subject: drm/amd/powerplay: add iceland_dyn_defaults header Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_dyn_defaults.h b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_dyn_defaults.h new 
file mode 100644 index 0000000..a7b4bc6 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_dyn_defaults.h @@ -0,0 +1,41 @@ +#ifndef ICELAND_DYN_DEFAULTS_H +#define ICELAND_DYN_DEFAULTS_H + +enum ICELANDdpm_TrendDetection +{ + ICELANDdpm_TrendDetection_AUTO, + ICELANDdpm_TrendDetection_UP, + ICELANDdpm_TrendDetection_DOWN +}; +typedef enum ICELANDdpm_TrendDetection ICELANDdpm_TrendDetection; + + +#define PPICELAND_VOTINGRIGHTSCLIENTS_DFLT0 0x3FFFC102 +#define PPICELAND_VOTINGRIGHTSCLIENTS_DFLT1 0x000400 +#define PPICELAND_VOTINGRIGHTSCLIENTS_DFLT2 0xC00080 +#define PPICELAND_VOTINGRIGHTSCLIENTS_DFLT3 0xC00200 +#define PPICELAND_VOTINGRIGHTSCLIENTS_DFLT4 0xC01680 +#define PPICELAND_VOTINGRIGHTSCLIENTS_DFLT5 0xC00033 +#define PPICELAND_VOTINGRIGHTSCLIENTS_DFLT6 0xC00033 +#define PPICELAND_VOTINGRIGHTSCLIENTS_DFLT7 0x3FFFC000 + + +#define PPICELAND_THERMALPROTECTCOUNTER_DFLT 0x200 + +#define PPICELAND_STATICSCREENTHRESHOLDUNIT_DFLT 0 + +#define PPICELAND_STATICSCREENTHRESHOLD_DFLT 0x00C8 + +#define PPICELAND_GFXIDLECLOCKSTOPTHRESHOLD_DFLT 0x200 + +#define PPICELAND_REFERENCEDIVIDER_DFLT 4 + +#define PPICELAND_ULVVOLTAGECHANGEDELAY_DFLT 1687 + +#define PPICELAND_CGULVPARAMETER_DFLT 0x00040035 +#define PPICELAND_CGULVCONTROL_DFLT 0x00007450 +#define PPICELAND_TARGETACTIVITY_DFLT 30 +#define PPICELAND_MCLK_TARGETACTIVITY_DFLT 10 + +#endif + -- cgit v0.10.2 From 025f8bfb84cbcaa78df31ab00d7e3c5f979e9e27 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 12 Jul 2016 15:45:12 +0800 Subject: drm/amd/powerplay: add iceland HW manager This patch introduces the iceland HW manager of powerplay which includes HW manager, clockpowergating, thermal, and powertune. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile index f7ce4cb..c264cb6 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile @@ -10,7 +10,9 @@ HARDWARE_MGR = hwmgr.o processpptables.o functiontables.o \ fiji_powertune.o fiji_hwmgr.o tonga_clockpowergating.o \ fiji_clockpowergating.o fiji_thermal.o \ polaris10_hwmgr.o polaris10_powertune.o polaris10_thermal.o \ - polaris10_clockpowergating.o + polaris10_clockpowergating.o iceland_hwmgr.o \ + iceland_clockpowergating.o iceland_thermal.o \ + iceland_powertune.o AMD_PP_HWMGR = $(addprefix $(AMD_PP_PATH)/hwmgr/,$(HARDWARE_MGR)) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 27e0762..65408dd 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -39,6 +39,7 @@ extern int cz_hwmgr_init(struct pp_hwmgr *hwmgr); extern int tonga_hwmgr_init(struct pp_hwmgr *hwmgr); extern int fiji_hwmgr_init(struct pp_hwmgr *hwmgr); extern int polaris10_hwmgr_init(struct pp_hwmgr *hwmgr); +extern int iceland_hwmgr_init(struct pp_hwmgr *hwmgr); int hwmgr_init(struct amd_pp_init *pp_init, struct pp_instance *handle) { @@ -67,6 +68,9 @@ int hwmgr_init(struct amd_pp_init *pp_init, struct pp_instance *handle) break; case AMDGPU_FAMILY_VI: switch (hwmgr->chip_id) { + case CHIP_TOPAZ: + iceland_hwmgr_init(hwmgr); + break; case CHIP_TONGA: tonga_hwmgr_init(hwmgr); break; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_clockpowergating.c new file mode 100644 index 0000000..47949f5 --- /dev/null +++ 
b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_clockpowergating.c @@ -0,0 +1,119 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ + +#include "hwmgr.h" +#include "iceland_clockpowergating.h" +#include "ppsmc.h" +#include "iceland_hwmgr.h" + +int iceland_phm_powerdown_uvd(struct pp_hwmgr *hwmgr) +{ + /* iceland does not have MM hardware block */ + return 0; +} + +static int iceland_phm_powerup_uvd(struct pp_hwmgr *hwmgr) +{ + /* iceland does not have MM hardware block */ + return 0; +} + +static int iceland_phm_powerdown_vce(struct pp_hwmgr *hwmgr) +{ + /* iceland does not have MM hardware block */ + return 0; +} + +static int iceland_phm_powerup_vce(struct pp_hwmgr *hwmgr) +{ + /* iceland does not have MM hardware block */ + return 0; +} + +int iceland_phm_set_asic_block_gating(struct pp_hwmgr *hwmgr, enum + PHM_AsicBlock block, enum PHM_ClockGateSetting gating) +{ + int ret = 0; + + switch (block) { + case PHM_AsicBlock_UVD_MVC: + case PHM_AsicBlock_UVD: + case PHM_AsicBlock_UVD_HD: + case PHM_AsicBlock_UVD_SD: + if (gating == PHM_ClockGateSetting_StaticOff) + ret = iceland_phm_powerdown_uvd(hwmgr); + else + ret = iceland_phm_powerup_uvd(hwmgr); + break; + case PHM_AsicBlock_GFX: + default: + break; + } + + return ret; +} + +int iceland_phm_disable_clock_power_gating(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + data->uvd_power_gated = false; + data->vce_power_gated = false; + + iceland_phm_powerup_uvd(hwmgr); + iceland_phm_powerup_vce(hwmgr); + + return 0; +} + +int iceland_phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) +{ + if (bgate) { + iceland_update_uvd_dpm(hwmgr, true); + iceland_phm_powerdown_uvd(hwmgr); + } else { + iceland_phm_powerup_uvd(hwmgr); + iceland_update_uvd_dpm(hwmgr, false); + } + + return 0; +} + +int iceland_phm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) +{ + if (bgate) + return iceland_phm_powerdown_vce(hwmgr); + else + return iceland_phm_powerup_vce(hwmgr); + + return 0; +} + +int iceland_phm_update_clock_gatings(struct pp_hwmgr *hwmgr, + const uint32_t *msg_id) +{ + /* iceland does not have MM hardware block */ + return 0; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_clockpowergating.h b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_clockpowergating.h new file mode 100644 index 0000000..ff5ef00 --- /dev/null +++ 
b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_clockpowergating.h @@ -0,0 +1,38 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ + +#ifndef _ICELAND_CLOCK_POWER_GATING_H_ +#define _ICELAND_CLOCK_POWER_GATING_H_ + +#include "iceland_hwmgr.h" +#include "pp_asicblocks.h" + +extern int iceland_phm_set_asic_block_gating(struct pp_hwmgr *hwmgr, enum PHM_AsicBlock block, enum PHM_ClockGateSetting gating); +extern int iceland_phm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate); +extern int iceland_phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate); +extern int iceland_phm_powerdown_uvd(struct pp_hwmgr *hwmgr); +extern int iceland_phm_disable_clock_power_gating(struct pp_hwmgr *hwmgr); +extern int iceland_phm_update_clock_gatings(struct pp_hwmgr *hwmgr, const uint32_t *msg_id); +#endif /* _ICELAND_CLOCK_POWER_GATING_H_ */ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c new file mode 100644 index 0000000..9c6d7e3 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c @@ -0,0 +1,5617 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Author: Huang Rui + * + */ +#include +#include +#include +#include "linux/delay.h" +#include "pp_acpi.h" +#include "hwmgr.h" +#include +#include "iceland_hwmgr.h" +#include "pptable.h" +#include "processpptables.h" +#include "pp_debug.h" +#include "ppsmc.h" +#include "cgs_common.h" +#include "pppcielanes.h" +#include "iceland_dyn_defaults.h" +#include "smumgr.h" +#include "iceland_smumgr.h" +#include "iceland_clockpowergating.h" +#include "iceland_thermal.h" +#include "iceland_powertune.h" + +#include "gmc/gmc_8_1_d.h" +#include "gmc/gmc_8_1_sh_mask.h" + +#include "bif/bif_5_0_d.h" +#include "bif/bif_5_0_sh_mask.h" + +#include "smu/smu_7_1_1_d.h" +#include "smu/smu_7_1_1_sh_mask.h" + +#include "cgs_linux.h" +#include "eventmgr.h" +#include "amd_pcie_helpers.h" + +#define MC_CG_ARB_FREQ_F0 0x0a +#define MC_CG_ARB_FREQ_F1 0x0b +#define MC_CG_ARB_FREQ_F2 0x0c +#define MC_CG_ARB_FREQ_F3 0x0d + +#define MC_CG_SEQ_DRAMCONF_S0 0x05 +#define MC_CG_SEQ_DRAMCONF_S1 0x06 +#define MC_CG_SEQ_YCLK_SUSPEND 0x04 +#define MC_CG_SEQ_YCLK_RESUME 0x0a + +#define PCIE_BUS_CLK 10000 +#define TCLK (PCIE_BUS_CLK / 10) + +#define SMC_RAM_END 0x40000 +#define SMC_CG_IND_START 0xc0030000 +#define SMC_CG_IND_END 0xc0040000 /* First byte after SMC_CG_IND*/ + +#define VOLTAGE_SCALE 4 +#define VOLTAGE_VID_OFFSET_SCALE1 625 +#define VOLTAGE_VID_OFFSET_SCALE2 100 + +const uint32_t iceland_magic = (uint32_t)(PHM_VIslands_Magic); + +#define MC_SEQ_MISC0_GDDR5_SHIFT 28 +#define MC_SEQ_MISC0_GDDR5_MASK 0xf0000000 +#define MC_SEQ_MISC0_GDDR5_VALUE 5 + +/** Values for the CG_THERMAL_CTRL::DPM_EVENT_SRC field. */ +enum DPM_EVENT_SRC { + DPM_EVENT_SRC_ANALOG = 0, /* Internal analog trip point */ + DPM_EVENT_SRC_EXTERNAL = 1, /* External (GPIO 17) signal */ + DPM_EVENT_SRC_DIGITAL = 2, /* Internal digital trip point (DIG_THERM_DPM) */ + DPM_EVENT_SRC_ANALOG_OR_EXTERNAL = 3, /* Internal analog or external */ + DPM_EVENT_SRC_DIGITAL_OR_EXTERNAL = 4 /* Internal digital or external */ +}; + +static int iceland_read_clock_registers(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + data->clock_registers.vCG_SPLL_FUNC_CNTL = + cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_SPLL_FUNC_CNTL); + data->clock_registers.vCG_SPLL_FUNC_CNTL_2 = + cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_SPLL_FUNC_CNTL_2); + data->clock_registers.vCG_SPLL_FUNC_CNTL_3 = + cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_SPLL_FUNC_CNTL_3); + data->clock_registers.vCG_SPLL_FUNC_CNTL_4 = + cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_SPLL_FUNC_CNTL_4); + data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM = + cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_SPLL_SPREAD_SPECTRUM); + data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2 = + cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_SPLL_SPREAD_SPECTRUM_2); + data->clock_registers.vDLL_CNTL = + cgs_read_register(hwmgr->device, mmDLL_CNTL); + data->clock_registers.vMCLK_PWRMGT_CNTL = + cgs_read_register(hwmgr->device, mmMCLK_PWRMGT_CNTL); + data->clock_registers.vMPLL_AD_FUNC_CNTL = + cgs_read_register(hwmgr->device, mmMPLL_AD_FUNC_CNTL); + data->clock_registers.vMPLL_DQ_FUNC_CNTL = + cgs_read_register(hwmgr->device, mmMPLL_DQ_FUNC_CNTL); + data->clock_registers.vMPLL_FUNC_CNTL = + cgs_read_register(hwmgr->device, mmMPLL_FUNC_CNTL); + data->clock_registers.vMPLL_FUNC_CNTL_1 = + cgs_read_register(hwmgr->device, mmMPLL_FUNC_CNTL_1); + data->clock_registers.vMPLL_FUNC_CNTL_2 = + 
cgs_read_register(hwmgr->device, mmMPLL_FUNC_CNTL_2); + data->clock_registers.vMPLL_SS1 = + cgs_read_register(hwmgr->device, mmMPLL_SS1); + data->clock_registers.vMPLL_SS2 = + cgs_read_register(hwmgr->device, mmMPLL_SS2); + + return 0; +} + +/** + * Find out if memory is GDDR5. + * + * @param hwmgr the address of the powerplay hardware manager. + * @return always 0 + */ +int iceland_get_memory_type(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + uint32_t temp; + + temp = cgs_read_register(hwmgr->device, mmMC_SEQ_MISC0); + + data->is_memory_GDDR5 = (MC_SEQ_MISC0_GDDR5_VALUE == + ((temp & MC_SEQ_MISC0_GDDR5_MASK) >> + MC_SEQ_MISC0_GDDR5_SHIFT)); + + return 0; +} + +int iceland_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate) +{ + /* iceland does not have MM hardware blocks */ + return 0; +} + +/** + * Enables Dynamic Power Management by SMC + * + * @param hwmgr the address of the powerplay hardware manager. + * @return always 0 + */ +int iceland_enable_acpi_power_management(struct pp_hwmgr *hwmgr) +{ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, GENERAL_PWRMGT, STATIC_PM_EN, 1); + + return 0; +} + +/** + * Find the MC microcode version and store it in the HwMgr struct + * + * @param hwmgr the address of the powerplay hardware manager. + * @return always 0 + */ +int iceland_get_mc_microcode_version(struct pp_hwmgr *hwmgr) +{ + cgs_write_register(hwmgr->device, mmMC_SEQ_IO_DEBUG_INDEX, 0x9F); + + hwmgr->microcode_version_info.MC = cgs_read_register(hwmgr->device, mmMC_SEQ_IO_DEBUG_DATA); + + return 0; +} + +static int iceland_init_sclk_threshold(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + data->low_sclk_interrupt_threshold = 0; + + return 0; +} + + +static int iceland_setup_asic_task(struct pp_hwmgr *hwmgr) +{ + int tmp_result, result = 0; + + tmp_result = iceland_read_clock_registers(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to read clock registers!", result = tmp_result); + + tmp_result = iceland_get_memory_type(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to get memory type!", result = tmp_result); + + tmp_result = iceland_enable_acpi_power_management(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to enable ACPI power management!", result = tmp_result); + + tmp_result = iceland_get_mc_microcode_version(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to get MC microcode version!", result = tmp_result); + + tmp_result = iceland_init_sclk_threshold(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to init sclk threshold!", result = tmp_result); + + return result; +} + +static bool cf_iceland_voltage_control(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + return ICELAND_VOLTAGE_CONTROL_NONE != data->voltage_control; +} + +/* + * -------------- Voltage Tables ---------------------- + * If the voltage table would be bigger than what will fit into the + * state table on the SMC keep only the higher entries. 
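+ * Trimming drops the lowest (count - max_voltage_steps) entries and shifts
+ * the remaining ones down to index 0.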
+ */ + +static void iceland_trim_voltage_table_to_fit_state_table( + struct pp_hwmgr *hwmgr, + uint32_t max_voltage_steps, + pp_atomctrl_voltage_table *voltage_table) +{ + unsigned int i, diff; + + if (voltage_table->count <= max_voltage_steps) { + return; + } + + diff = voltage_table->count - max_voltage_steps; + + for (i = 0; i < max_voltage_steps; i++) { + voltage_table->entries[i] = voltage_table->entries[i + diff]; + } + + voltage_table->count = max_voltage_steps; + + return; +} + +/** + * Enable voltage control + * + * @param hwmgr the address of the powerplay hardware manager. + * @return always 0 + */ +int iceland_enable_voltage_control(struct pp_hwmgr *hwmgr) +{ + /* enable voltage control */ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, GENERAL_PWRMGT, VOLT_PWRMGT_EN, 1); + + return 0; +} + +static int iceland_get_svi2_voltage_table(struct pp_hwmgr *hwmgr, + struct phm_clock_voltage_dependency_table *voltage_dependency_table, + pp_atomctrl_voltage_table *voltage_table) +{ + uint32_t i; + + PP_ASSERT_WITH_CODE((NULL != voltage_table), + "Voltage Dependency Table empty.", return -EINVAL;); + + voltage_table->mask_low = 0; + voltage_table->phase_delay = 0; + voltage_table->count = voltage_dependency_table->count; + + for (i = 0; i < voltage_dependency_table->count; i++) { + voltage_table->entries[i].value = + voltage_dependency_table->entries[i].v; + voltage_table->entries[i].smio_low = 0; + } + + return 0; +} + +/** + * Create Voltage Tables. + * + * @param hwmgr the address of the powerplay hardware manager. + * @return always 0 + */ +int iceland_construct_voltage_tables(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + int result; + + /* GPIO voltage */ + if (ICELAND_VOLTAGE_CONTROL_BY_GPIO == data->voltage_control) { + result = atomctrl_get_voltage_table_v3(hwmgr, + VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_GPIO_LUT, + &data->vddc_voltage_table); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to retrieve VDDC table.", return result;); + } else if (ICELAND_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { + /* SVI2 VDDC voltage */ + result = iceland_get_svi2_voltage_table(hwmgr, + hwmgr->dyn_state.vddc_dependency_on_mclk, + &data->vddc_voltage_table); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to retrieve SVI2 VDDC table from dependancy table.", return result;); + } + + PP_ASSERT_WITH_CODE( + (data->vddc_voltage_table.count <= (SMU71_MAX_LEVELS_VDDC)), + "Too many voltage values for VDDC. Trimming to fit state table.", + iceland_trim_voltage_table_to_fit_state_table(hwmgr, + SMU71_MAX_LEVELS_VDDC, &(data->vddc_voltage_table)); + ); + + /* GPIO */ + if (ICELAND_VOLTAGE_CONTROL_BY_GPIO == data->vdd_ci_control) { + result = atomctrl_get_voltage_table_v3(hwmgr, + VOLTAGE_TYPE_VDDCI, VOLTAGE_OBJ_GPIO_LUT, &(data->vddci_voltage_table)); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to retrieve VDDCI table.", return result;); + } + + /* SVI2 VDDCI voltage */ + if (ICELAND_VOLTAGE_CONTROL_BY_SVID2 == data->vdd_ci_control) { + result = iceland_get_svi2_voltage_table(hwmgr, + hwmgr->dyn_state.vddci_dependency_on_mclk, + &data->vddci_voltage_table); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to retrieve SVI2 VDDCI table from dependancy table.", return result;); + } + + PP_ASSERT_WITH_CODE( + (data->vddci_voltage_table.count <= (SMU71_MAX_LEVELS_VDDCI)), + "Too many voltage values for VDDCI. 
Trimming to fit state table.", + iceland_trim_voltage_table_to_fit_state_table(hwmgr, + SMU71_MAX_LEVELS_VDDCI, &(data->vddci_voltage_table)); + ); + + + /* GPIO */ + if (ICELAND_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { + result = atomctrl_get_voltage_table_v3(hwmgr, + VOLTAGE_TYPE_MVDDC, VOLTAGE_OBJ_GPIO_LUT, &(data->mvdd_voltage_table)); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to retrieve table.", return result;); + } + + /* SVI2 voltage control */ + if (ICELAND_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) { + result = iceland_get_svi2_voltage_table(hwmgr, + hwmgr->dyn_state.mvdd_dependency_on_mclk, + &data->mvdd_voltage_table); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to retrieve SVI2 MVDD table from dependancy table.", return result;); + } + + PP_ASSERT_WITH_CODE( + (data->mvdd_voltage_table.count <= (SMU71_MAX_LEVELS_MVDD)), + "Too many voltage values for MVDD. Trimming to fit state table.", + iceland_trim_voltage_table_to_fit_state_table(hwmgr, + SMU71_MAX_LEVELS_MVDD, &(data->mvdd_voltage_table)); + ); + + return 0; +} + +/*---------------------------MC----------------------------*/ + +uint8_t iceland_get_memory_module_index(struct pp_hwmgr *hwmgr) +{ + return (uint8_t) (0xFF & (cgs_read_register(hwmgr->device, mmBIOS_SCRATCH_4) >> 16)); +} + +bool iceland_check_s0_mc_reg_index(uint16_t inReg, uint16_t *outReg) +{ + bool result = true; + + switch (inReg) { + case mmMC_SEQ_RAS_TIMING: + *outReg = mmMC_SEQ_RAS_TIMING_LP; + break; + + case mmMC_SEQ_DLL_STBY: + *outReg = mmMC_SEQ_DLL_STBY_LP; + break; + + case mmMC_SEQ_G5PDX_CMD0: + *outReg = mmMC_SEQ_G5PDX_CMD0_LP; + break; + + case mmMC_SEQ_G5PDX_CMD1: + *outReg = mmMC_SEQ_G5PDX_CMD1_LP; + break; + + case mmMC_SEQ_G5PDX_CTRL: + *outReg = mmMC_SEQ_G5PDX_CTRL_LP; + break; + + case mmMC_SEQ_CAS_TIMING: + *outReg = mmMC_SEQ_CAS_TIMING_LP; + break; + + case mmMC_SEQ_MISC_TIMING: + *outReg = mmMC_SEQ_MISC_TIMING_LP; + break; + + case mmMC_SEQ_MISC_TIMING2: + *outReg = mmMC_SEQ_MISC_TIMING2_LP; + break; + + case mmMC_SEQ_PMG_DVS_CMD: + *outReg = mmMC_SEQ_PMG_DVS_CMD_LP; + break; + + case mmMC_SEQ_PMG_DVS_CTL: + *outReg = mmMC_SEQ_PMG_DVS_CTL_LP; + break; + + case mmMC_SEQ_RD_CTL_D0: + *outReg = mmMC_SEQ_RD_CTL_D0_LP; + break; + + case mmMC_SEQ_RD_CTL_D1: + *outReg = mmMC_SEQ_RD_CTL_D1_LP; + break; + + case mmMC_SEQ_WR_CTL_D0: + *outReg = mmMC_SEQ_WR_CTL_D0_LP; + break; + + case mmMC_SEQ_WR_CTL_D1: + *outReg = mmMC_SEQ_WR_CTL_D1_LP; + break; + + case mmMC_PMG_CMD_EMRS: + *outReg = mmMC_SEQ_PMG_CMD_EMRS_LP; + break; + + case mmMC_PMG_CMD_MRS: + *outReg = mmMC_SEQ_PMG_CMD_MRS_LP; + break; + + case mmMC_PMG_CMD_MRS1: + *outReg = mmMC_SEQ_PMG_CMD_MRS1_LP; + break; + + case mmMC_SEQ_PMG_TIMING: + *outReg = mmMC_SEQ_PMG_TIMING_LP; + break; + + case mmMC_PMG_CMD_MRS2: + *outReg = mmMC_SEQ_PMG_CMD_MRS2_LP; + break; + + case mmMC_SEQ_WR_CTL_2: + *outReg = mmMC_SEQ_WR_CTL_2_LP; + break; + + default: + result = false; + break; + } + + return result; +} + +int iceland_set_s0_mc_reg_index(phw_iceland_mc_reg_table *table) +{ + uint32_t i; + uint16_t address; + + for (i = 0; i < table->last; i++) { + table->mc_reg_address[i].s0 = + iceland_check_s0_mc_reg_index(table->mc_reg_address[i].s1, &address) + ? 
address : table->mc_reg_address[i].s1; + } + return 0; +} + +int iceland_copy_vbios_smc_reg_table(const pp_atomctrl_mc_reg_table *table, phw_iceland_mc_reg_table *ni_table) +{ + uint8_t i, j; + + PP_ASSERT_WITH_CODE((table->last <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -1); + PP_ASSERT_WITH_CODE((table->num_entries <= MAX_AC_TIMING_ENTRIES), + "Invalid VramInfo table.", return -1); + + for (i = 0; i < table->last; i++) { + ni_table->mc_reg_address[i].s1 = table->mc_reg_address[i].s1; + } + ni_table->last = table->last; + + for (i = 0; i < table->num_entries; i++) { + ni_table->mc_reg_table_entry[i].mclk_max = + table->mc_reg_table_entry[i].mclk_max; + for (j = 0; j < table->last; j++) { + ni_table->mc_reg_table_entry[i].mc_data[j] = + table->mc_reg_table_entry[i].mc_data[j]; + } + } + + ni_table->num_entries = table->num_entries; + + return 0; +} + +/** + * VBIOS omits some information to reduce size, we need to recover them here. + * 1. when we see mmMC_SEQ_MISC1, bit[31:16] EMRS1, need to be write to mmMC_PMG_CMD_EMRS /_LP[15:0]. + * Bit[15:0] MRS, need to be update mmMC_PMG_CMD_MRS/_LP[15:0] + * 2. when we see mmMC_SEQ_RESERVE_M, bit[15:0] EMRS2, need to be write to mmMC_PMG_CMD_MRS1/_LP[15:0]. + * 3. need to set these data for each clock range + * + * @param hwmgr the address of the powerplay hardware manager. + * @param table the address of MCRegTable + * @return always 0 + */ +static int iceland_set_mc_special_registers(struct pp_hwmgr *hwmgr, phw_iceland_mc_reg_table *table) +{ + uint8_t i, j, k; + uint32_t temp_reg; + const iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + for (i = 0, j = table->last; i < table->last; i++) { + PP_ASSERT_WITH_CODE((j < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -1); + switch (table->mc_reg_address[i].s1) { + /* + * mmMC_SEQ_MISC1, bit[31:16] EMRS1, need to be write + * to mmMC_PMG_CMD_EMRS/_LP[15:0]. 
Bit[15:0] MRS, need + * to be update mmMC_PMG_CMD_MRS/_LP[15:0] + */ + case mmMC_SEQ_MISC1: + temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_EMRS); + table->mc_reg_address[j].s1 = mmMC_PMG_CMD_EMRS; + table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_EMRS_LP; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + ((temp_reg & 0xffff0000)) | + ((table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16); + } + j++; + PP_ASSERT_WITH_CODE((j < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -1); + + temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS); + table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS; + table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS_LP; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (temp_reg & 0xffff0000) | + (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); + + if (!data->is_memory_GDDR5) { + table->mc_reg_table_entry[k].mc_data[j] |= 0x100; + } + } + j++; + PP_ASSERT_WITH_CODE((j <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -1); + + if (!data->is_memory_GDDR5) { + table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD; + table->mc_reg_address[j].s0 = mmMC_PMG_AUTO_CMD; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; + } + j++; + PP_ASSERT_WITH_CODE((j <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -1); + } + + break; + + case mmMC_SEQ_RESERVE_M: + temp_reg = cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS1); + table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS1; + table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS1_LP; + for (k = 0; k < table->num_entries; k++) { + table->mc_reg_table_entry[k].mc_data[j] = + (temp_reg & 0xffff0000) | + (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); + } + j++; + PP_ASSERT_WITH_CODE((j <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Invalid VramInfo table.", return -1); + break; + + default: + break; + } + + } + + table->last = j; + + return 0; +} + + +static int iceland_set_valid_flag(phw_iceland_mc_reg_table *table) +{ + uint8_t i, j; + for (i = 0; i < table->last; i++) { + for (j = 1; j < table->num_entries; j++) { + if (table->mc_reg_table_entry[j-1].mc_data[i] != + table->mc_reg_table_entry[j].mc_data[i]) { + table->validflag |= (1<backend); + pp_atomctrl_mc_reg_table *table; + phw_iceland_mc_reg_table *ni_table = &data->iceland_mc_reg_table; + uint8_t module_index = iceland_get_memory_module_index(hwmgr); + + table = kzalloc(sizeof(pp_atomctrl_mc_reg_table), GFP_KERNEL); + + if (NULL == table) + return -ENOMEM; + + /* Program additional LP registers that are no longer programmed by VBIOS */ + cgs_write_register(hwmgr->device, mmMC_SEQ_RAS_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_RAS_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_CAS_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_CAS_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_DLL_STBY_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_DLL_STBY)); + cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD0_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD0)); + cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD1_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CMD1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_G5PDX_CTRL_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_G5PDX_CTRL)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CMD_LP, 
cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CMD)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CTL_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_DVS_CTL)); + cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_MISC_TIMING2)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_EMRS_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_EMRS)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS1_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D0_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D0)); + cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_D1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D0)); + cgs_write_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_RD_CTL_D1)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_TIMING_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_PMG_TIMING)); + cgs_write_register(hwmgr->device, mmMC_SEQ_PMG_CMD_MRS2_LP, cgs_read_register(hwmgr->device, mmMC_PMG_CMD_MRS2)); + cgs_write_register(hwmgr->device, mmMC_SEQ_WR_CTL_2_LP, cgs_read_register(hwmgr->device, mmMC_SEQ_WR_CTL_2)); + + memset(table, 0x00, sizeof(pp_atomctrl_mc_reg_table)); + + result = atomctrl_initialize_mc_reg_table(hwmgr, module_index, table); + + if (0 == result) + result = iceland_copy_vbios_smc_reg_table(table, ni_table); + + if (0 == result) { + iceland_set_s0_mc_reg_index(ni_table); + result = iceland_set_mc_special_registers(hwmgr, ni_table); + } + + if (0 == result) + iceland_set_valid_flag(ni_table); + + kfree(table); + return result; +} + +/** + * Programs static screed detection parameters + * + * @param hwmgr the address of the powerplay hardware manager. + * @return always 0 + */ +int iceland_program_static_screen_threshold_parameters(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + /* Set static screen threshold unit*/ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, CG_STATIC_SCREEN_PARAMETER, STATIC_SCREEN_THRESHOLD_UNIT, + data->static_screen_threshold_unit); + /* Set static screen threshold*/ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, CG_STATIC_SCREEN_PARAMETER, STATIC_SCREEN_THRESHOLD, + data->static_screen_threshold); + + return 0; +} + +/** + * Setup display gap for glitch free memory clock switching. + * + * @param hwmgr the address of the powerplay hardware manager. + * @return always 0 + */ +int iceland_enable_display_gap(struct pp_hwmgr *hwmgr) +{ + uint32_t display_gap = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL); + + display_gap = PHM_SET_FIELD(display_gap, + CG_DISPLAY_GAP_CNTL, DISP_GAP, DISPLAY_GAP_IGNORE); + + display_gap = PHM_SET_FIELD(display_gap, + CG_DISPLAY_GAP_CNTL, DISP_GAP_MCHG, DISPLAY_GAP_VBLANK); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_DISPLAY_GAP_CNTL, display_gap); + + return 0; +} + +/** + * Programs activity state transition voting clients + * + * @param hwmgr the address of the powerplay hardware manager. 
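+ * Clears the SCLK count/busy resets and then programs CG_FREQ_TRAN_VOTING_0
+ * through _7 from the voting_rights_clients values held in the backend data.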
+ * @return always 0 + */ +int iceland_program_voting_clients(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + /* Clear reset for voting clients before enabling DPM */ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SCLK_PWRMGT_CNTL, RESET_SCLK_CNT, 0); + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SCLK_PWRMGT_CNTL, RESET_BUSY_CNT, 0); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_FREQ_TRAN_VOTING_0, data->voting_rights_clients0); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_FREQ_TRAN_VOTING_1, data->voting_rights_clients1); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_FREQ_TRAN_VOTING_2, data->voting_rights_clients2); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_FREQ_TRAN_VOTING_3, data->voting_rights_clients3); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_FREQ_TRAN_VOTING_4, data->voting_rights_clients4); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_FREQ_TRAN_VOTING_5, data->voting_rights_clients5); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_FREQ_TRAN_VOTING_6, data->voting_rights_clients6); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_FREQ_TRAN_VOTING_7, data->voting_rights_clients7); + + return 0; +} + +static int iceland_upload_firmware(struct pp_hwmgr *hwmgr) +{ + int ret = 0; + + if (!iceland_is_smc_ram_running(hwmgr->smumgr)) + ret = iceland_smu_upload_firmware_image(hwmgr->smumgr); + + return ret; +} + +/** + * Get the location of various tables inside the FW image. + * + * @param hwmgr the address of the powerplay hardware manager. + * @return always 0 + */ +int iceland_process_firmware_header(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + uint32_t tmp; + int result; + bool error = 0; + + result = iceland_read_smc_sram_dword(hwmgr->smumgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, DpmTable), + &tmp, data->sram_end); + + if (0 == result) { + data->dpm_table_start = tmp; + } + + error |= (0 != result); + + result = iceland_read_smc_sram_dword(hwmgr->smumgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, SoftRegisters), + &tmp, data->sram_end); + + if (0 == result) { + data->soft_regs_start = tmp; + } + + error |= (0 != result); + + + result = iceland_read_smc_sram_dword(hwmgr->smumgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, mcRegisterTable), + &tmp, data->sram_end); + + if (0 == result) { + data->mc_reg_table_start = tmp; + } + + result = iceland_read_smc_sram_dword(hwmgr->smumgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, FanTable), + &tmp, data->sram_end); + + if (0 == result) { + data->fan_table_start = tmp; + } + + error |= (0 != result); + + result = iceland_read_smc_sram_dword(hwmgr->smumgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, mcArbDramTimingTable), + &tmp, data->sram_end); + + if (0 == result) { + data->arb_table_start = tmp; + } + + error |= (0 != result); + + + result = iceland_read_smc_sram_dword(hwmgr->smumgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, Version), + &tmp, data->sram_end); + + if (0 == result) { + hwmgr->microcode_version_info.SMC = tmp; + } + + error |= (0 != result); + + result = iceland_read_smc_sram_dword(hwmgr->smumgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, UlvSettings), + &tmp, 
data->sram_end); + + if (0 == result) { + data->ulv_settings_start = tmp; + } + + error |= (0 != result); + + return error ? 1 : 0; +} + +/* +* Copy one arb setting to another and then switch the active set. +* arbFreqSrc and arbFreqDest is one of the MC_CG_ARB_FREQ_Fx constants. +*/ +int iceland_copy_and_switch_arb_sets(struct pp_hwmgr *hwmgr, + uint32_t arbFreqSrc, uint32_t arbFreqDest) +{ + uint32_t mc_arb_dram_timing; + uint32_t mc_arb_dram_timing2; + uint32_t burst_time; + uint32_t mc_cg_config; + + switch (arbFreqSrc) { + case MC_CG_ARB_FREQ_F0: + mc_arb_dram_timing = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING); + mc_arb_dram_timing2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2); + burst_time = PHM_READ_FIELD(hwmgr->device, MC_ARB_BURST_TIME, STATE0); + break; + + case MC_CG_ARB_FREQ_F1: + mc_arb_dram_timing = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING_1); + mc_arb_dram_timing2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2_1); + burst_time = PHM_READ_FIELD(hwmgr->device, MC_ARB_BURST_TIME, STATE1); + break; + + default: + return -1; + } + + switch (arbFreqDest) { + case MC_CG_ARB_FREQ_F0: + cgs_write_register(hwmgr->device, mmMC_ARB_DRAM_TIMING, mc_arb_dram_timing); + cgs_write_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2, mc_arb_dram_timing2); + PHM_WRITE_FIELD(hwmgr->device, MC_ARB_BURST_TIME, STATE0, burst_time); + break; + + case MC_CG_ARB_FREQ_F1: + cgs_write_register(hwmgr->device, mmMC_ARB_DRAM_TIMING_1, mc_arb_dram_timing); + cgs_write_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2_1, mc_arb_dram_timing2); + PHM_WRITE_FIELD(hwmgr->device, MC_ARB_BURST_TIME, STATE1, burst_time); + break; + + default: + return -1; + } + + mc_cg_config = cgs_read_register(hwmgr->device, mmMC_CG_CONFIG); + mc_cg_config |= 0x0000000F; + cgs_write_register(hwmgr->device, mmMC_CG_CONFIG, mc_cg_config); + PHM_WRITE_FIELD(hwmgr->device, MC_ARB_CG, CG_ARB_REQ, arbFreqDest); + + return 0; +} + +/** + * Initial switch from ARB F0->F1 + * + * @param hwmgr the address of the powerplay hardware manager. + * @return always 0 + * This function is to be called from the SetPowerState table. + */ +int iceland_initial_switch_from_arb_f0_to_f1(struct pp_hwmgr *hwmgr) +{ + return iceland_copy_and_switch_arb_sets(hwmgr, MC_CG_ARB_FREQ_F0, MC_CG_ARB_FREQ_F1); +} + +/* ---------------------------------------- ULV related functions ----------------------------------------------------*/ + + +static int iceland_reset_single_dpm_table( + struct pp_hwmgr *hwmgr, + struct iceland_single_dpm_table *dpm_table, + uint32_t count) +{ + uint32_t i; + if (!(count <= MAX_REGULAR_DPM_NUMBER)) + printk(KERN_ERR "[ powerplay ] Fatal error, can not set up single DPM \ + table entries to exceed max number! 
\n"); + + dpm_table->count = count; + for (i = 0; i < MAX_REGULAR_DPM_NUMBER; i++) { + dpm_table->dpm_levels[i].enabled = 0; + } + + return 0; +} + +static void iceland_setup_pcie_table_entry( + struct iceland_single_dpm_table *dpm_table, + uint32_t index, uint32_t pcie_gen, + uint32_t pcie_lanes) +{ + dpm_table->dpm_levels[index].value = pcie_gen; + dpm_table->dpm_levels[index].param1 = pcie_lanes; + dpm_table->dpm_levels[index].enabled = 1; +} + +/* + * Set up the PCIe DPM table as follows: + * + * A = Performance State, Max, Gen Speed + * C = Performance State, Min, Gen Speed + * 1 = Performance State, Max, Lane # + * 3 = Performance State, Min, Lane # + * + * B = Power Saving State, Max, Gen Speed + * D = Power Saving State, Min, Gen Speed + * 2 = Power Saving State, Max, Lane # + * 4 = Power Saving State, Min, Lane # + * + * + * DPM Index Gen Speed Lane # + * 5 A 1 + * 4 B 2 + * 3 C 1 + * 2 D 2 + * 1 C 3 + * 0 D 4 + * + */ +static int iceland_setup_default_pcie_tables(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + PP_ASSERT_WITH_CODE((data->use_pcie_performance_levels || + data->use_pcie_power_saving_levels), + "No pcie performance levels!", return -EINVAL); + + if (data->use_pcie_performance_levels && !data->use_pcie_power_saving_levels) { + data->pcie_gen_power_saving = data->pcie_gen_performance; + data->pcie_lane_power_saving = data->pcie_lane_performance; + } else if (!data->use_pcie_performance_levels && data->use_pcie_power_saving_levels) { + data->pcie_gen_performance = data->pcie_gen_power_saving; + data->pcie_lane_performance = data->pcie_lane_power_saving; + } + + iceland_reset_single_dpm_table(hwmgr, &data->dpm_table.pcie_speed_table, SMU71_MAX_LEVELS_LINK); + + /* Hardcode Pcie Table */ + iceland_setup_pcie_table_entry(&data->dpm_table.pcie_speed_table, 0, + get_pcie_gen_support(data->pcie_gen_cap, PP_Min_PCIEGen), + get_pcie_lane_support(data->pcie_lane_cap, PP_Max_PCIELane)); + iceland_setup_pcie_table_entry(&data->dpm_table.pcie_speed_table, 1, + get_pcie_gen_support(data->pcie_gen_cap, PP_Min_PCIEGen), + get_pcie_lane_support(data->pcie_lane_cap, PP_Max_PCIELane)); + iceland_setup_pcie_table_entry(&data->dpm_table.pcie_speed_table, 2, + get_pcie_gen_support(data->pcie_gen_cap, PP_Max_PCIEGen), + get_pcie_lane_support(data->pcie_lane_cap, PP_Max_PCIELane)); + iceland_setup_pcie_table_entry(&data->dpm_table.pcie_speed_table, 3, + get_pcie_gen_support(data->pcie_gen_cap, PP_Max_PCIEGen), + get_pcie_lane_support(data->pcie_lane_cap, PP_Max_PCIELane)); + iceland_setup_pcie_table_entry(&data->dpm_table.pcie_speed_table, 4, + get_pcie_gen_support(data->pcie_gen_cap, PP_Max_PCIEGen), + get_pcie_lane_support(data->pcie_lane_cap, PP_Max_PCIELane)); + iceland_setup_pcie_table_entry(&data->dpm_table.pcie_speed_table, 5, + get_pcie_gen_support(data->pcie_gen_cap, PP_Max_PCIEGen), + get_pcie_lane_support(data->pcie_lane_cap, PP_Max_PCIELane)); + data->dpm_table.pcie_speed_table.count = 6; + + return 0; + +} + + +/* + * This function is to initalize all DPM state tables for SMU7 based on the dependency table. + * Dynamic state patching function will then trim these state tables to the allowed range based + * on the power policy or external client requests, such as UVD request, etc. 
+ */ +static int iceland_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + uint32_t i; + + struct phm_clock_voltage_dependency_table *allowed_vdd_sclk_table = + hwmgr->dyn_state.vddc_dependency_on_sclk; + struct phm_clock_voltage_dependency_table *allowed_vdd_mclk_table = + hwmgr->dyn_state.vddc_dependency_on_mclk; + struct phm_cac_leakage_table *std_voltage_table = + hwmgr->dyn_state.cac_leakage_table; + + PP_ASSERT_WITH_CODE(allowed_vdd_sclk_table != NULL, + "SCLK dependency table is missing. This table is mandatory", return -1); + PP_ASSERT_WITH_CODE(allowed_vdd_sclk_table->count >= 1, + "SCLK dependency table has to have is missing. This table is mandatory", return -1); + + PP_ASSERT_WITH_CODE(allowed_vdd_mclk_table != NULL, + "MCLK dependency table is missing. This table is mandatory", return -1); + PP_ASSERT_WITH_CODE(allowed_vdd_mclk_table->count >= 1, + "VMCLK dependency table has to have is missing. This table is mandatory", return -1); + + /* clear the state table to reset everything to default */ + memset(&(data->dpm_table), 0x00, sizeof(data->dpm_table)); + iceland_reset_single_dpm_table(hwmgr, &data->dpm_table.sclk_table, SMU71_MAX_LEVELS_GRAPHICS); + iceland_reset_single_dpm_table(hwmgr, &data->dpm_table.mclk_table, SMU71_MAX_LEVELS_MEMORY); + iceland_reset_single_dpm_table(hwmgr, &data->dpm_table.vddc_table, SMU71_MAX_LEVELS_VDDC); + iceland_reset_single_dpm_table(hwmgr, &data->dpm_table.vdd_ci_table, SMU71_MAX_LEVELS_VDDCI); + iceland_reset_single_dpm_table(hwmgr, &data->dpm_table.mvdd_table, SMU71_MAX_LEVELS_MVDD); + + PP_ASSERT_WITH_CODE(allowed_vdd_sclk_table != NULL, + "SCLK dependency table is missing. This table is mandatory", return -1); + /* Initialize Sclk DPM table based on allow Sclk values*/ + data->dpm_table.sclk_table.count = 0; + + for (i = 0; i < allowed_vdd_sclk_table->count; i++) { + if (i == 0 || data->dpm_table.sclk_table.dpm_levels[data->dpm_table.sclk_table.count-1].value != + allowed_vdd_sclk_table->entries[i].clk) { + data->dpm_table.sclk_table.dpm_levels[data->dpm_table.sclk_table.count].value = + allowed_vdd_sclk_table->entries[i].clk; + data->dpm_table.sclk_table.dpm_levels[data->dpm_table.sclk_table.count].enabled = 1; /*(i==0) ? 1 : 0; to do */ + data->dpm_table.sclk_table.count++; + } + } + + PP_ASSERT_WITH_CODE(allowed_vdd_mclk_table != NULL, + "MCLK dependency table is missing. This table is mandatory", return -1); + /* Initialize Mclk DPM table based on allow Mclk values */ + data->dpm_table.mclk_table.count = 0; + for (i = 0; i < allowed_vdd_mclk_table->count; i++) { + if (i == 0 || data->dpm_table.mclk_table.dpm_levels[data->dpm_table.mclk_table.count-1].value != + allowed_vdd_mclk_table->entries[i].clk) { + data->dpm_table.mclk_table.dpm_levels[data->dpm_table.mclk_table.count].value = + allowed_vdd_mclk_table->entries[i].clk; + data->dpm_table.mclk_table.dpm_levels[data->dpm_table.mclk_table.count].enabled = 1; /*(i==0) ? 1 : 0; */ + data->dpm_table.mclk_table.count++; + } + } + + /* Initialize Vddc DPM table based on allow Vddc values. And populate corresponding std values. 
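+	 * param1 of each level carries the corresponding standard (leakage)
+	 * voltage from the CAC leakage table entry at the same index.  Note
+	 * that the loop bound below comes from the SCLK dependency table while
+	 * the voltage values are read from the MCLK dependency table.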
*/ + for (i = 0; i < allowed_vdd_sclk_table->count; i++) { + data->dpm_table.vddc_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].v; + data->dpm_table.vddc_table.dpm_levels[i].param1 = std_voltage_table->entries[i].Leakage; + /* param1 is for corresponding std voltage */ + data->dpm_table.vddc_table.dpm_levels[i].enabled = 1; + } + + data->dpm_table.vddc_table.count = allowed_vdd_sclk_table->count; + allowed_vdd_mclk_table = hwmgr->dyn_state.vddci_dependency_on_mclk; + + if (NULL != allowed_vdd_mclk_table) { + /* Initialize Vddci DPM table based on allow Mclk values */ + for (i = 0; i < allowed_vdd_mclk_table->count; i++) { + data->dpm_table.vdd_ci_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].v; + data->dpm_table.vdd_ci_table.dpm_levels[i].enabled = 1; + } + data->dpm_table.vdd_ci_table.count = allowed_vdd_mclk_table->count; + } + + allowed_vdd_mclk_table = hwmgr->dyn_state.mvdd_dependency_on_mclk; + + if (NULL != allowed_vdd_mclk_table) { + /* + * Initialize MVDD DPM table based on allow Mclk + * values + */ + for (i = 0; i < allowed_vdd_mclk_table->count; i++) { + data->dpm_table.mvdd_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].v; + data->dpm_table.mvdd_table.dpm_levels[i].enabled = 1; + } + data->dpm_table.mvdd_table.count = allowed_vdd_mclk_table->count; + } + + /* setup PCIE gen speed levels*/ + iceland_setup_default_pcie_tables(hwmgr); + + /* save a copy of the default DPM table*/ + memcpy(&(data->golden_dpm_table), &(data->dpm_table), sizeof(struct iceland_dpm_table)); + + return 0; +} + +/** + * @brief PhwIceland_GetVoltageOrder + * Returns index of requested voltage record in lookup(table) + * @param hwmgr - pointer to hardware manager + * @param lookutab - lookup list to search in + * @param voltage - voltage to look for + * @return 0 on success + */ +uint8_t iceland_get_voltage_index(phm_ppt_v1_voltage_lookup_table *look_up_table, + uint16_t voltage) +{ + uint8_t count = (uint8_t) (look_up_table->count); + uint8_t i; + + PP_ASSERT_WITH_CODE((NULL != look_up_table), "Lookup Table empty.", return 0;); + PP_ASSERT_WITH_CODE((0 != count), "Lookup Table empty.", return 0;); + + for (i = 0; i < count; i++) { + /* find first voltage equal or bigger than requested */ + if (look_up_table->entries[i].us_vdd >= voltage) + return i; + } + + /* voltage is bigger than max voltage in the table */ + return i-1; +} + + +static int iceland_get_std_voltage_value_sidd(struct pp_hwmgr *hwmgr, + pp_atomctrl_voltage_table_entry *tab, uint16_t *hi, + uint16_t *lo) +{ + uint16_t v_index; + bool vol_found = false; + *hi = tab->value * VOLTAGE_SCALE; + *lo = tab->value * VOLTAGE_SCALE; + + /* SCLK/VDDC Dependency Table has to exist. */ + PP_ASSERT_WITH_CODE(NULL != hwmgr->dyn_state.vddc_dependency_on_sclk, + "The SCLK/VDDC Dependency Table does not exist.\n", + return -EINVAL); + + if (NULL == hwmgr->dyn_state.cac_leakage_table) { + pr_warning("CAC Leakage Table does not exist, using vddc.\n"); + return 0; + } + + /* + * Since voltage in the sclk/vddc dependency table is not + * necessarily in ascending order because of ELB voltage + * patching, loop through entire list to find exact voltage. 
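+	 * A first pass therefore looks for an exact match; if none is found, a
+	 * second pass falls back to the first entry with an equal or higher
+	 * voltage, and the CAC leakage entry at the matching index supplies the
+	 * Hi/Lo SIDD values.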
+ */ + for (v_index = 0; (uint32_t)v_index < hwmgr->dyn_state.vddc_dependency_on_sclk->count; v_index++) { + if (tab->value == hwmgr->dyn_state.vddc_dependency_on_sclk->entries[v_index].v) { + vol_found = true; + if ((uint32_t)v_index < hwmgr->dyn_state.cac_leakage_table->count) { + *lo = hwmgr->dyn_state.cac_leakage_table->entries[v_index].Vddc * VOLTAGE_SCALE; + *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[v_index].Leakage * VOLTAGE_SCALE); + } else { + pr_warning("Index from SCLK/VDDC Dependency Table exceeds the CAC Leakage Table index, using maximum index from CAC table.\n"); + *lo = hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Vddc * VOLTAGE_SCALE; + *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Leakage * VOLTAGE_SCALE); + } + break; + } + } + + /* + * If voltage is not found in the first pass, loop again to + * find the best match, equal or higher value. + */ + if (!vol_found) { + for (v_index = 0; (uint32_t)v_index < hwmgr->dyn_state.vddc_dependency_on_sclk->count; v_index++) { + if (tab->value <= hwmgr->dyn_state.vddc_dependency_on_sclk->entries[v_index].v) { + vol_found = true; + if ((uint32_t)v_index < hwmgr->dyn_state.cac_leakage_table->count) { + *lo = hwmgr->dyn_state.cac_leakage_table->entries[v_index].Vddc * VOLTAGE_SCALE; + *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[v_index].Leakage) * VOLTAGE_SCALE; + } else { + pr_warning("Index from SCLK/VDDC Dependency Table exceeds the CAC Leakage Table index in second look up, using maximum index from CAC table."); + *lo = hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Vddc * VOLTAGE_SCALE; + *hi = (uint16_t)(hwmgr->dyn_state.cac_leakage_table->entries[hwmgr->dyn_state.cac_leakage_table->count - 1].Leakage * VOLTAGE_SCALE); + } + break; + } + } + + if (!vol_found) + pr_warning("Unable to get std_vddc from SCLK/VDDC Dependency Table, using vddc.\n"); + } + + return 0; +} + +static int iceland_populate_smc_voltage_table(struct pp_hwmgr *hwmgr, + pp_atomctrl_voltage_table_entry *tab, + SMU71_Discrete_VoltageLevel *smc_voltage_tab) { + int result; + + + result = iceland_get_std_voltage_value_sidd(hwmgr, tab, + &smc_voltage_tab->StdVoltageHiSidd, + &smc_voltage_tab->StdVoltageLoSidd); + if (0 != result) { + smc_voltage_tab->StdVoltageHiSidd = tab->value * VOLTAGE_SCALE; + smc_voltage_tab->StdVoltageLoSidd = tab->value * VOLTAGE_SCALE; + } + + smc_voltage_tab->Voltage = PP_HOST_TO_SMC_US(tab->value * VOLTAGE_SCALE); + CONVERT_FROM_HOST_TO_SMC_US(smc_voltage_tab->StdVoltageHiSidd); + CONVERT_FROM_HOST_TO_SMC_US(smc_voltage_tab->StdVoltageHiSidd); + + return 0; +} + +/** + * Vddc table preparation for SMC. 
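+ *
+ * Each VDDC entry is converted with iceland_populate_smc_voltage_table();
+ * when GPIO (SMIO) voltage control is in use, the level additionally carries
+ * the SMIO low mask for that voltage entry.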
+ * + * @param hwmgr the address of the hardware manager + * @param table the SMC DPM table structure to be populated + * @return always 0 + */ +static int iceland_populate_smc_vddc_table(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + unsigned int count; + int result; + + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + table->VddcLevelCount = data->vddc_voltage_table.count; + for (count = 0; count < table->VddcLevelCount; count++) { + result = iceland_populate_smc_voltage_table(hwmgr, + &data->vddc_voltage_table.entries[count], + &table->VddcLevel[count]); + PP_ASSERT_WITH_CODE(0 == result, "do not populate SMC VDDC voltage table", return -EINVAL); + + /* GPIO voltage control */ + if (ICELAND_VOLTAGE_CONTROL_BY_GPIO == data->voltage_control) + table->VddcLevel[count].Smio |= data->vddc_voltage_table.entries[count].smio_low; + else if (ICELAND_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) + table->VddcLevel[count].Smio = 0; + } + + CONVERT_FROM_HOST_TO_SMC_UL(table->VddcLevelCount); + + return 0; +} + +/** + * Vddci table preparation for SMC. + * + * @param *hwmgr The address of the hardware manager. + * @param *table The SMC DPM table structure to be populated. + * @return 0 + */ +static int iceland_populate_smc_vdd_ci_table(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + int result; + uint32_t count; + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + table->VddciLevelCount = data->vddci_voltage_table.count; + for (count = 0; count < table->VddciLevelCount; count++) { + result = iceland_populate_smc_voltage_table(hwmgr, + &data->vddci_voltage_table.entries[count], + &table->VddciLevel[count]); + PP_ASSERT_WITH_CODE(0 == result, "do not populate SMC VDDCI voltage table", return -EINVAL); + + /* GPIO voltage control */ + if (ICELAND_VOLTAGE_CONTROL_BY_GPIO == data->vdd_ci_control) + table->VddciLevel[count].Smio |= data->vddci_voltage_table.entries[count].smio_low; + else + table->VddciLevel[count].Smio = 0; + } + + CONVERT_FROM_HOST_TO_SMC_UL(table->VddcLevelCount); + + return 0; +} + +/** + * Mvdd table preparation for SMC. + * + * @param *hwmgr The address of the hardware manager. + * @param *table The SMC DPM table structure to be populated. 
+ * @return 0 + */ +static int iceland_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + int result; + uint32_t count; + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + table->MvddLevelCount = data->mvdd_voltage_table.count; + for (count = 0; count < table->MvddLevelCount; count++) { + result = iceland_populate_smc_voltage_table(hwmgr, + &data->mvdd_voltage_table.entries[count], + &table->MvddLevel[count]); + PP_ASSERT_WITH_CODE(0 == result, "do not populate SMC VDDCI voltage table", return -EINVAL); + + /* GPIO voltage control */ + if (ICELAND_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) + table->MvddLevel[count].Smio |= data->mvdd_voltage_table.entries[count].smio_low; + else + table->MvddLevel[count].Smio = 0; + } + + CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount); + + return 0; +} + +/** + * Convert a voltage value in mv unit to VID number required by SMU firmware + */ +static uint8_t convert_to_vid(uint16_t vddc) +{ + return (uint8_t) ((6200 - (vddc * VOLTAGE_SCALE)) / 25); +} + +int iceland_populate_bapm_vddc_vid_sidd(struct pp_hwmgr *hwmgr) +{ + int i; + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + uint8_t * hi_vid = data->power_tune_table.BapmVddCVidHiSidd; + uint8_t * lo_vid = data->power_tune_table.BapmVddCVidLoSidd; + + PP_ASSERT_WITH_CODE(NULL != hwmgr->dyn_state.cac_leakage_table, + "The CAC Leakage table does not exist!", return -EINVAL); + PP_ASSERT_WITH_CODE(hwmgr->dyn_state.cac_leakage_table->count <= 8, + "There should never be more than 8 entries for BapmVddcVid!!!", return -EINVAL); + PP_ASSERT_WITH_CODE(hwmgr->dyn_state.cac_leakage_table->count == hwmgr->dyn_state.vddc_dependency_on_sclk->count, + "CACLeakageTable->count and VddcDependencyOnSCLk->count not equal", return -EINVAL); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_EVV)) { + for (i = 0; (uint32_t) i < hwmgr->dyn_state.cac_leakage_table->count; i++) { + lo_vid[i] = convert_to_vid(hwmgr->dyn_state.cac_leakage_table->entries[i].Vddc1); + hi_vid[i] = convert_to_vid(hwmgr->dyn_state.cac_leakage_table->entries[i].Vddc2); + } + } else { + PP_ASSERT_WITH_CODE(false, "Iceland should always support EVV", return -EINVAL); + } + + return 0; +} + +int iceland_populate_vddc_vid(struct pp_hwmgr *hwmgr) +{ + int i; + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + uint8_t *vid = data->power_tune_table.VddCVid; + + PP_ASSERT_WITH_CODE(data->vddc_voltage_table.count <= 8, + "There should never be more than 8 entries for VddcVid!!!", + return -EINVAL); + + for (i = 0; i < (int)data->vddc_voltage_table.count; i++) { + vid[i] = convert_to_vid(data->vddc_voltage_table.entries[i].value); + } + + return 0; +} + +/** + * Preparation of voltage tables for SMC. 
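+ *
+ * This chains the VDDC, VDDCI and MVDD helpers above; a failure in any of
+ * them aborts SMC table initialization.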
+ * + * @param hwmgr the address of the hardware manager + * @param table the SMC DPM table structure to be populated + * @return always 0 + */ + +int iceland_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + int result; + + result = iceland_populate_smc_vddc_table(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "can not populate VDDC voltage table to SMC", return -1); + + result = iceland_populate_smc_vdd_ci_table(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "can not populate VDDCI voltage table to SMC", return -1); + + result = iceland_populate_smc_mvdd_table(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "can not populate MVDD voltage table to SMC", return -1); + + return 0; +} + + +/** + * Re-generate the DPM level mask value + * @param hwmgr the address of the hardware manager + */ +static uint32_t iceland_get_dpm_level_enable_mask_value( + struct iceland_single_dpm_table * dpm_table) +{ + uint32_t i; + uint32_t mask_value = 0; + + for (i = dpm_table->count; i > 0; i--) { + mask_value = mask_value << 1; + + if (dpm_table->dpm_levels[i-1].enabled) + mask_value |= 0x1; + else + mask_value &= 0xFFFFFFFE; + } + return mask_value; +} + +int iceland_populate_memory_timing_parameters( + struct pp_hwmgr *hwmgr, + uint32_t engine_clock, + uint32_t memory_clock, + struct SMU71_Discrete_MCArbDramTimingTableEntry *arb_regs + ) +{ + uint32_t dramTiming; + uint32_t dramTiming2; + uint32_t burstTime; + int result; + + result = atomctrl_set_engine_dram_timings_rv770(hwmgr, + engine_clock, memory_clock); + + PP_ASSERT_WITH_CODE(result == 0, + "Error calling VBIOS to set DRAM_TIMING.", return result); + + dramTiming = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING); + dramTiming2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2); + burstTime = PHM_READ_FIELD(hwmgr->device, MC_ARB_BURST_TIME, STATE0); + + arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dramTiming); + arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dramTiming2); + arb_regs->McArbBurstTime = (uint8_t)burstTime; + + return 0; +} + +/** + * Setup parameters for the MC ARB. + * + * @param hwmgr the address of the powerplay hardware manager. + * @return always 0 + * This function is to be called from the SetPowerState table. + */ +int iceland_program_memory_timing_parameters(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + int result = 0; + SMU71_Discrete_MCArbDramTimingTable arb_regs; + uint32_t i, j; + + memset(&arb_regs, 0x00, sizeof(SMU71_Discrete_MCArbDramTimingTable)); + + for (i = 0; i < data->dpm_table.sclk_table.count; i++) { + for (j = 0; j < data->dpm_table.mclk_table.count; j++) { + result = iceland_populate_memory_timing_parameters + (hwmgr, data->dpm_table.sclk_table.dpm_levels[i].value, + data->dpm_table.mclk_table.dpm_levels[j].value, + &arb_regs.entries[i][j]); + + if (0 != result) { + break; + } + } + } + + if (0 == result) { + result = iceland_copy_bytes_to_smc( + hwmgr->smumgr, + data->arb_table_start, + (uint8_t *)&arb_regs, + sizeof(SMU71_Discrete_MCArbDramTimingTable), + data->sram_end + ); + } + + return result; +} + +static int iceland_populate_smc_link_level(struct pp_hwmgr *hwmgr, SMU71_Discrete_DpmTable *table) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + struct iceland_dpm_table *dpm_table = &data->dpm_table; + uint32_t i; + + /* Index (dpm_table->pcie_speed_table.count) is reserved for PCIE boot level. 
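+	 * That is why the loop below runs up to and including
+	 * pcie_speed_table.count: the extra LinkLevel entry reserved for the
+	 * boot level is initialized with the same per-level defaults.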
*/ + for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) { + table->LinkLevel[i].PcieGenSpeed = + (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value; + table->LinkLevel[i].PcieLaneCount = + (uint8_t)encode_pcie_lane_width(dpm_table->pcie_speed_table.dpm_levels[i].param1); + table->LinkLevel[i].EnabledForActivity = + 1; + table->LinkLevel[i].SPC = + (uint8_t)(data->pcie_spc_cap & 0xff); + table->LinkLevel[i].DownThreshold = + PP_HOST_TO_SMC_UL(5); + table->LinkLevel[i].UpThreshold = + PP_HOST_TO_SMC_UL(30); + } + + data->smc_state_table.LinkLevelCount = + (uint8_t)dpm_table->pcie_speed_table.count; + data->dpm_level_enable_mask.pcie_dpm_enable_mask = + iceland_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); + + return 0; +} + +static int iceland_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + return 0; +} + +uint8_t iceland_get_voltage_id(pp_atomctrl_voltage_table *voltage_table, + uint32_t voltage) +{ + uint8_t count = (uint8_t) (voltage_table->count); + uint8_t i = 0; + + PP_ASSERT_WITH_CODE((NULL != voltage_table), + "Voltage Table empty.", return 0;); + PP_ASSERT_WITH_CODE((0 != count), + "Voltage Table empty.", return 0;); + + for (i = 0; i < count; i++) { + /* find first voltage bigger than requested */ + if (voltage_table->entries[i].value >= voltage) + return i; + } + + /* voltage is bigger than max voltage in the table */ + return i - 1; +} + +static int iceland_populate_smc_vce_level(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + return 0; +} + +static int iceland_populate_smc_acp_level(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + return 0; +} + +static int iceland_populate_smc_samu_level(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + return 0; +} + + +static int iceland_populate_smc_svi2_config(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *tab) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + if(ICELAND_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) + tab->SVI2Enable |= VDDC_ON_SVI2; + + if(ICELAND_VOLTAGE_CONTROL_BY_SVID2 == data->vdd_ci_control) + tab->SVI2Enable |= VDDCI_ON_SVI2; + else + tab->MergedVddci = 1; + + if(ICELAND_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) + tab->SVI2Enable |= MVDD_ON_SVI2; + + PP_ASSERT_WITH_CODE( tab->SVI2Enable != (VDDC_ON_SVI2 | VDDCI_ON_SVI2 | MVDD_ON_SVI2) && + (tab->SVI2Enable & VDDC_ON_SVI2), "SVI2 domain configuration is incorrect!", return -EINVAL); + + return 0; +} + +static int iceland_get_dependecy_volt_by_clk(struct pp_hwmgr *hwmgr, + struct phm_clock_voltage_dependency_table *allowed_clock_voltage_table, + uint32_t clock, uint32_t *vol) +{ + uint32_t i = 0; + + /* clock - voltage dependency table is empty table */ + if (allowed_clock_voltage_table->count == 0) + return -EINVAL; + + for (i = 0; i < allowed_clock_voltage_table->count; i++) { + /* find first sclk bigger than request */ + if (allowed_clock_voltage_table->entries[i].clk >= clock) { + *vol = allowed_clock_voltage_table->entries[i].v; + return 0; + } + } + + /* sclk is bigger than max sclk in the dependence table */ + *vol = allowed_clock_voltage_table->entries[i - 1].v; + + return 0; +} + +static uint8_t iceland_get_mclk_frequency_ratio(uint32_t memory_clock, + bool strobe_mode) +{ + uint8_t mc_para_index; + + if (strobe_mode) { + if (memory_clock < 12500) { + mc_para_index = 0x00; + } else if (memory_clock > 47500) { + mc_para_index = 0x0f; + } else { + mc_para_index = (uint8_t)((memory_clock - 10000) / 2500); + } + } else { + if 
(memory_clock < 65000) { + mc_para_index = 0x00; + } else if (memory_clock > 135000) { + mc_para_index = 0x0f; + } else { + mc_para_index = (uint8_t)((memory_clock - 60000) / 5000); + } + } + + return mc_para_index; +} + +static uint8_t iceland_get_ddr3_mclk_frequency_ratio(uint32_t memory_clock) +{ + uint8_t mc_para_index; + + if (memory_clock < 10000) { + mc_para_index = 0; + } else if (memory_clock >= 80000) { + mc_para_index = 0x0f; + } else { + mc_para_index = (uint8_t)((memory_clock - 10000) / 5000 + 1); + } + + return mc_para_index; +} + +static int iceland_populate_phase_value_based_on_sclk(struct pp_hwmgr *hwmgr, const struct phm_phase_shedding_limits_table *pl, + uint32_t sclk, uint32_t *p_shed) +{ + unsigned int i; + + /* use the minimum phase shedding */ + *p_shed = 1; + + /* + * PPGen ensures the phase shedding limits table is sorted + * from lowest voltage/sclk/mclk to highest voltage/sclk/mclk. + * VBIOS ensures the phase shedding masks table is sorted from + * least phases enabled (phase shedding on) to most phases + * enabled (phase shedding off). + */ + for (i = 0; i < pl->count; i++) { + if (sclk < pl->entries[i].Sclk) { + /* Enable phase shedding */ + *p_shed = i; + break; + } + } + + return 0; +} + +static int iceland_populate_phase_value_based_on_mclk(struct pp_hwmgr *hwmgr, const struct phm_phase_shedding_limits_table *pl, + uint32_t memory_clock, uint32_t *p_shed) +{ + unsigned int i; + + /* use the minimum phase shedding */ + *p_shed = 1; + + /* + * PPGen ensures the phase shedding limits table is sorted + * from lowest voltage/sclk/mclk to highest voltage/sclk/mclk. + * VBIOS ensures the phase shedding masks table is sorted from + * least phases enabled (phase shedding on) to most phases + * enabled (phase shedding off). + */ + for (i = 0; i < pl->count; i++) { + if (memory_clock < pl->entries[i].Mclk) { + /* Enable phase shedding */ + *p_shed = i; + break; + } + } + + return 0; +} + +/** + * Populates the SMC MCLK structure using the provided memory clock + * + * @param hwmgr the address of the hardware manager + * @param memory_clock the memory clock to use to populate the structure + * @param sclk the SMC SCLK structure to be populated + */ +static int iceland_calculate_mclk_params( + struct pp_hwmgr *hwmgr, + uint32_t memory_clock, + SMU71_Discrete_MemoryLevel *mclk, + bool strobe_mode, + bool dllStateOn + ) +{ + const iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + uint32_t dll_cntl = data->clock_registers.vDLL_CNTL; + uint32_t mclk_pwrmgt_cntl = data->clock_registers.vMCLK_PWRMGT_CNTL; + uint32_t mpll_ad_func_cntl = data->clock_registers.vMPLL_AD_FUNC_CNTL; + uint32_t mpll_dq_func_cntl = data->clock_registers.vMPLL_DQ_FUNC_CNTL; + uint32_t mpll_func_cntl = data->clock_registers.vMPLL_FUNC_CNTL; + uint32_t mpll_func_cntl_1 = data->clock_registers.vMPLL_FUNC_CNTL_1; + uint32_t mpll_func_cntl_2 = data->clock_registers.vMPLL_FUNC_CNTL_2; + uint32_t mpll_ss1 = data->clock_registers.vMPLL_SS1; + uint32_t mpll_ss2 = data->clock_registers.vMPLL_SS2; + + pp_atomctrl_memory_clock_param mpll_param; + int result; + + result = atomctrl_get_memory_pll_dividers_si(hwmgr, + memory_clock, &mpll_param, strobe_mode); + PP_ASSERT_WITH_CODE(0 == result, + "Error retrieving Memory Clock Parameters from VBIOS.", return result); + + /* MPLL_FUNC_CNTL setup*/ + mpll_func_cntl = PHM_SET_FIELD(mpll_func_cntl, MPLL_FUNC_CNTL, BWCTRL, mpll_param.bw_ctrl); + + /* MPLL_FUNC_CNTL_1 setup*/ + mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, + MPLL_FUNC_CNTL_1, CLKF, 
mpll_param.mpll_fb_divider.cl_kf); + mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, + MPLL_FUNC_CNTL_1, CLKFRAC, mpll_param.mpll_fb_divider.clk_frac); + mpll_func_cntl_1 = PHM_SET_FIELD(mpll_func_cntl_1, + MPLL_FUNC_CNTL_1, VCO_MODE, mpll_param.vco_mode); + + /* MPLL_AD_FUNC_CNTL setup*/ + mpll_ad_func_cntl = PHM_SET_FIELD(mpll_ad_func_cntl, + MPLL_AD_FUNC_CNTL, YCLK_POST_DIV, mpll_param.mpll_post_divider); + + if (data->is_memory_GDDR5) { + /* MPLL_DQ_FUNC_CNTL setup*/ + mpll_dq_func_cntl = PHM_SET_FIELD(mpll_dq_func_cntl, + MPLL_DQ_FUNC_CNTL, YCLK_SEL, mpll_param.yclk_sel); + mpll_dq_func_cntl = PHM_SET_FIELD(mpll_dq_func_cntl, + MPLL_DQ_FUNC_CNTL, YCLK_POST_DIV, mpll_param.mpll_post_divider); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MemorySpreadSpectrumSupport)) { + /* + ************************************ + Fref = Reference Frequency + NF = Feedback divider ratio + NR = Reference divider ratio + Fnom = Nominal VCO output frequency = Fref * NF / NR + Fs = Spreading Rate + D = Percentage down-spread / 2 + Fint = Reference input frequency to PFD = Fref / NR + NS = Spreading rate divider ratio = int(Fint / (2 * Fs)) + CLKS = NS - 1 = ISS_STEP_NUM[11:0] + NV = D * Fs / Fnom * 4 * ((Fnom/Fref * NR) ^ 2) + CLKV = 65536 * NV = ISS_STEP_SIZE[25:0] + ************************************* + */ + pp_atomctrl_internal_ss_info ss_info; + uint32_t freq_nom; + uint32_t tmp; + uint32_t reference_clock = atomctrl_get_mpll_reference_clock(hwmgr); + + /* for GDDR5 for all modes and DDR3 */ + if (1 == mpll_param.qdr) + freq_nom = memory_clock * 4 * (1 << mpll_param.mpll_post_divider); + else + freq_nom = memory_clock * 2 * (1 << mpll_param.mpll_post_divider); + + /* tmp = (freq_nom / reference_clock * reference_divider) ^ 2 Note: S.I. 
reference_divider = 1*/ + tmp = (freq_nom / reference_clock); + tmp = tmp * tmp; + + if (0 == atomctrl_get_memory_clock_spread_spectrum(hwmgr, freq_nom, &ss_info)) { + /* ss_info.speed_spectrum_percentage -- in unit of 0.01% */ + /* ss.Info.speed_spectrum_rate -- in unit of khz */ + /* CLKS = reference_clock / (2 * speed_spectrum_rate * reference_divider) * 10 */ + /* = reference_clock * 5 / speed_spectrum_rate */ + uint32_t clks = reference_clock * 5 / ss_info.speed_spectrum_rate; + + /* CLKV = 65536 * speed_spectrum_percentage / 2 * spreadSpecrumRate / freq_nom * 4 / 100000 * ((freq_nom / reference_clock) ^ 2) */ + /* = 131 * speed_spectrum_percentage * speed_spectrum_rate / 100 * ((freq_nom / reference_clock) ^ 2) / freq_nom */ + uint32_t clkv = + (uint32_t)((((131 * ss_info.speed_spectrum_percentage * + ss_info.speed_spectrum_rate) / 100) * tmp) / freq_nom); + + mpll_ss1 = PHM_SET_FIELD(mpll_ss1, MPLL_SS1, CLKV, clkv); + mpll_ss2 = PHM_SET_FIELD(mpll_ss2, MPLL_SS2, CLKS, clks); + } + } + + /* MCLK_PWRMGT_CNTL setup */ + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, DLL_SPEED, mpll_param.dll_speed); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK0_PDNB, dllStateOn); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK1_PDNB, dllStateOn); + + + /* Save the result data to outpupt memory level structure */ + mclk->MclkFrequency = memory_clock; + mclk->MpllFuncCntl = mpll_func_cntl; + mclk->MpllFuncCntl_1 = mpll_func_cntl_1; + mclk->MpllFuncCntl_2 = mpll_func_cntl_2; + mclk->MpllAdFuncCntl = mpll_ad_func_cntl; + mclk->MpllDqFuncCntl = mpll_dq_func_cntl; + mclk->MclkPwrmgtCntl = mclk_pwrmgt_cntl; + mclk->DllCntl = dll_cntl; + mclk->MpllSs1 = mpll_ss1; + mclk->MpllSs2 = mpll_ss2; + + return 0; +} + +static int iceland_populate_single_memory_level( + struct pp_hwmgr *hwmgr, + uint32_t memory_clock, + SMU71_Discrete_MemoryLevel *memory_level + ) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + int result = 0; + bool dllStateOn; + struct cgs_display_info info = {0}; + + + if (NULL != hwmgr->dyn_state.vddc_dependency_on_mclk) { + result = iceland_get_dependecy_volt_by_clk(hwmgr, + hwmgr->dyn_state.vddc_dependency_on_mclk, memory_clock, &memory_level->MinVddc); + PP_ASSERT_WITH_CODE((0 == result), + "can not find MinVddc voltage value from memory VDDC voltage dependency table", return result); + } + + if (data->vdd_ci_control == ICELAND_VOLTAGE_CONTROL_NONE) { + memory_level->MinVddci = memory_level->MinVddc; + } else if (NULL != hwmgr->dyn_state.vddci_dependency_on_mclk) { + result = iceland_get_dependecy_volt_by_clk(hwmgr, + hwmgr->dyn_state.vddci_dependency_on_mclk, + memory_clock, + &memory_level->MinVddci); + PP_ASSERT_WITH_CODE((0 == result), + "can not find MinVddci voltage value from memory VDDCI voltage dependency table", return result); + } + + if (NULL != hwmgr->dyn_state.mvdd_dependency_on_mclk) { + result = iceland_get_dependecy_volt_by_clk(hwmgr, + hwmgr->dyn_state.mvdd_dependency_on_mclk, memory_clock, &memory_level->MinMvdd); + PP_ASSERT_WITH_CODE((0 == result), + "can not find MinMVDD voltage value from memory MVDD voltage dependency table", return result); + } + + memory_level->MinVddcPhases = 1; + + if (data->vddc_phase_shed_control) { + iceland_populate_phase_value_based_on_mclk(hwmgr, hwmgr->dyn_state.vddc_phase_shed_limits_table, + memory_clock, &memory_level->MinVddcPhases); + } + + memory_level->EnabledForThrottle = 1; + memory_level->EnabledForActivity = 1; + 
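+	/*
+	 * Per-level defaults below: no up-hysteresis, a large (100) down-hysteresis,
+	 * and the activity threshold taken from mclk_activity_target.  Strobe and
+	 * EDC are enabled further down when their clock thresholds are met, while
+	 * the stutter-mode enable remains commented out.
+	 */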
memory_level->UpHyst = 0; + memory_level->DownHyst = 100; + memory_level->VoltageDownHyst = 0; + + /* Indicates maximum activity level for this performance level.*/ + memory_level->ActivityLevel = (uint16_t)data->mclk_activity_target; + memory_level->StutterEnable = 0; + memory_level->StrobeEnable = 0; + memory_level->EdcReadEnable = 0; + memory_level->EdcWriteEnable = 0; + memory_level->RttEnable = 0; + + /* default set to low watermark. Highest level will be set to high later.*/ + memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + cgs_get_active_displays_info(hwmgr->device, &info); + data->display_timing.num_existing_displays = info.display_count; + + //if ((data->mclk_stutter_mode_threshold != 0) && + // (memory_clock <= data->mclk_stutter_mode_threshold) && + // (data->is_uvd_enabled == 0) + // && (PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL, STUTTER_ENABLE) & 0x1) + // && (data->display_timing.num_existing_displays <= 2) + // && (data->display_timing.num_existing_displays != 0)) + // memory_level->StutterEnable = 1; + + /* decide strobe mode*/ + memory_level->StrobeEnable = (data->mclk_strobe_mode_threshold != 0) && + (memory_clock <= data->mclk_strobe_mode_threshold); + + /* decide EDC mode and memory clock ratio*/ + if (data->is_memory_GDDR5) { + memory_level->StrobeRatio = iceland_get_mclk_frequency_ratio(memory_clock, + memory_level->StrobeEnable); + + if ((data->mclk_edc_enable_threshold != 0) && + (memory_clock > data->mclk_edc_enable_threshold)) { + memory_level->EdcReadEnable = 1; + } + + if ((data->mclk_edc_wr_enable_threshold != 0) && + (memory_clock > data->mclk_edc_wr_enable_threshold)) { + memory_level->EdcWriteEnable = 1; + } + + if (memory_level->StrobeEnable) { + if (iceland_get_mclk_frequency_ratio(memory_clock, 1) >= + ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC7) >> 16) & 0xf)) { + dllStateOn = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC5) >> 1) & 0x1) ? 1 : 0; + } else { + dllStateOn = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC6) >> 1) & 0x1) ? 1 : 0; + } + + } else { + dllStateOn = data->dll_defaule_on; + } + } else { + memory_level->StrobeRatio = + iceland_get_ddr3_mclk_frequency_ratio(memory_clock); + dllStateOn = ((cgs_read_register(hwmgr->device, mmMC_SEQ_MISC5) >> 1) & 0x1) ? 
1 : 0; + } + + result = iceland_calculate_mclk_params(hwmgr, + memory_clock, memory_level, memory_level->StrobeEnable, dllStateOn); + + if (0 == result) { + memory_level->MinVddc = PP_HOST_TO_SMC_UL(memory_level->MinVddc * VOLTAGE_SCALE); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MinVddcPhases); + memory_level->MinVddci = PP_HOST_TO_SMC_UL(memory_level->MinVddci * VOLTAGE_SCALE); + memory_level->MinMvdd = PP_HOST_TO_SMC_UL(memory_level->MinMvdd * VOLTAGE_SCALE); + /* MCLK frequency in units of 10KHz*/ + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MclkFrequency); + /* Indicates maximum activity level for this performance level.*/ + CONVERT_FROM_HOST_TO_SMC_US(memory_level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl_1); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllFuncCntl_2); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllAdFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllDqFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MclkPwrmgtCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->DllCntl); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllSs1); + CONVERT_FROM_HOST_TO_SMC_UL(memory_level->MpllSs2); + } + + return result; +} + +/** + * Populates the SMC MVDD structure using the provided memory clock. + * + * @param hwmgr the address of the hardware manager + * @param mclk the MCLK value to be used in the decision if MVDD should be high or low. + * @param voltage the SMC VOLTAGE structure to be populated + */ +int iceland_populate_mvdd_value(struct pp_hwmgr *hwmgr, uint32_t mclk, SMU71_Discrete_VoltageLevel *voltage) +{ + const iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + uint32_t i = 0; + + if (ICELAND_VOLTAGE_CONTROL_NONE != data->mvdd_control) { + /* find mvdd value which clock is more than request */ + for (i = 0; i < hwmgr->dyn_state.mvdd_dependency_on_mclk->count; i++) { + if (mclk <= hwmgr->dyn_state.mvdd_dependency_on_mclk->entries[i].clk) { + /* Always round to higher voltage. */ + voltage->Voltage = data->mvdd_voltage_table.entries[i].value; + break; + } + } + + PP_ASSERT_WITH_CODE(i < hwmgr->dyn_state.mvdd_dependency_on_mclk->count, + "MVDD Voltage is outside the supported range.", return -1); + + } else { + return -1; + } + + return 0; +} + + +static int iceland_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, + SMU71_Discrete_DpmTable *table) +{ + int result = 0; + const iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + pp_atomctrl_clock_dividers_vi dividers; + SMU71_Discrete_VoltageLevel voltage_level; + uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; + uint32_t spll_func_cntl_2 = data->clock_registers.vCG_SPLL_FUNC_CNTL_2; + uint32_t dll_cntl = data->clock_registers.vDLL_CNTL; + uint32_t mclk_pwrmgt_cntl = data->clock_registers.vMCLK_PWRMGT_CNTL; + + /* The ACPI state should not do DPM on DC (or ever).*/ + table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; + + if (data->acpi_vddc) + table->ACPILevel.MinVddc = PP_HOST_TO_SMC_UL(data->acpi_vddc * VOLTAGE_SCALE); + else + table->ACPILevel.MinVddc = PP_HOST_TO_SMC_UL(data->min_vddc_in_pp_table * VOLTAGE_SCALE); + + table->ACPILevel.MinVddcPhases = (data->vddc_phase_shed_control) ? 
0 : 1; + + /* assign zero for now*/ + table->ACPILevel.SclkFrequency = atomctrl_get_reference_clock(hwmgr); + + /* get the engine clock dividers for this clock value*/ + result = atomctrl_get_engine_pll_dividers_vi(hwmgr, + table->ACPILevel.SclkFrequency, ÷rs); + + PP_ASSERT_WITH_CODE(result == 0, + "Error retrieving Engine Clock dividers from VBIOS.", return result); + + /* divider ID for required SCLK*/ + table->ACPILevel.SclkDid = (uint8_t)dividers.pll_post_divider; + table->ACPILevel.DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + table->ACPILevel.DeepSleepDivId = 0; + + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, + CG_SPLL_FUNC_CNTL, SPLL_PWRON, 0); + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, + CG_SPLL_FUNC_CNTL, SPLL_RESET, 1); + spll_func_cntl_2 = PHM_SET_FIELD(spll_func_cntl_2, + CG_SPLL_FUNC_CNTL_2, SCLK_MUX_SEL, 4); + + table->ACPILevel.CgSpllFuncCntl = spll_func_cntl; + table->ACPILevel.CgSpllFuncCntl2 = spll_func_cntl_2; + table->ACPILevel.CgSpllFuncCntl3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; + table->ACPILevel.CgSpllFuncCntl4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; + table->ACPILevel.SpllSpreadSpectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; + table->ACPILevel.SpllSpreadSpectrum2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; + table->ACPILevel.CcPwrDynRm = 0; + table->ACPILevel.CcPwrDynRm1 = 0; + + + /* For various features to be enabled/disabled while this level is active.*/ + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags); + /* SCLK frequency in units of 10KHz*/ + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl2); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl3); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CgSpllFuncCntl4); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SpllSpreadSpectrum2); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1); + + table->MemoryACPILevel.MinVddc = table->ACPILevel.MinVddc; + table->MemoryACPILevel.MinVddcPhases = table->ACPILevel.MinVddcPhases; + + /* CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage);*/ + + if (0 == iceland_populate_mvdd_value(hwmgr, 0, &voltage_level)) + table->MemoryACPILevel.MinMvdd = + PP_HOST_TO_SMC_UL(voltage_level.Voltage * VOLTAGE_SCALE); + else + table->MemoryACPILevel.MinMvdd = 0; + + /* Force reset on DLL*/ + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK0_RESET, 0x1); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK1_RESET, 0x1); + + /* Disable DLL in ACPIState*/ + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK0_PDNB, 0); + mclk_pwrmgt_cntl = PHM_SET_FIELD(mclk_pwrmgt_cntl, + MCLK_PWRMGT_CNTL, MRDCK1_PDNB, 0); + + /* Enable DLL bypass signal*/ + dll_cntl = PHM_SET_FIELD(dll_cntl, + DLL_CNTL, MRDCK0_BYPASS, 0); + dll_cntl = PHM_SET_FIELD(dll_cntl, + DLL_CNTL, MRDCK1_BYPASS, 0); + + table->MemoryACPILevel.DllCntl = + PP_HOST_TO_SMC_UL(dll_cntl); + table->MemoryACPILevel.MclkPwrmgtCntl = + PP_HOST_TO_SMC_UL(mclk_pwrmgt_cntl); + table->MemoryACPILevel.MpllAdFuncCntl = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_AD_FUNC_CNTL); + table->MemoryACPILevel.MpllDqFuncCntl = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_DQ_FUNC_CNTL); + table->MemoryACPILevel.MpllFuncCntl = + 
PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL); + table->MemoryACPILevel.MpllFuncCntl_1 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL_1); + table->MemoryACPILevel.MpllFuncCntl_2 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_FUNC_CNTL_2); + table->MemoryACPILevel.MpllSs1 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_SS1); + table->MemoryACPILevel.MpllSs2 = + PP_HOST_TO_SMC_UL(data->clock_registers.vMPLL_SS2); + + table->MemoryACPILevel.EnabledForThrottle = 0; + table->MemoryACPILevel.EnabledForActivity = 0; + table->MemoryACPILevel.UpHyst = 0; + table->MemoryACPILevel.DownHyst = 100; + table->MemoryACPILevel.VoltageDownHyst = 0; + /* Indicates maximum activity level for this performance level.*/ + table->MemoryACPILevel.ActivityLevel = PP_HOST_TO_SMC_US((uint16_t)data->mclk_activity_target); + + table->MemoryACPILevel.StutterEnable = 0; + table->MemoryACPILevel.StrobeEnable = 0; + table->MemoryACPILevel.EdcReadEnable = 0; + table->MemoryACPILevel.EdcWriteEnable = 0; + table->MemoryACPILevel.RttEnable = 0; + + return result; +} + +static int iceland_find_boot_level(struct iceland_single_dpm_table *table, uint32_t value, uint32_t *boot_level) +{ + int result = 0; + uint32_t i; + + for (i = 0; i < table->count; i++) { + if (value == table->dpm_levels[i].value) { + *boot_level = i; + result = 0; + } + } + return result; +} + +/** + * Calculates the SCLK dividers using the provided engine clock + * + * @param hwmgr the address of the hardware manager + * @param engine_clock the engine clock to use to populate the structure + * @param sclk the SMC SCLK structure to be populated + */ +int iceland_calculate_sclk_params(struct pp_hwmgr *hwmgr, + uint32_t engine_clock, SMU71_Discrete_GraphicsLevel *sclk) +{ + const iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + pp_atomctrl_clock_dividers_vi dividers; + uint32_t spll_func_cntl = data->clock_registers.vCG_SPLL_FUNC_CNTL; + uint32_t spll_func_cntl_3 = data->clock_registers.vCG_SPLL_FUNC_CNTL_3; + uint32_t spll_func_cntl_4 = data->clock_registers.vCG_SPLL_FUNC_CNTL_4; + uint32_t cg_spll_spread_spectrum = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM; + uint32_t cg_spll_spread_spectrum_2 = data->clock_registers.vCG_SPLL_SPREAD_SPECTRUM_2; + uint32_t reference_clock; + uint32_t reference_divider; + uint32_t fbdiv; + int result; + + /* get the engine clock dividers for this clock value*/ + result = atomctrl_get_engine_pll_dividers_vi(hwmgr, engine_clock, ÷rs); + + PP_ASSERT_WITH_CODE(result == 0, + "Error retrieving Engine Clock dividers from VBIOS.", return result); + + /* To get FBDIV we need to multiply this by 16384 and divide it by Fref.*/ + reference_clock = atomctrl_get_reference_clock(hwmgr); + + reference_divider = 1 + dividers.uc_pll_ref_div; + + /* low 14 bits is fraction and high 12 bits is divider*/ + fbdiv = dividers.ul_fb_div.ul_fb_divider & 0x3FFFFFF; + + /* SPLL_FUNC_CNTL setup*/ + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, + CG_SPLL_FUNC_CNTL, SPLL_REF_DIV, dividers.uc_pll_ref_div); + spll_func_cntl = PHM_SET_FIELD(spll_func_cntl, + CG_SPLL_FUNC_CNTL, SPLL_PDIV_A, dividers.uc_pll_post_div); + + /* SPLL_FUNC_CNTL_3 setup*/ + spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, + CG_SPLL_FUNC_CNTL_3, SPLL_FB_DIV, fbdiv); + + /* set to use fractional accumulation*/ + spll_func_cntl_3 = PHM_SET_FIELD(spll_func_cntl_3, + CG_SPLL_FUNC_CNTL_3, SPLL_DITHEN, 1); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_EngineSpreadSpectrumSupport)) { + 
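+		/*
+		 * This mirrors the memory-clock spread spectrum setup above:
+		 * speed_spectrum_percentage is in units of 0.01% and
+		 * speed_spectrum_rate in kHz; the resulting CLKS (step count) and
+		 * CLKV (step size) are written into CG_SPLL_SPREAD_SPECTRUM and
+		 * CG_SPLL_SPREAD_SPECTRUM_2 below.
+		 */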
pp_atomctrl_internal_ss_info ss_info; + + uint32_t vcoFreq = engine_clock * dividers.uc_pll_post_div; + if (0 == atomctrl_get_engine_clock_spread_spectrum(hwmgr, vcoFreq, &ss_info)) { + /* + * ss_info.speed_spectrum_percentage -- in unit of 0.01% + * ss_info.speed_spectrum_rate -- in unit of khz + */ + /* clks = reference_clock * 10 / (REFDIV + 1) / speed_spectrum_rate / 2 */ + uint32_t clkS = reference_clock * 5 / (reference_divider * ss_info.speed_spectrum_rate); + + /* clkv = 2 * D * fbdiv / NS */ + uint32_t clkV = 4 * ss_info.speed_spectrum_percentage * fbdiv / (clkS * 10000); + + cg_spll_spread_spectrum = + PHM_SET_FIELD(cg_spll_spread_spectrum, CG_SPLL_SPREAD_SPECTRUM, CLKS, clkS); + cg_spll_spread_spectrum = + PHM_SET_FIELD(cg_spll_spread_spectrum, CG_SPLL_SPREAD_SPECTRUM, SSEN, 1); + cg_spll_spread_spectrum_2 = + PHM_SET_FIELD(cg_spll_spread_spectrum_2, CG_SPLL_SPREAD_SPECTRUM_2, CLKV, clkV); + } + } + + sclk->SclkFrequency = engine_clock; + sclk->CgSpllFuncCntl3 = spll_func_cntl_3; + sclk->CgSpllFuncCntl4 = spll_func_cntl_4; + sclk->SpllSpreadSpectrum = cg_spll_spread_spectrum; + sclk->SpllSpreadSpectrum2 = cg_spll_spread_spectrum_2; + sclk->SclkDid = (uint8_t)dividers.pll_post_divider; + + return 0; +} + +static uint8_t iceland_get_sleep_divider_id_from_clock(struct pp_hwmgr *hwmgr, + uint32_t engine_clock, uint32_t min_engine_clock_in_sr) +{ + uint32_t i, temp; + uint32_t min = (min_engine_clock_in_sr > ICELAND_MINIMUM_ENGINE_CLOCK) ? + min_engine_clock_in_sr : ICELAND_MINIMUM_ENGINE_CLOCK; + + PP_ASSERT_WITH_CODE((engine_clock >= min), + "Engine clock can't satisfy stutter requirement!", return 0); + + for (i = ICELAND_MAX_DEEPSLEEP_DIVIDER_ID;; i--) { + temp = engine_clock / (1 << i); + + if(temp >= min || i == 0) + break; + } + return (uint8_t)i; +} + +/** + * Populates single SMC SCLK structure using the provided engine clock + * + * @param hwmgr the address of the hardware manager + * @param engine_clock the engine clock to use to populate the structure + * @param sclk the SMC SCLK structure to be populated + */ +static int iceland_populate_single_graphic_level(struct pp_hwmgr *hwmgr, + uint32_t engine_clock, uint16_t sclk_activity_level_threshold, + SMU71_Discrete_GraphicsLevel *graphic_level) +{ + int result; + uint32_t threshold; + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + result = iceland_calculate_sclk_params(hwmgr, engine_clock, graphic_level); + + + /* populate graphics levels*/ + result = iceland_get_dependecy_volt_by_clk(hwmgr, + hwmgr->dyn_state.vddc_dependency_on_sclk, engine_clock, &graphic_level->MinVddc); + PP_ASSERT_WITH_CODE((0 == result), + "can not find VDDC voltage value for VDDC engine clock dependency table", return result); + + /* SCLK frequency in units of 10KHz*/ + graphic_level->SclkFrequency = engine_clock; + + /* + * Minimum VDDC phases required to support this level, it + * should get from dependence table. + */ + graphic_level->MinVddcPhases = 1; + + if (data->vddc_phase_shed_control) { + iceland_populate_phase_value_based_on_sclk(hwmgr, + hwmgr->dyn_state.vddc_phase_shed_limits_table, + engine_clock, + &graphic_level->MinVddcPhases); + } + + /* Indicates maximum activity level for this performance level. 
50% for now*/ + graphic_level->ActivityLevel = sclk_activity_level_threshold; + + graphic_level->CcPwrDynRm = 0; + graphic_level->CcPwrDynRm1 = 0; + /* this level can be used if activity is high enough.*/ + graphic_level->EnabledForActivity = 1; + /* this level can be used for throttling.*/ + graphic_level->EnabledForThrottle = 1; + graphic_level->UpHyst = 0; + graphic_level->DownHyst = 100; + graphic_level->VoltageDownHyst = 0; + graphic_level->PowerThrottle = 0; + + threshold = engine_clock * data->fast_watermark_threshold / 100; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkDeepSleep)) { + graphic_level->DeepSleepDivId = + iceland_get_sleep_divider_id_from_clock(hwmgr, engine_clock, + data->display_timing.min_clock_insr); + } + + /* Default to slow, highest DPM level will be set to PPSMC_DISPLAY_WATERMARK_LOW later.*/ + graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + if (0 == result) { + graphic_level->MinVddc = PP_HOST_TO_SMC_UL(graphic_level->MinVddc * VOLTAGE_SCALE); + /* CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->MinVoltage);*/ + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->MinVddcPhases); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(graphic_level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CgSpllFuncCntl3); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CgSpllFuncCntl4); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SpllSpreadSpectrum); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->SpllSpreadSpectrum2); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(graphic_level->CcPwrDynRm1); + } + + return result; +} + +/** + * Populates all SMC SCLK levels' structure based on the trimmed allowed dpm engine clock states + * + * @param hwmgr the address of the hardware manager + */ +static int iceland_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + struct iceland_dpm_table *dpm_table = &data->dpm_table; + int result = 0; + uint32_t level_array_adress = data->dpm_table_start + + offsetof(SMU71_Discrete_DpmTable, GraphicsLevel); + + uint32_t level_array_size = sizeof(SMU71_Discrete_GraphicsLevel) * SMU71_MAX_LEVELS_GRAPHICS; + SMU71_Discrete_GraphicsLevel *levels = data->smc_state_table.GraphicsLevel; + uint32_t i; + uint8_t highest_pcie_level_enabled = 0, lowest_pcie_level_enabled = 0, mid_pcie_level_enabled = 0, count = 0; + memset(levels, 0x00, level_array_size); + + for (i = 0; i < dpm_table->sclk_table.count; i++) { + result = iceland_populate_single_graphic_level(hwmgr, + dpm_table->sclk_table.dpm_levels[i].value, + (uint16_t)data->activity_target[i], + &(data->smc_state_table.GraphicsLevel[i])); + if (0 != result) + return result; + + /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. 
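+		 * Levels 2 and above have their DeepSleepDivId cleared here, so sclk
+		 * deep sleep is only advertised for the two lowest performance levels.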
*/ + if (i > 1) + data->smc_state_table.GraphicsLevel[i].DeepSleepDivId = 0; + } + + /* set highest level watermark to high */ + if (dpm_table->sclk_table.count > 1) + data->smc_state_table.GraphicsLevel[dpm_table->sclk_table.count-1].DisplayWatermark = + PPSMC_DISPLAY_WATERMARK_HIGH; + + data->smc_state_table.GraphicsDpmLevelCount = + (uint8_t)dpm_table->sclk_table.count; + data->dpm_level_enable_mask.sclk_dpm_enable_mask = + iceland_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); + + while ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (highest_pcie_level_enabled + 1))) != 0) { + highest_pcie_level_enabled++; + } + + while ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << lowest_pcie_level_enabled)) == 0) { + lowest_pcie_level_enabled++; + } + + while ((count < highest_pcie_level_enabled) && + ((data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (lowest_pcie_level_enabled + 1 + count))) == 0)) { + count++; + } + + mid_pcie_level_enabled = (lowest_pcie_level_enabled+1+count) < highest_pcie_level_enabled ? + (lowest_pcie_level_enabled + 1 + count) : highest_pcie_level_enabled; + + /* set pcieDpmLevel to highest_pcie_level_enabled*/ + for (i = 2; i < dpm_table->sclk_table.count; i++) { + data->smc_state_table.GraphicsLevel[i].pcieDpmLevel = highest_pcie_level_enabled; + } + + /* set pcieDpmLevel to lowest_pcie_level_enabled*/ + data->smc_state_table.GraphicsLevel[0].pcieDpmLevel = lowest_pcie_level_enabled; + + /* set pcieDpmLevel to mid_pcie_level_enabled*/ + data->smc_state_table.GraphicsLevel[1].pcieDpmLevel = mid_pcie_level_enabled; + + for (i = 0; i < dpm_table->sclk_table.count; i++) + + /* level count will send to smc once at init smc table and never change*/ + result = iceland_copy_bytes_to_smc(hwmgr->smumgr, level_array_adress, (uint8_t *)levels, (uint32_t)level_array_size, data->sram_end); + + if (0 != result) + return result; + + return 0; +} + +/** + * Populates all SMC MCLK levels' structure based on the trimmed allowed dpm memory clock states + * + * @param hwmgr the address of the hardware manager + */ + +static int iceland_populate_all_memory_levels(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + struct iceland_dpm_table *dpm_table = &data->dpm_table; + int result; + /* populate MCLK dpm table to SMU7 */ + uint32_t level_array_adress = data->dpm_table_start + offsetof(SMU71_Discrete_DpmTable, MemoryLevel); + uint32_t level_array_size = sizeof(SMU71_Discrete_MemoryLevel) * SMU71_MAX_LEVELS_MEMORY; + SMU71_Discrete_MemoryLevel *levels = data->smc_state_table.MemoryLevel; + uint32_t i; + + memset(levels, 0x00, level_array_size); + + for (i = 0; i < dpm_table->mclk_table.count; i++) { + PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value), + "can not populate memory level as memory clock is zero", return -1); + result = iceland_populate_single_memory_level(hwmgr, dpm_table->mclk_table.dpm_levels[i].value, + &(data->smc_state_table.MemoryLevel[i])); + if (0 != result) { + return result; + } + } + + /* Only enable level 0 for now.*/ + data->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; + + /* + * in order to prevent MC activity from stutter mode to push DPM up. + * the UVD change complements this by putting the MCLK in a higher state + * by default such that we are not effected by up threshold or and MCLK DPM latency. 
+ */ + data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F; + CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.MemoryLevel[0].ActivityLevel); + + data->smc_state_table.MemoryDpmLevelCount = (uint8_t)dpm_table->mclk_table.count; + data->dpm_level_enable_mask.mclk_dpm_enable_mask = iceland_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); + /* set highest level watermark to high*/ + data->smc_state_table.MemoryLevel[dpm_table->mclk_table.count-1].DisplayWatermark = PPSMC_DISPLAY_WATERMARK_HIGH; + + /* level count will send to smc once at init smc table and never change*/ + result = iceland_copy_bytes_to_smc(hwmgr->smumgr, + level_array_adress, (uint8_t *)levels, (uint32_t)level_array_size, data->sram_end); + + if (0 != result) { + return result; + } + + return 0; +} + +struct ICELAND_DLL_SPEED_SETTING +{ + uint16_t Min; /* Minimum Data Rate*/ + uint16_t Max; /* Maximum Data Rate*/ + uint32_t dll_speed; /* The desired DLL_SPEED setting*/ +}; + +static int iceland_populate_ulv_level(struct pp_hwmgr *hwmgr, SMU71_Discrete_Ulv *pstate) +{ + int result = 0; + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + uint32_t voltage_response_time, ulv_voltage; + + pstate->CcPwrDynRm = 0; + pstate->CcPwrDynRm1 = 0; + + //backbiasResponseTime is use for ULV state voltage value. + result = pp_tables_get_response_times(hwmgr, &voltage_response_time, &ulv_voltage); + PP_ASSERT_WITH_CODE((0 == result), "can not get ULV voltage value", return result;); + + if(!ulv_voltage) { + data->ulv.ulv_supported = false; + return 0; + } + + if (ICELAND_VOLTAGE_CONTROL_BY_SVID2 != data->voltage_control) { + /* use minimum voltage if ulv voltage in pptable is bigger than minimum voltage */ + if (ulv_voltage > hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v) { + pstate->VddcOffset = 0; + } + else { + /* used in SMIO Mode. not implemented for now. this is backup only for CI. */ + pstate->VddcOffset = (uint16_t)(hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v - ulv_voltage); + } + } else { + /* use minimum voltage if ulv voltage in pptable is bigger than minimum voltage */ + if(ulv_voltage > hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v) { + pstate->VddcOffsetVid = 0; + } else { + /* used in SVI2 Mode */ + pstate->VddcOffsetVid = (uint8_t)((hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].v - ulv_voltage) * VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); + } + } + + /* used in SVI2 Mode to shed phase */ + pstate->VddcPhase = (data->vddc_phase_shed_control) ? 
0 : 1; + + if (0 == result) { + CONVERT_FROM_HOST_TO_SMC_UL(pstate->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(pstate->CcPwrDynRm1); + CONVERT_FROM_HOST_TO_SMC_US(pstate->VddcOffset); + } + + return result; +} + +static int iceland_populate_ulv_state(struct pp_hwmgr *hwmgr, SMU71_Discrete_Ulv *ulv) +{ + return iceland_populate_ulv_level(hwmgr, ulv); +} + +static int iceland_populate_smc_initial_state(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + uint8_t count, level; + + count = (uint8_t)(hwmgr->dyn_state.vddc_dependency_on_sclk->count); + + for (level = 0; level < count; level++) { + if (hwmgr->dyn_state.vddc_dependency_on_sclk->entries[level].clk + >= data->vbios_boot_state.sclk_bootup_value) { + data->smc_state_table.GraphicsBootLevel = level; + break; + } + } + + count = (uint8_t)(hwmgr->dyn_state.vddc_dependency_on_mclk->count); + + for (level = 0; level < count; level++) { + if (hwmgr->dyn_state.vddc_dependency_on_mclk->entries[level].clk + >= data->vbios_boot_state.mclk_bootup_value) { + data->smc_state_table.MemoryBootLevel = level; + break; + } + } + + return 0; +} + +/** + * Initializes the SMC table and uploads it + * + * @param hwmgr the address of the powerplay hardware manager. + * @param pInput the pointer to input data (PowerState) + * @return always 0 + */ +int iceland_init_smc_table(struct pp_hwmgr *hwmgr) +{ + int result; + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + SMU71_Discrete_DpmTable *table = &(data->smc_state_table); + const struct phw_iceland_ulv_parm *ulv = &(data->ulv); + + result = iceland_setup_default_dpm_tables(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to setup default DPM tables!", return result;); + memset(&(data->smc_state_table), 0x00, sizeof(data->smc_state_table)); + + if (ICELAND_VOLTAGE_CONTROL_NONE != data->voltage_control) { + iceland_populate_smc_voltage_tables(hwmgr, table); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition)) { + table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StepVddc)) { + table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; + } + + if (data->is_memory_GDDR5) { + table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; + } + + if (ulv->ulv_supported) { + result = iceland_populate_ulv_state(hwmgr, &data->ulv_setting); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize ULV state!", return result;); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_ULV_PARAMETER, ulv->ch_ulv_parameter); + } + + result = iceland_populate_smc_link_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Link Level!", return result;); + + result = iceland_populate_all_graphic_levels(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Graphics Level!", return result;); + + result = iceland_populate_all_memory_levels(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize Memory Level!", return result;); + + result = iceland_populate_smc_acpi_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize ACPI Level!", return result;); + + result = iceland_populate_smc_vce_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize VCE Level!", return result;); + + result = iceland_populate_smc_acp_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize ACP Level!", return result;); + + result = 
iceland_populate_smc_samu_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize SAMU Level!", return result;); + + /* + * Since only the initial state is completely set up at this + * point (the other states are just copies of the boot state) + * we only need to populate the ARB settings for the initial + * state. + */ + result = iceland_program_memory_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to Write ARB settings for the initial state.", return result;); + + result = iceland_populate_smc_uvd_level(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize UVD Level!", return result;); + + table->GraphicsBootLevel = 0; + table->MemoryBootLevel = 0; + + /* find boot level from dpm table */ + result = iceland_find_boot_level(&(data->dpm_table.sclk_table), + data->vbios_boot_state.sclk_bootup_value, + (uint32_t *)&(data->smc_state_table.GraphicsBootLevel)); + + if (result) + pr_warning("VBIOS did not find boot engine clock value in dependency table.\n"); + + result = iceland_find_boot_level(&(data->dpm_table.mclk_table), + data->vbios_boot_state.mclk_bootup_value, + (uint32_t *)&(data->smc_state_table.MemoryBootLevel)); + + if (result) + pr_warning("VBIOS did not find boot memory clock value in dependency table.\n"); + + table->BootVddc = data->vbios_boot_state.vddc_bootup_value; + if (ICELAND_VOLTAGE_CONTROL_NONE == data->vdd_ci_control) { + table->BootVddci = table->BootVddc; + } + else { + table->BootVddci = data->vbios_boot_state.vddci_bootup_value; + } + table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value; + + result = iceland_populate_smc_initial_state(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, "Failed to initialize Boot State!", return result); + + result = iceland_populate_bapm_parameters_in_dpm_table(hwmgr); + PP_ASSERT_WITH_CODE(0 == result, "Failed to populate BAPM Parameters!", return result); + + table->GraphicsVoltageChangeEnable = 1; + table->GraphicsThermThrottleEnable = 1; + table->GraphicsInterval = 1; + table->VoltageInterval = 1; + table->ThermalInterval = 1; + table->TemperatureLimitHigh = + (data->thermal_temp_setting.temperature_high * + ICELAND_Q88_FORMAT_CONVERSION_UNIT) / PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + table->TemperatureLimitLow = + (data->thermal_temp_setting.temperature_low * + ICELAND_Q88_FORMAT_CONVERSION_UNIT) / PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + table->MemoryVoltageChangeEnable = 1; + table->MemoryInterval = 1; + table->VoltageResponseTime = 0; + table->PhaseResponseTime = 0; + table->MemoryThermThrottleEnable = 1; + table->PCIeBootLinkLevel = 0; + table->PCIeGenInterval = 1; + + result = iceland_populate_smc_svi2_config(hwmgr, table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to populate SVI2 setting!", return result); + + table->ThermGpio = 17; + table->SclkStepSize = 0x4000; + + CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddcVid); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddcPhase); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskVddciVid); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMaskMvddVid); + CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow); + CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime); + CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime); + + table->BootVddc = PP_HOST_TO_SMC_US(table->BootVddc * VOLTAGE_SCALE); + table->BootVddci = PP_HOST_TO_SMC_US(table->BootVddci * 
VOLTAGE_SCALE); + table->BootMVdd = PP_HOST_TO_SMC_US(table->BootMVdd * VOLTAGE_SCALE); + + /* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */ + result = iceland_copy_bytes_to_smc(hwmgr->smumgr, data->dpm_table_start + + offsetof(SMU71_Discrete_DpmTable, SystemFlags), + (uint8_t *)&(table->SystemFlags), + sizeof(SMU71_Discrete_DpmTable) - 3 * sizeof(SMU71_PIDController), + data->sram_end); + + PP_ASSERT_WITH_CODE(0 == result, + "Failed to upload dpm data to SMC memory!", return result); + + /* Upload all ulv setting to SMC memory.(dpm level, dpm level count etc) */ + result = iceland_copy_bytes_to_smc(hwmgr->smumgr, + data->ulv_settings_start, + (uint8_t *)&(data->ulv_setting), + sizeof(SMU71_Discrete_Ulv), + data->sram_end); + +#if 0 + /* Notify SMC to follow new GPIO scheme */ + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition)) { + if (0 == iceland_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_UseNewGPIOScheme)) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SMCtoPPLIBAcdcGpioScheme); + } +#endif + + return result; +} + +int iceland_populate_mc_reg_address(struct pp_hwmgr *hwmgr, SMU71_Discrete_MCRegisters *mc_reg_table) +{ + const struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + uint32_t i, j; + + for (i = 0, j = 0; j < data->iceland_mc_reg_table.last; j++) { + if (data->iceland_mc_reg_table.validflag & 1<<j) { + PP_ASSERT_WITH_CODE((i < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE), + "Index of mc_reg_table->address[] array out of boundary", return -1); + mc_reg_table->address[i].s0 = + PP_HOST_TO_SMC_US(data->iceland_mc_reg_table.mc_reg_address[j].s0); + mc_reg_table->address[i].s1 = + PP_HOST_TO_SMC_US(data->iceland_mc_reg_table.mc_reg_address[j].s1); + i++; + } + } + + mc_reg_table->last = (uint8_t)i; + + return 0; +} + +/* convert register values from driver to SMC format */ +void iceland_convert_mc_registers( + const phw_iceland_mc_reg_entry * pEntry, + SMU71_Discrete_MCRegisterSet *pData, + uint32_t numEntries, uint32_t validflag) +{ + uint32_t i, j; + + for (i = 0, j = 0; j < numEntries; j++) { + if (validflag & 1<<j) { + pData->value[i] = PP_HOST_TO_SMC_UL(pEntry->mc_data[j]); + i++; + } + } +} + +/* find the entry in the memory range table, then populate the value to SMC's iceland_mc_reg_table */ +int iceland_convert_mc_reg_table_entry_to_smc( + struct pp_hwmgr *hwmgr, + const uint32_t memory_clock, + SMU71_Discrete_MCRegisterSet *mc_reg_table_data + ) +{ + const iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + uint32_t i = 0; + + for (i = 0; i < data->iceland_mc_reg_table.num_entries; i++) { + if (memory_clock <= + data->iceland_mc_reg_table.mc_reg_table_entry[i].mclk_max) { + break; + } + } + + if ((i == data->iceland_mc_reg_table.num_entries) && (i > 0)) + --i; + + iceland_convert_mc_registers(&data->iceland_mc_reg_table.mc_reg_table_entry[i], + mc_reg_table_data, data->iceland_mc_reg_table.last, data->iceland_mc_reg_table.validflag); + + return 0; +} + +int iceland_convert_mc_reg_table_to_smc(struct pp_hwmgr *hwmgr, + SMU71_Discrete_MCRegisters *mc_reg_table) +{ + int result = 0; + iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + int res; + uint32_t i; + + for (i = 0; i < data->dpm_table.mclk_table.count; i++) { + res = iceland_convert_mc_reg_table_entry_to_smc( + hwmgr, + data->dpm_table.mclk_table.dpm_levels[i].value, + &mc_reg_table->data[i] + ); + + if (0 != res) + result = res; + } + + return result; +} + +int iceland_populate_initial_mc_reg_table(struct pp_hwmgr *hwmgr) +{ + int result; + struct iceland_hwmgr *data = (struct
iceland_hwmgr *)(hwmgr->backend); + + memset(&data->mc_reg_table, 0x00, sizeof(SMU71_Discrete_MCRegisters)); + result = iceland_populate_mc_reg_address(hwmgr, &(data->mc_reg_table)); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize MCRegTable for the MC register addresses!", return result;); + + result = iceland_convert_mc_reg_table_to_smc(hwmgr, &data->mc_reg_table); + PP_ASSERT_WITH_CODE(0 == result, + "Failed to initialize MCRegTable for driver state!", return result;); + + return iceland_copy_bytes_to_smc(hwmgr->smumgr, data->mc_reg_table_start, + (uint8_t *)&data->mc_reg_table, sizeof(SMU71_Discrete_MCRegisters), data->sram_end); +} + +int iceland_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display) +{ + PPSMC_Msg msg = has_display? (PPSMC_Msg)PPSMC_HasDisplay : (PPSMC_Msg)PPSMC_NoDisplay; + + return (smum_send_msg_to_smc(hwmgr->smumgr, msg) == 0) ? 0 : -1; +} + +int iceland_enable_sclk_control(struct pp_hwmgr *hwmgr) +{ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SCLK_PWRMGT_CNTL, SCLK_PWRMGT_OFF, 0); + + return 0; +} + +int iceland_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + /* enable SCLK dpm */ + if (0 == data->sclk_dpm_key_disabled) { + PP_ASSERT_WITH_CODE( + (0 == smum_send_msg_to_smc(hwmgr->smumgr, + PPSMC_MSG_DPM_Enable)), + "Failed to enable SCLK DPM during DPM Start Function!", + return -1); + } + + /* enable MCLK dpm */ + if (0 == data->mclk_dpm_key_disabled) { + PP_ASSERT_WITH_CODE( + (0 == smum_send_msg_to_smc(hwmgr->smumgr, + PPSMC_MSG_MCLKDPM_Enable)), + "Failed to enable MCLK DPM during DPM Start Function!", + return -1); + + PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixLCAC_MC0_CNTL, 0x05);/* CH0,1 read */ + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixLCAC_MC1_CNTL, 0x05);/* CH2,3 read */ + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixLCAC_CPL_CNTL, 0x100005);/*Read */ + + udelay(10); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixLCAC_MC0_CNTL, 0x400005);/* CH0,1 write */ + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixLCAC_MC1_CNTL, 0x400005);/* CH2,3 write */ + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixLCAC_CPL_CNTL, 0x500005);/* write */ + + } + + return 0; +} + +int iceland_start_dpm(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + /* enable general power management */ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, GENERAL_PWRMGT, GLOBAL_PWRMGT_EN, 1); + /* enable sclk deep sleep */ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SCLK_PWRMGT_CNTL, DYNAMIC_PM_EN, 1); + + /* prepare for PCIE DPM */ + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SOFT_REGISTERS_TABLE_12, VoltageChangeTimeout, 0x1000); + + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__PCIE, SWRST_COMMAND_1, RESETLC, 0x0); + +#if 0 + PP_ASSERT_WITH_CODE( + (0 == smum_send_msg_to_smc(hwmgr->smumgr, + PPSMC_MSG_Voltage_Cntl_Enable)), + "Failed to enable voltage DPM during DPM Start Function!", + return -1); +#endif + + if (0 != iceland_enable_sclk_mclk_dpm(hwmgr)) { + PP_ASSERT_WITH_CODE(0, "Failed to enable Sclk DPM and Mclk DPM!", return -1); + } + + /* enable PCIE dpm */ + if (0 == data->pcie_dpm_key_disabled) { + PP_ASSERT_WITH_CODE( + (0 == smum_send_msg_to_smc(hwmgr->smumgr, + PPSMC_MSG_PCIeDPM_Enable)), + "Failed to enable pcie DPM 
during DPM Start Function!", + return -1 + ); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_Falcon_QuickTransition)) { + smum_send_msg_to_smc(hwmgr->smumgr, + PPSMC_MSG_EnableACDCGPIOInterrupt); + } + + return 0; +} + +static void iceland_set_dpm_event_sources(struct pp_hwmgr *hwmgr, + uint32_t sources) +{ + bool protection; + enum DPM_EVENT_SRC src; + + switch (sources) { + default: + printk(KERN_ERR "Unknown throttling event sources."); + /* fall through */ + case 0: + protection = false; + /* src is unused */ + break; + case (1 << PHM_AutoThrottleSource_Thermal): + protection = true; + src = DPM_EVENT_SRC_DIGITAL; + break; + case (1 << PHM_AutoThrottleSource_External): + protection = true; + src = DPM_EVENT_SRC_EXTERNAL; + break; + case (1 << PHM_AutoThrottleSource_External) | + (1 << PHM_AutoThrottleSource_Thermal): + protection = true; + src = DPM_EVENT_SRC_DIGITAL_OR_EXTERNAL; + break; + } + /* Order matters - don't enable thermal protection for the wrong source. */ + if (protection) { + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_THERMAL_CTRL, + DPM_EVENT_SRC, src); + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, GENERAL_PWRMGT, + THERMAL_PROTECTION_DIS, + !phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ThermalController)); + } else + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, GENERAL_PWRMGT, + THERMAL_PROTECTION_DIS, 1); +} + +static int iceland_enable_auto_throttle_source(struct pp_hwmgr *hwmgr, + PHM_AutoThrottleSource source) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + if (!(data->active_auto_throttle_sources & (1 << source))) { + data->active_auto_throttle_sources |= 1 << source; + iceland_set_dpm_event_sources(hwmgr, data->active_auto_throttle_sources); + } + return 0; +} + +static int iceland_enable_thermal_auto_throttle(struct pp_hwmgr *hwmgr) +{ + return iceland_enable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal); +} + +static int iceland_tf_start_smc(struct pp_hwmgr *hwmgr) +{ + int ret = 0; + + if (!iceland_is_smc_ram_running(hwmgr->smumgr)) + ret = iceland_smu_start_smc(hwmgr->smumgr); + + return ret; +} + +static int iceland_enable_dpm_tasks(struct pp_hwmgr *hwmgr) +{ + int tmp_result, result = 0; + + if (cf_iceland_voltage_control(hwmgr)) { + tmp_result = iceland_enable_voltage_control(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to enable voltage control!", return tmp_result); + + tmp_result = iceland_construct_voltage_tables(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to contruct voltage tables!", return tmp_result); + } + + tmp_result = iceland_initialize_mc_reg_table(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to initialize MC reg table!", return tmp_result); + + tmp_result = iceland_program_static_screen_threshold_parameters(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to program static screen threshold parameters!", return tmp_result); + + tmp_result = iceland_enable_display_gap(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to enable display gap!", return tmp_result); + + tmp_result = iceland_program_voting_clients(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to program voting clients!", return tmp_result); + + tmp_result = iceland_upload_firmware(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to upload firmware header!", return tmp_result); + + tmp_result = iceland_process_firmware_header(hwmgr); + 
PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to process firmware header!", return tmp_result); + + tmp_result = iceland_initial_switch_from_arb_f0_to_f1(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to initialize switch from ArbF0 to F1!", return tmp_result); + + tmp_result = iceland_init_smc_table(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to initialize SMC table!", return tmp_result); + + tmp_result = iceland_populate_initial_mc_reg_table(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to populate initialize MC Reg table!", return tmp_result); + + tmp_result = iceland_populate_pm_fuses(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to populate PM fuses!", return tmp_result); + + /* start SMC */ + tmp_result = iceland_tf_start_smc(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to start SMC!", return tmp_result); + + /* enable SCLK control */ + tmp_result = iceland_enable_sclk_control(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to enable SCLK control!", return tmp_result); + + /* enable DPM */ + tmp_result = iceland_start_dpm(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to start DPM!", return tmp_result); + + tmp_result = iceland_enable_smc_cac(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to enable SMC CAC!", return tmp_result); + + tmp_result = iceland_enable_power_containment(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to enable power containment!", return tmp_result); + + tmp_result = iceland_power_control_set_level(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to power control set level!", result = tmp_result); + + tmp_result = iceland_enable_thermal_auto_throttle(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to enable thermal auto throttle!", result = tmp_result); + + return result; +} + +static int iceland_hwmgr_backend_fini(struct pp_hwmgr *hwmgr) +{ + return phm_hwmgr_backend_fini(hwmgr); +} + +static void iceland_initialize_dpm_defaults(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + struct phw_iceland_ulv_parm *ulv; + + ulv = &data->ulv; + ulv->ch_ulv_parameter = PPICELAND_CGULVPARAMETER_DFLT; + data->voting_rights_clients0 = PPICELAND_VOTINGRIGHTSCLIENTS_DFLT0; + data->voting_rights_clients1 = PPICELAND_VOTINGRIGHTSCLIENTS_DFLT1; + data->voting_rights_clients2 = PPICELAND_VOTINGRIGHTSCLIENTS_DFLT2; + data->voting_rights_clients3 = PPICELAND_VOTINGRIGHTSCLIENTS_DFLT3; + data->voting_rights_clients4 = PPICELAND_VOTINGRIGHTSCLIENTS_DFLT4; + data->voting_rights_clients5 = PPICELAND_VOTINGRIGHTSCLIENTS_DFLT5; + data->voting_rights_clients6 = PPICELAND_VOTINGRIGHTSCLIENTS_DFLT6; + data->voting_rights_clients7 = PPICELAND_VOTINGRIGHTSCLIENTS_DFLT7; + + data->static_screen_threshold_unit = PPICELAND_STATICSCREENTHRESHOLDUNIT_DFLT; + data->static_screen_threshold = PPICELAND_STATICSCREENTHRESHOLD_DFLT; + + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ABM); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_NonABMSupportInPPLib); + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DynamicACTiming); + + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DisableMemoryTransition); + + iceland_initialize_power_tune_defaults(hwmgr); + + data->mclk_strobe_mode_threshold = 40000; + data->mclk_stutter_mode_threshold = 30000; + data->mclk_edc_enable_threshold = 40000; + data->mclk_edc_wr_enable_threshold 
= 40000; + + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DisableMCLS); + + data->pcie_gen_performance.max = PP_PCIEGen1; + data->pcie_gen_performance.min = PP_PCIEGen3; + data->pcie_gen_power_saving.max = PP_PCIEGen1; + data->pcie_gen_power_saving.min = PP_PCIEGen3; + + data->pcie_lane_performance.max = 0; + data->pcie_lane_performance.min = 16; + data->pcie_lane_power_saving.max = 0; + data->pcie_lane_power_saving.min = 16; + + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkThrottleLowNotification); +} + +static int iceland_get_evv_voltage(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + uint16_t virtual_voltage_id; + uint16_t vddc = 0; + uint16_t i; + + /* the count indicates actual number of entries */ + data->vddc_leakage.count = 0; + data->vddci_leakage.count = 0; + + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_EVV)) { + pr_err("Iceland should always support EVV\n"); + return -EINVAL; + } + + /* retrieve voltage for leakage ID (0xff01 + i) */ + for (i = 0; i < ICELAND_MAX_LEAKAGE_COUNT; i++) { + virtual_voltage_id = ATOM_VIRTUAL_VOLTAGE_ID0 + i; + + PP_ASSERT_WITH_CODE((0 == atomctrl_get_voltage_evv(hwmgr, virtual_voltage_id, &vddc)), + "Error retrieving EVV voltage value!\n", continue); + + if (vddc >= 2000) + pr_warning("Invalid VDDC value!\n"); + + if (vddc != 0 && vddc != virtual_voltage_id) { + data->vddc_leakage.actual_voltage[data->vddc_leakage.count] = vddc; + data->vddc_leakage.leakage_id[data->vddc_leakage.count] = virtual_voltage_id; + data->vddc_leakage.count++; + } + } + + return 0; +} + +static void iceland_patch_with_vddc_leakage(struct pp_hwmgr *hwmgr, + uint32_t *vddc) +{ + iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + uint32_t leakage_index; + struct phw_iceland_leakage_voltage *leakage_table = &data->vddc_leakage; + + /* search for leakage voltage ID 0xff01 ~ 0xff08 */ + for (leakage_index = 0; leakage_index < leakage_table->count; leakage_index++) { + /* + * If this voltage matches a leakage voltage ID, patch + * with actual leakage voltage. + */ + if (leakage_table->leakage_id[leakage_index] == *vddc) { + /* + * Need to make sure vddc is less than 2v or + * else, it could burn the ASIC. + */ + if (leakage_table->actual_voltage[leakage_index] >= 2000) + pr_warning("Invalid VDDC value!\n"); + *vddc = leakage_table->actual_voltage[leakage_index]; + /* we found leakage voltage */ + break; + } + } + + if (*vddc >= ATOM_VIRTUAL_VOLTAGE_ID0) + pr_warning("Voltage value looks like a Leakage ID but it's not patched\n"); +} + +static void iceland_patch_with_vddci_leakage(struct pp_hwmgr *hwmgr, + uint32_t *vddci) +{ + iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + uint32_t leakage_index; + struct phw_iceland_leakage_voltage *leakage_table = &data->vddci_leakage; + + /* search for leakage voltage ID 0xff01 ~ 0xff08 */ + for (leakage_index = 0; leakage_index < leakage_table->count; leakage_index++) { + /* + * If this voltage matches a leakage voltage ID, patch + * with actual leakage voltage. 
+ */ + if (leakage_table->leakage_id[leakage_index] == *vddci) { + *vddci = leakage_table->actual_voltage[leakage_index]; + /* we found leakage voltage */ + break; + } + } + + if (*vddci >= ATOM_VIRTUAL_VOLTAGE_ID0) + pr_warning("Voltage value looks like a Leakage ID but it's not patched\n"); +} + +static int iceland_patch_vddc(struct pp_hwmgr *hwmgr, + struct phm_clock_voltage_dependency_table *tab) +{ + uint16_t i; + + if (tab) + for (i = 0; i < tab->count; i++) + iceland_patch_with_vddc_leakage(hwmgr, &tab->entries[i].v); + + return 0; +} + +static int iceland_patch_vddci(struct pp_hwmgr *hwmgr, + struct phm_clock_voltage_dependency_table *tab) +{ + uint16_t i; + + if (tab) + for (i = 0; i < tab->count; i++) + iceland_patch_with_vddci_leakage(hwmgr, &tab->entries[i].v); + + return 0; +} + +static int iceland_patch_vce_vddc(struct pp_hwmgr *hwmgr, + struct phm_vce_clock_voltage_dependency_table *tab) +{ + uint16_t i; + + if (tab) + for (i = 0; i < tab->count; i++) + iceland_patch_with_vddc_leakage(hwmgr, &tab->entries[i].v); + + return 0; +} + + +static int iceland_patch_uvd_vddc(struct pp_hwmgr *hwmgr, + struct phm_uvd_clock_voltage_dependency_table *tab) +{ + uint16_t i; + + if (tab) + for (i = 0; i < tab->count; i++) + iceland_patch_with_vddc_leakage(hwmgr, &tab->entries[i].v); + + return 0; +} + +static int iceland_patch_vddc_shed_limit(struct pp_hwmgr *hwmgr, + struct phm_phase_shedding_limits_table *tab) +{ + uint16_t i; + + if (tab) + for (i = 0; i < tab->count; i++) + iceland_patch_with_vddc_leakage(hwmgr, &tab->entries[i].Voltage); + + return 0; +} + +static int iceland_patch_samu_vddc(struct pp_hwmgr *hwmgr, + struct phm_samu_clock_voltage_dependency_table *tab) +{ + uint16_t i; + + if (tab) + for (i = 0; i < tab->count; i++) + iceland_patch_with_vddc_leakage(hwmgr, &tab->entries[i].v); + + return 0; +} + +static int iceland_patch_acp_vddc(struct pp_hwmgr *hwmgr, + struct phm_acp_clock_voltage_dependency_table *tab) +{ + uint16_t i; + + if (tab) + for (i = 0; i < tab->count; i++) + iceland_patch_with_vddc_leakage(hwmgr, &tab->entries[i].v); + + return 0; +} + +static int iceland_patch_limits_vddc(struct pp_hwmgr *hwmgr, + struct phm_clock_and_voltage_limits *tab) +{ + if (tab) { + iceland_patch_with_vddc_leakage(hwmgr, (uint32_t *)&tab->vddc); + iceland_patch_with_vddci_leakage(hwmgr, (uint32_t *)&tab->vddci); + } + + return 0; +} + +static int iceland_patch_cac_vddc(struct pp_hwmgr *hwmgr, struct phm_cac_leakage_table *tab) +{ + uint32_t i; + uint32_t vddc; + + if (tab) { + for (i = 0; i < tab->count; i++) { + vddc = (uint32_t)(tab->entries[i].Vddc); + iceland_patch_with_vddc_leakage(hwmgr, &vddc); + tab->entries[i].Vddc = (uint16_t)vddc; + } + } + + return 0; +} + +static int iceland_patch_dependency_tables_with_leakage(struct pp_hwmgr *hwmgr) +{ + int tmp; + + tmp = iceland_patch_vddc(hwmgr, hwmgr->dyn_state.vddc_dependency_on_sclk); + if(tmp) + return -EINVAL; + + tmp = iceland_patch_vddc(hwmgr, hwmgr->dyn_state.vddc_dependency_on_mclk); + if(tmp) + return -EINVAL; + + tmp = iceland_patch_vddc(hwmgr, hwmgr->dyn_state.vddc_dep_on_dal_pwrl); + if(tmp) + return -EINVAL; + + tmp = iceland_patch_vddci(hwmgr, hwmgr->dyn_state.vddci_dependency_on_mclk); + if(tmp) + return -EINVAL; + + tmp = iceland_patch_vce_vddc(hwmgr, hwmgr->dyn_state.vce_clock_voltage_dependency_table); + if(tmp) + return -EINVAL; + + tmp = iceland_patch_uvd_vddc(hwmgr, hwmgr->dyn_state.uvd_clock_voltage_dependency_table); + if(tmp) + return -EINVAL; + + tmp = iceland_patch_samu_vddc(hwmgr, 
hwmgr->dyn_state.samu_clock_voltage_dependency_table); + if(tmp) + return -EINVAL; + + tmp = iceland_patch_acp_vddc(hwmgr, hwmgr->dyn_state.acp_clock_voltage_dependency_table); + if(tmp) + return -EINVAL; + + tmp = iceland_patch_vddc_shed_limit(hwmgr, hwmgr->dyn_state.vddc_phase_shed_limits_table); + if(tmp) + return -EINVAL; + + tmp = iceland_patch_limits_vddc(hwmgr, &hwmgr->dyn_state.max_clock_voltage_on_ac); + if(tmp) + return -EINVAL; + + tmp = iceland_patch_limits_vddc(hwmgr, &hwmgr->dyn_state.max_clock_voltage_on_dc); + if(tmp) + return -EINVAL; + + tmp = iceland_patch_cac_vddc(hwmgr, hwmgr->dyn_state.cac_leakage_table); + if(tmp) + return -EINVAL; + + return 0; +} + +static int iceland_set_private_var_based_on_pptale(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + struct phm_clock_voltage_dependency_table *allowed_sclk_vddc_table = hwmgr->dyn_state.vddc_dependency_on_sclk; + struct phm_clock_voltage_dependency_table *allowed_mclk_vddc_table = hwmgr->dyn_state.vddc_dependency_on_mclk; + struct phm_clock_voltage_dependency_table *allowed_mclk_vddci_table = hwmgr->dyn_state.vddci_dependency_on_mclk; + + PP_ASSERT_WITH_CODE(allowed_sclk_vddc_table != NULL, + "VDDC dependency on SCLK table is missing. This table is mandatory\n", return -EINVAL); + PP_ASSERT_WITH_CODE(allowed_sclk_vddc_table->count >= 1, + "VDDC dependency on SCLK table has to have is missing. This table is mandatory\n", return -EINVAL); + + PP_ASSERT_WITH_CODE(allowed_mclk_vddc_table != NULL, + "VDDC dependency on MCLK table is missing. This table is mandatory\n", return -EINVAL); + PP_ASSERT_WITH_CODE(allowed_mclk_vddc_table->count >= 1, + "VDD dependency on MCLK table has to have is missing. This table is mandatory\n", return -EINVAL); + + data->min_vddc_in_pp_table = (uint16_t)allowed_sclk_vddc_table->entries[0].v; + data->max_vddc_in_pp_table = (uint16_t)allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v; + + hwmgr->dyn_state.max_clock_voltage_on_ac.sclk = + allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].clk; + hwmgr->dyn_state.max_clock_voltage_on_ac.mclk = + allowed_mclk_vddc_table->entries[allowed_mclk_vddc_table->count - 1].clk; + hwmgr->dyn_state.max_clock_voltage_on_ac.vddc = + allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v; + + if (allowed_mclk_vddci_table != NULL && allowed_mclk_vddci_table->count >= 1) { + data->min_vddci_in_pp_table = (uint16_t)allowed_mclk_vddci_table->entries[0].v; + data->max_vddci_in_pp_table = (uint16_t)allowed_mclk_vddci_table->entries[allowed_mclk_vddci_table->count - 1].v; + } + + if (hwmgr->dyn_state.vddci_dependency_on_mclk != NULL && hwmgr->dyn_state.vddci_dependency_on_mclk->count > 1) + hwmgr->dyn_state.max_clock_voltage_on_ac.vddci = hwmgr->dyn_state.vddci_dependency_on_mclk->entries[hwmgr->dyn_state.vddci_dependency_on_mclk->count - 1].v; + + return 0; +} + +static int iceland_initializa_dynamic_state_adjustment_rule_settings(struct pp_hwmgr *hwmgr) +{ + uint32_t table_size; + struct phm_clock_voltage_dependency_table *table_clk_vlt; + + hwmgr->dyn_state.mclk_sclk_ratio = 4; + hwmgr->dyn_state.sclk_mclk_delta = 15000; /* 150 MHz */ + hwmgr->dyn_state.vddc_vddci_delta = 200; /* 200mV */ + + /* initialize vddc_dep_on_dal_pwrl table */ + table_size = sizeof(uint32_t) + 4 * sizeof(struct phm_clock_voltage_dependency_record); + table_clk_vlt = (struct phm_clock_voltage_dependency_table *)kzalloc(table_size, GFP_KERNEL); + + if (NULL == table_clk_vlt) { + pr_err("[ powerplay 
] Can not allocate space for vddc_dep_on_dal_pwrl! \n"); + return -ENOMEM; + } else { + table_clk_vlt->count = 4; + table_clk_vlt->entries[0].clk = PP_DAL_POWERLEVEL_ULTRALOW; + table_clk_vlt->entries[0].v = 0; + table_clk_vlt->entries[1].clk = PP_DAL_POWERLEVEL_LOW; + table_clk_vlt->entries[1].v = 720; + table_clk_vlt->entries[2].clk = PP_DAL_POWERLEVEL_NOMINAL; + table_clk_vlt->entries[2].v = 810; + table_clk_vlt->entries[3].clk = PP_DAL_POWERLEVEL_PERFORMANCE; + table_clk_vlt->entries[3].v = 900; + hwmgr->dyn_state.vddc_dep_on_dal_pwrl = table_clk_vlt; + } + + return 0; +} + +/** + * Initializes the Volcanic Islands Hardware Manager + * + * @param hwmgr the address of the powerplay hardware manager. + * @return 1 if success; otherwise appropriate error code. + */ +static int iceland_hwmgr_backend_init(struct pp_hwmgr *hwmgr) +{ + int result = 0; + SMU71_Discrete_DpmTable *table = NULL; + iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + pp_atomctrl_gpio_pin_assignment gpio_pin_assignment; + bool stay_in_boot; + struct phw_iceland_ulv_parm *ulv; + struct cgs_system_info sys_info = {0}; + + PP_ASSERT_WITH_CODE((NULL != hwmgr), + "Invalid Parameter!", return -EINVAL;); + + data->dll_defaule_on = 0; + data->sram_end = SMC_RAM_END; + + data->activity_target[0] = PPICELAND_TARGETACTIVITY_DFLT; + data->activity_target[1] = PPICELAND_TARGETACTIVITY_DFLT; + data->activity_target[2] = PPICELAND_TARGETACTIVITY_DFLT; + data->activity_target[3] = PPICELAND_TARGETACTIVITY_DFLT; + data->activity_target[4] = PPICELAND_TARGETACTIVITY_DFLT; + data->activity_target[5] = PPICELAND_TARGETACTIVITY_DFLT; + data->activity_target[6] = PPICELAND_TARGETACTIVITY_DFLT; + data->activity_target[7] = PPICELAND_TARGETACTIVITY_DFLT; + + data->mclk_activity_target = PPICELAND_MCLK_TARGETACTIVITY_DFLT; + + data->sclk_dpm_key_disabled = 0; + data->mclk_dpm_key_disabled = 0; + data->pcie_dpm_key_disabled = 0; + data->pcc_monitor_enabled = 0; + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_UnTabledHardwareInterface); + + data->gpio_debug = 0; + data->engine_clock_data = 0; + data->memory_clock_data = 0; + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkDeepSleep); + + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkDeepSleepAboveLow); + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DynamicPatchPowerState); + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_TablelessHardwareInterface); + + /* Initializes DPM default values. */ + iceland_initialize_dpm_defaults(hwmgr); + + /* Enable Platform EVV support. */ + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_EVV); + + /* Get leakage voltage based on leakage ID. */ + result = iceland_get_evv_voltage(hwmgr); + if (result) + goto failed; + + /** + * Patch our voltage dependency table with actual leakage + * voltage. We need to perform leakage translation before it's + * used by other functions such as + * iceland_set_hwmgr_variables_based_on_pptable. + */ + result = iceland_patch_dependency_tables_with_leakage(hwmgr); + if (result) + goto failed; + + /* Parse pptable data read from VBIOS. 
*/ + result = iceland_set_private_var_based_on_pptale(hwmgr); + if (result) + goto failed; + + /* ULV support */ + ulv = &(data->ulv); + ulv->ulv_supported = 1; + + /* Initalize Dynamic State Adjustment Rule Settings*/ + result = iceland_initializa_dynamic_state_adjustment_rule_settings(hwmgr); + if (result) { + pr_err("[ powerplay ] iceland_initializa_dynamic_state_adjustment_rule_settings failed!\n"); + goto failed; + } + + data->voltage_control = ICELAND_VOLTAGE_CONTROL_NONE; + data->vdd_ci_control = ICELAND_VOLTAGE_CONTROL_NONE; + data->mvdd_control = ICELAND_VOLTAGE_CONTROL_NONE; + + /* + * Hardcode thermal temperature settings for now, these will + * be overwritten if a custom policy exists. + */ + data->thermal_temp_setting.temperature_low = 99500; + data->thermal_temp_setting.temperature_high = 100000; + data->thermal_temp_setting.temperature_shutdown = 104000; + data->uvd_enabled = false; + + table = &data->smc_state_table; + + if (atomctrl_get_pp_assign_pin(hwmgr, VDDC_VRHOT_GPIO_PINID, + &gpio_pin_assignment)) { + table->VRHotGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift; + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot); + } else { + table->VRHotGpio = ICELAND_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot); + } + + if (atomctrl_get_pp_assign_pin(hwmgr, PP_AC_DC_SWITCH_GPIO_PINID, + &gpio_pin_assignment)) { + table->AcDcGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift; + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + } else { + table->AcDcGpio = ICELAND_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + } + + /* + * If ucGPIO_ID=VDDC_PCC_GPIO_PINID in GPIO_LUTable, Peak. + * Current Control feature is enabled and we should program + * PCC HW register + */ + if (atomctrl_get_pp_assign_pin(hwmgr, VDDC_PCC_GPIO_PINID, + &gpio_pin_assignment)) { + uint32_t temp_reg = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, + ixCNB_PWRMGT_CNTL); + + switch (gpio_pin_assignment.uc_gpio_pin_bit_shift) { + case 0: + temp_reg = PHM_SET_FIELD(temp_reg, + CNB_PWRMGT_CNTL, GNB_SLOW_MODE, 0x1); + break; + case 1: + temp_reg = PHM_SET_FIELD(temp_reg, + CNB_PWRMGT_CNTL, GNB_SLOW_MODE, 0x2); + break; + case 2: + temp_reg = PHM_SET_FIELD(temp_reg, + CNB_PWRMGT_CNTL, GNB_SLOW, 0x1); + break; + case 3: + temp_reg = PHM_SET_FIELD(temp_reg, + CNB_PWRMGT_CNTL, FORCE_NB_PS1, 0x1); + break; + case 4: + temp_reg = PHM_SET_FIELD(temp_reg, + CNB_PWRMGT_CNTL, DPM_ENABLED, 0x1); + break; + default: + pr_warning("[ powerplay ] Failed to setup PCC HW register! 
Wrong GPIO assigned for VDDC_PCC_GPIO_PINID!\n"); + break; + } + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCNB_PWRMGT_CNTL, temp_reg); + } + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_EnableSMU7ThermalManagement); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SMU7); + + if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, + VOLTAGE_TYPE_VDDC, + VOLTAGE_OBJ_GPIO_LUT)) + data->voltage_control = ICELAND_VOLTAGE_CONTROL_BY_GPIO; + else if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, + VOLTAGE_TYPE_VDDC, + VOLTAGE_OBJ_SVID2)) + data->voltage_control = ICELAND_VOLTAGE_CONTROL_BY_SVID2; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ControlVDDCI)) { + if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, + VOLTAGE_TYPE_VDDCI, + VOLTAGE_OBJ_GPIO_LUT)) + data->vdd_ci_control = ICELAND_VOLTAGE_CONTROL_BY_GPIO; + else if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, + VOLTAGE_TYPE_VDDCI, + VOLTAGE_OBJ_SVID2)) + data->vdd_ci_control = ICELAND_VOLTAGE_CONTROL_BY_SVID2; + } + + if (data->vdd_ci_control == ICELAND_VOLTAGE_CONTROL_NONE) + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ControlVDDCI); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_EnableMVDDControl)) { + if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, + VOLTAGE_TYPE_MVDDC, + VOLTAGE_OBJ_GPIO_LUT)) + data->mvdd_control = ICELAND_VOLTAGE_CONTROL_BY_GPIO; + else if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr, + VOLTAGE_TYPE_MVDDC, + VOLTAGE_OBJ_SVID2)) + data->mvdd_control = ICELAND_VOLTAGE_CONTROL_BY_SVID2; + } + + if (data->mvdd_control == ICELAND_VOLTAGE_CONTROL_NONE) + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_EnableMVDDControl); + + data->vddc_phase_shed_control = false; + + stay_in_boot = phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StayInBootState); + + /* iceland doesn't support UVD and VCE */ + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_UVDPowerGating); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_VCEPowerGating); + + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_PG_FLAGS; + result = cgs_query_system_info(hwmgr->device, &sys_info); + if (!result) { + if (sys_info.value & AMD_PG_SUPPORT_UVD) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_UVDPowerGating); + if (sys_info.value & AMD_PG_SUPPORT_VCE) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_VCEPowerGating); + + data->is_tlu_enabled = false; + hwmgr->platform_descriptor.hardwareActivityPerformanceLevels = + ICELAND_MAX_HARDWARE_POWERLEVELS; + hwmgr->platform_descriptor.hardwarePerformanceLevels = 2; + hwmgr->platform_descriptor.minimumClocksReductionPercentage = 50; + + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_PCIE_GEN_INFO; + result = cgs_query_system_info(hwmgr->device, &sys_info); + if (result) + data->pcie_gen_cap = AMDGPU_DEFAULT_PCIE_GEN_MASK; + else + data->pcie_gen_cap = (uint32_t)sys_info.value; + if (data->pcie_gen_cap & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) + data->pcie_spc_cap = 20; + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_PCIE_MLW; + result = cgs_query_system_info(hwmgr->device, &sys_info); + if (result) + data->pcie_lane_cap = AMDGPU_DEFAULT_PCIE_MLW_MASK; + else + data->pcie_lane_cap 
= (uint32_t)sys_info.value; + } else { + /* Ignore return value in here, we are cleaning up a mess. */ + iceland_hwmgr_backend_fini(hwmgr); + } + + return 0; +failed: + return result; +} + +static int iceland_get_num_of_entries(struct pp_hwmgr *hwmgr) +{ + int result; + unsigned long ret = 0; + + result = pp_tables_get_num_of_entries(hwmgr, &ret); + + return result ? 0 : ret; +} + +static const unsigned long PhwIceland_Magic = (unsigned long)(PHM_VIslands_Magic); + +struct iceland_power_state *cast_phw_iceland_power_state( + struct pp_hw_power_state *hw_ps) +{ + if (hw_ps == NULL) + return NULL; + + PP_ASSERT_WITH_CODE((PhwIceland_Magic == hw_ps->magic), + "Invalid Powerstate Type!", + return NULL); + + return (struct iceland_power_state *)hw_ps; +} + +static int iceland_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, + struct pp_power_state *prequest_ps, + const struct pp_power_state *pcurrent_ps) +{ + struct iceland_power_state *iceland_ps = + cast_phw_iceland_power_state(&prequest_ps->hardware); + + uint32_t sclk; + uint32_t mclk; + struct PP_Clocks minimum_clocks = {0}; + bool disable_mclk_switching; + bool disable_mclk_switching_for_frame_lock; + struct cgs_display_info info = {0}; + const struct phm_clock_and_voltage_limits *max_limits; + uint32_t i; + iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + int32_t count; + int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0; + + data->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label); + + PP_ASSERT_WITH_CODE(iceland_ps->performance_level_count == 2, + "VI should always have 2 performance levels", + ); + + max_limits = (PP_PowerSource_AC == hwmgr->power_source) ? + &(hwmgr->dyn_state.max_clock_voltage_on_ac) : + &(hwmgr->dyn_state.max_clock_voltage_on_dc); + + if (PP_PowerSource_DC == hwmgr->power_source) { + for (i = 0; i < iceland_ps->performance_level_count; i++) { + if (iceland_ps->performance_levels[i].memory_clock > max_limits->mclk) + iceland_ps->performance_levels[i].memory_clock = max_limits->mclk; + if (iceland_ps->performance_levels[i].engine_clock > max_limits->sclk) + iceland_ps->performance_levels[i].engine_clock = max_limits->sclk; + } + } + + iceland_ps->vce_clocks.EVCLK = hwmgr->vce_arbiter.evclk; + iceland_ps->vce_clocks.ECCLK = hwmgr->vce_arbiter.ecclk; + + cgs_get_active_displays_info(hwmgr->device, &info); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) { + + max_limits = &(hwmgr->dyn_state.max_clock_voltage_on_ac); + stable_pstate_sclk = (max_limits->sclk * 75) / 100; + + for (count = hwmgr->dyn_state.vddc_dependency_on_sclk->count-1; count >= 0; count--) { + if (stable_pstate_sclk >= hwmgr->dyn_state.vddc_dependency_on_sclk->entries[count].clk) { + stable_pstate_sclk = hwmgr->dyn_state.vddc_dependency_on_sclk->entries[count].clk; + break; + } + } + + if (count < 0) + stable_pstate_sclk = hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].clk; + + stable_pstate_mclk = max_limits->mclk; + + minimum_clocks.engineClock = stable_pstate_sclk; + minimum_clocks.memoryClock = stable_pstate_mclk; + } + + if (minimum_clocks.engineClock < hwmgr->gfx_arbiter.sclk) + minimum_clocks.engineClock = hwmgr->gfx_arbiter.sclk; + + if (minimum_clocks.memoryClock < hwmgr->gfx_arbiter.mclk) + minimum_clocks.memoryClock = hwmgr->gfx_arbiter.mclk; + + iceland_ps->sclk_threshold = hwmgr->gfx_arbiter.sclk_threshold; + + if (0 != hwmgr->gfx_arbiter.sclk_over_drive) { + PP_ASSERT_WITH_CODE((hwmgr->gfx_arbiter.sclk_over_drive <= 
hwmgr->platform_descriptor.overdriveLimit.engineClock), + "Overdrive sclk exceeds limit", + hwmgr->gfx_arbiter.sclk_over_drive = hwmgr->platform_descriptor.overdriveLimit.engineClock); + + if (hwmgr->gfx_arbiter.sclk_over_drive >= hwmgr->gfx_arbiter.sclk) + iceland_ps->performance_levels[1].engine_clock = hwmgr->gfx_arbiter.sclk_over_drive; + } + + if (0 != hwmgr->gfx_arbiter.mclk_over_drive) { + PP_ASSERT_WITH_CODE((hwmgr->gfx_arbiter.mclk_over_drive <= hwmgr->platform_descriptor.overdriveLimit.memoryClock), + "Overdrive mclk exceeds limit", + hwmgr->gfx_arbiter.mclk_over_drive = hwmgr->platform_descriptor.overdriveLimit.memoryClock); + + if (hwmgr->gfx_arbiter.mclk_over_drive >= hwmgr->gfx_arbiter.mclk) + iceland_ps->performance_levels[1].memory_clock = hwmgr->gfx_arbiter.mclk_over_drive; + } + + disable_mclk_switching_for_frame_lock = phm_cap_enabled( + hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DisableMclkSwitchingForFrameLock); + + disable_mclk_switching = (1 < info.display_count) || + disable_mclk_switching_for_frame_lock; + + sclk = iceland_ps->performance_levels[0].engine_clock; + mclk = iceland_ps->performance_levels[0].memory_clock; + + if (disable_mclk_switching) + mclk = iceland_ps->performance_levels[iceland_ps->performance_level_count - 1].memory_clock; + + if (sclk < minimum_clocks.engineClock) + sclk = (minimum_clocks.engineClock > max_limits->sclk) ? max_limits->sclk : minimum_clocks.engineClock; + + if (mclk < minimum_clocks.memoryClock) + mclk = (minimum_clocks.memoryClock > max_limits->mclk) ? max_limits->mclk : minimum_clocks.memoryClock; + + iceland_ps->performance_levels[0].engine_clock = sclk; + iceland_ps->performance_levels[0].memory_clock = mclk; + + iceland_ps->performance_levels[1].engine_clock = + (iceland_ps->performance_levels[1].engine_clock >= iceland_ps->performance_levels[0].engine_clock) ? + iceland_ps->performance_levels[1].engine_clock : + iceland_ps->performance_levels[0].engine_clock; + + if (disable_mclk_switching) { + if (mclk < iceland_ps->performance_levels[1].memory_clock) + mclk = iceland_ps->performance_levels[1].memory_clock; + + iceland_ps->performance_levels[0].memory_clock = mclk; + iceland_ps->performance_levels[1].memory_clock = mclk; + } else { + if (iceland_ps->performance_levels[1].memory_clock < iceland_ps->performance_levels[0].memory_clock) + iceland_ps->performance_levels[1].memory_clock = iceland_ps->performance_levels[0].memory_clock; + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) { + for (i=0; i < iceland_ps->performance_level_count; i++) { + iceland_ps->performance_levels[i].engine_clock = stable_pstate_sclk; + iceland_ps->performance_levels[i].memory_clock = stable_pstate_mclk; + iceland_ps->performance_levels[i].pcie_gen = data->pcie_gen_performance.max; + iceland_ps->performance_levels[i].pcie_lane = data->pcie_gen_performance.max; + } + } + + return 0; +} + +static bool iceland_is_dpm_running(struct pp_hwmgr *hwmgr) +{ + /* + * We return the status of Voltage Control instead of checking SCLK/MCLK DPM + * because we may have test scenarios that need us intentionly disable SCLK/MCLK DPM, + * whereas voltage control is a fundemental change that will not be disabled + */ + return (0 == PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) ? 1 : 0); +} + +/** + * force DPM power State + * + * @param hwmgr: the address of the powerplay hardware manager. 
+ * @param n : DPM level + * @return The response that came from the SMC. + */ +int iceland_dpm_force_state(struct pp_hwmgr *hwmgr, uint32_t n) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + /* Checking if DPM is running. If we discover hang because of this, we should skip this message. */ + PP_ASSERT_WITH_CODE(0 == iceland_is_dpm_running(hwmgr), + "Trying to force SCLK when DPM is disabled", return -1;); + if (0 == data->sclk_dpm_key_disabled) + return (0 == smum_send_msg_to_smc_with_parameter( + hwmgr->smumgr, + PPSMC_MSG_DPM_ForceState, + n) ? 0 : 1); + + return 0; +} + +/** + * force DPM power State + * + * @param hwmgr: the address of the powerplay hardware manager. + * @param n : DPM level + * @return The response that came from the SMC. + */ +int iceland_dpm_force_state_mclk(struct pp_hwmgr *hwmgr, uint32_t n) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + /* Checking if DPM is running. If we discover hang because of this, we should skip this message. */ + PP_ASSERT_WITH_CODE(0 == iceland_is_dpm_running(hwmgr), + "Trying to Force MCLK when DPM is disabled", return -1;); + if (0 == data->mclk_dpm_key_disabled) + return (0 == smum_send_msg_to_smc_with_parameter( + hwmgr->smumgr, + PPSMC_MSG_MCLKDPM_ForceState, + n) ? 0 : 1); + + return 0; +} + +/** + * force DPM power State + * + * @param hwmgr: the address of the powerplay hardware manager. + * @param n : DPM level + * @return The response that came from the SMC. + */ +int iceland_dpm_force_state_pcie(struct pp_hwmgr *hwmgr, uint32_t n) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + /* Checking if DPM is running. If we discover hang because of this, we should skip this message.*/ + PP_ASSERT_WITH_CODE(0 == iceland_is_dpm_running(hwmgr), + "Trying to Force PCIE level when DPM is disabled", return -1;); + if (0 == data->pcie_dpm_key_disabled) + return (0 == smum_send_msg_to_smc_with_parameter( + hwmgr->smumgr, + PPSMC_MSG_PCIeDPM_ForceLevel, + n) ? 
0 : 1); + + return 0; +} + +static int iceland_force_dpm_highest(struct pp_hwmgr *hwmgr) +{ + uint32_t level, tmp; + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + if (0 == data->sclk_dpm_key_disabled) { + /* SCLK */ + if (data->dpm_level_enable_mask.sclk_dpm_enable_mask != 0) { + level = 0; + tmp = data->dpm_level_enable_mask.sclk_dpm_enable_mask; + while (tmp >>= 1) + level++ ; + + if (0 != level) { + PP_ASSERT_WITH_CODE((0 == iceland_dpm_force_state(hwmgr, level)), + "force highest sclk dpm state failed!", return -1); + PHM_WAIT_INDIRECT_FIELD(hwmgr->device, + SMC_IND, TARGET_AND_CURRENT_PROFILE_INDEX, CURR_SCLK_INDEX, level); + } + } + } + + if (0 == data->mclk_dpm_key_disabled) { + /* MCLK */ + if (data->dpm_level_enable_mask.mclk_dpm_enable_mask != 0) { + level = 0; + tmp = data->dpm_level_enable_mask.mclk_dpm_enable_mask; + while (tmp >>= 1) + level++ ; + + if (0 != level) { + PP_ASSERT_WITH_CODE((0 == iceland_dpm_force_state_mclk(hwmgr, level)), + "force highest mclk dpm state failed!", return -1); + PHM_WAIT_INDIRECT_FIELD(hwmgr->device, SMC_IND, + TARGET_AND_CURRENT_PROFILE_INDEX, CURR_MCLK_INDEX, level); + } + } + } + + if (0 == data->pcie_dpm_key_disabled) { + /* PCIE */ + if (data->dpm_level_enable_mask.pcie_dpm_enable_mask != 0) { + level = 0; + tmp = data->dpm_level_enable_mask.pcie_dpm_enable_mask; + while (tmp >>= 1) + level++ ; + + if (0 != level) { + PP_ASSERT_WITH_CODE((0 == iceland_dpm_force_state_pcie(hwmgr, level)), + "force highest pcie dpm state failed!", return -1); + } + } + } + + return 0; +} + +static uint32_t iceland_get_lowest_enable_level(struct pp_hwmgr *hwmgr, + uint32_t level_mask) +{ + uint32_t level = 0; + + while (0 == (level_mask & (1 << level))) + level++; + + return level; +} + +static int iceland_force_dpm_lowest(struct pp_hwmgr *hwmgr) +{ + uint32_t level; + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + /* for now force only sclk */ + if (0 != data->dpm_level_enable_mask.sclk_dpm_enable_mask) { + level = iceland_get_lowest_enable_level(hwmgr, + data->dpm_level_enable_mask.sclk_dpm_enable_mask); + + PP_ASSERT_WITH_CODE((0 == iceland_dpm_force_state(hwmgr, level)), + "force sclk dpm state failed!", return -1); + + PHM_WAIT_INDIRECT_FIELD(hwmgr->device, SMC_IND, + TARGET_AND_CURRENT_PROFILE_INDEX, + CURR_SCLK_INDEX, + level); + } + + return 0; +} + +int iceland_unforce_dpm_levels(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + +#if 0 + PP_ASSERT_WITH_CODE (0 == iceland_is_dpm_running(hwmgr), + "Trying to Unforce DPM when DPM is disabled. 
Returning without sending SMC message.", + return -1); +#endif + + if (0 == data->sclk_dpm_key_disabled) { + PP_ASSERT_WITH_CODE((0 == smum_send_msg_to_smc( + hwmgr->smumgr, + PPSMC_MSG_NoForcedLevel)), + "unforce sclk dpm state failed!", + return -1); + } + + if (0 == data->mclk_dpm_key_disabled) { + PP_ASSERT_WITH_CODE((0 == smum_send_msg_to_smc( + hwmgr->smumgr, + PPSMC_MSG_MCLKDPM_NoForcedLevel)), + "unforce mclk dpm state failed!", + return -1); + } + + if (0 == data->pcie_dpm_key_disabled) { + PP_ASSERT_WITH_CODE((0 == smum_send_msg_to_smc( + hwmgr->smumgr, + PPSMC_MSG_PCIeDPM_UnForceLevel)), + "unforce pcie level failed!", + return -1); + } + + return 0; +} + +static int iceland_force_dpm_level(struct pp_hwmgr *hwmgr, + enum amd_dpm_forced_level level) +{ + int ret = 0; + + switch (level) { + case AMD_DPM_FORCED_LEVEL_HIGH: + ret = iceland_force_dpm_highest(hwmgr); + if (ret) + return ret; + break; + case AMD_DPM_FORCED_LEVEL_LOW: + ret = iceland_force_dpm_lowest(hwmgr); + if (ret) + return ret; + break; + case AMD_DPM_FORCED_LEVEL_AUTO: + ret = iceland_unforce_dpm_levels(hwmgr); + if (ret) + return ret; + break; + default: + break; + } + + hwmgr->dpm_level = level; + return ret; +} + +const struct iceland_power_state *cast_const_phw_iceland_power_state( + const struct pp_hw_power_state *hw_ps) +{ + if (hw_ps == NULL) + return NULL; + + PP_ASSERT_WITH_CODE((PhwIceland_Magic == hw_ps->magic), + "Invalid Powerstate Type!", + return NULL); + + return (const struct iceland_power_state *)hw_ps; +} + +static int iceland_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input) +{ + const struct phm_set_power_state_input *states = (const struct phm_set_power_state_input *)input; + const struct iceland_power_state *iceland_ps = cast_const_phw_iceland_power_state(states->pnew_state); + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + struct iceland_single_dpm_table *psclk_table = &(data->dpm_table.sclk_table); + uint32_t sclk = iceland_ps->performance_levels[iceland_ps->performance_level_count-1].engine_clock; + struct iceland_single_dpm_table *pmclk_table = &(data->dpm_table.mclk_table); + uint32_t mclk = iceland_ps->performance_levels[iceland_ps->performance_level_count-1].memory_clock; + struct PP_Clocks min_clocks = {0}; + uint32_t i; + struct cgs_display_info info = {0}; + + data->need_update_smu7_dpm_table = 0; + + for (i = 0; i < psclk_table->count; i++) { + if (sclk == psclk_table->dpm_levels[i].value) + break; + } + + if (i >= psclk_table->count) + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + else { + /* + * TODO: Check SCLK in DAL's minimum clocks in case DeepSleep + * divider update is required.
+ */ + if(data->display_timing.min_clock_insr != min_clocks.engineClockInSR) + data->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_SCLK; + } + + for (i = 0; i < pmclk_table->count; i++) { + if (mclk == pmclk_table->dpm_levels[i].value) + break; + } + + if (i >= pmclk_table->count) + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + + cgs_get_active_displays_info(hwmgr->device, &info); + + if (data->display_timing.num_existing_displays != info.display_count) + data->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK; + + return 0; +} + +static uint16_t iceland_get_maximum_link_speed(struct pp_hwmgr *hwmgr, const struct iceland_power_state *hw_ps) +{ + uint32_t i; + uint32_t pcie_speed, max_speed = 0; + + for (i = 0; i < hw_ps->performance_level_count; i++) { + pcie_speed = hw_ps->performance_levels[i].pcie_gen; + if (max_speed < pcie_speed) + max_speed = pcie_speed; + } + + return max_speed; +} + +static uint16_t iceland_get_current_pcie_speed(struct pp_hwmgr *hwmgr) +{ + uint32_t speed_cntl = 0; + + speed_cntl = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__PCIE, + ixPCIE_LC_SPEED_CNTL); + return((uint16_t)PHM_GET_FIELD(speed_cntl, + PCIE_LC_SPEED_CNTL, LC_CURRENT_DATA_RATE)); +} + + +static int iceland_request_link_speed_change_before_state_change(struct pp_hwmgr *hwmgr, const void *input) +{ + const struct phm_set_power_state_input *states = (const struct phm_set_power_state_input *)input; + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + const struct iceland_power_state *iceland_nps = cast_const_phw_iceland_power_state(states->pnew_state); + const struct iceland_power_state *iceland_cps = cast_const_phw_iceland_power_state(states->pcurrent_state); + + uint16_t target_link_speed = iceland_get_maximum_link_speed(hwmgr, iceland_nps); + uint16_t current_link_speed; + + if (data->force_pcie_gen == PP_PCIEGenInvalid) + current_link_speed = iceland_get_maximum_link_speed(hwmgr, iceland_cps); + else + current_link_speed = data->force_pcie_gen; + + data->force_pcie_gen = PP_PCIEGenInvalid; + data->pspp_notify_required = false; + if (target_link_speed > current_link_speed) { + switch(target_link_speed) { + case PP_PCIEGen3: + if (0 == acpi_pcie_perf_request(hwmgr->device, PCIE_PERF_REQ_GEN3, false)) + break; + data->force_pcie_gen = PP_PCIEGen2; + if (current_link_speed == PP_PCIEGen2) + break; + case PP_PCIEGen2: + if (0 == acpi_pcie_perf_request(hwmgr->device, PCIE_PERF_REQ_GEN2, false)) + break; + default: + data->force_pcie_gen = iceland_get_current_pcie_speed(hwmgr); + break; + } + } else { + if (target_link_speed < current_link_speed) + data->pspp_notify_required = true; + } + + return 0; +} + +static int iceland_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + if (0 == data->need_update_smu7_dpm_table) + return 0; + + if ((0 == data->sclk_dpm_key_disabled) && + (data->need_update_smu7_dpm_table & + (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) { + PP_ASSERT_WITH_CODE( + 0 == iceland_is_dpm_running(hwmgr), + "Trying to freeze SCLK DPM when DPM is disabled", + ); + PP_ASSERT_WITH_CODE( + 0 == smum_send_msg_to_smc(hwmgr->smumgr, + PPSMC_MSG_SCLKDPM_FreezeLevel), + "Failed to freeze SCLK DPM during FreezeSclkMclkDPM Function!", + return -1); + } + + if ((0 == data->mclk_dpm_key_disabled) && + (data->need_update_smu7_dpm_table & + DPMTABLE_OD_UPDATE_MCLK)) { + PP_ASSERT_WITH_CODE(0 == iceland_is_dpm_running(hwmgr), + "Trying to freeze MCLK DPM when DPM is disabled", + ); 
+ PP_ASSERT_WITH_CODE( + 0 == smum_send_msg_to_smc(hwmgr->smumgr, + PPSMC_MSG_MCLKDPM_FreezeLevel), + "Failed to freeze MCLK DPM during FreezeSclkMclkDPM Function!", + return -1); + } + + return 0; +} + +static int iceland_populate_and_upload_sclk_mclk_dpm_levels(struct pp_hwmgr *hwmgr, const void *input) +{ + int result = 0; + + const struct phm_set_power_state_input *states = (const struct phm_set_power_state_input *)input; + const struct iceland_power_state *iceland_ps = cast_const_phw_iceland_power_state(states->pnew_state); + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + uint32_t sclk = iceland_ps->performance_levels[iceland_ps->performance_level_count-1].engine_clock; + uint32_t mclk = iceland_ps->performance_levels[iceland_ps->performance_level_count-1].memory_clock; + struct iceland_dpm_table *pdpm_table = &data->dpm_table; + + struct iceland_dpm_table *pgolden_dpm_table = &data->golden_dpm_table; + uint32_t dpm_count, clock_percent; + uint32_t i; + + if (0 == data->need_update_smu7_dpm_table) + return 0; + + if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_SCLK) { + pdpm_table->sclk_table.dpm_levels[pdpm_table->sclk_table.count-1].value = sclk; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_OD6PlusinACSupport) || + phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_OD6PlusinDCSupport)) { + /* + * Need to do calculation based on the golden DPM table + * as the Heatmap GPU Clock axis is also based on the default values + */ + PP_ASSERT_WITH_CODE( + (pgolden_dpm_table->sclk_table.dpm_levels[pgolden_dpm_table->sclk_table.count-1].value != 0), + "Divide by 0!", + return -1); + dpm_count = pdpm_table->sclk_table.count < 2 ? 0 : pdpm_table->sclk_table.count-2; + for (i = dpm_count; i > 1; i--) { + if (sclk > pgolden_dpm_table->sclk_table.dpm_levels[pgolden_dpm_table->sclk_table.count-1].value) { + clock_percent = ((sclk - pgolden_dpm_table->sclk_table.dpm_levels[pgolden_dpm_table->sclk_table.count-1].value)*100) / + pgolden_dpm_table->sclk_table.dpm_levels[pgolden_dpm_table->sclk_table.count-1].value; + + pdpm_table->sclk_table.dpm_levels[i].value = + pgolden_dpm_table->sclk_table.dpm_levels[i].value + + (pgolden_dpm_table->sclk_table.dpm_levels[i].value * clock_percent)/100; + + } else if (pgolden_dpm_table->sclk_table.dpm_levels[pdpm_table->sclk_table.count-1].value > sclk) { + clock_percent = ((pgolden_dpm_table->sclk_table.dpm_levels[pgolden_dpm_table->sclk_table.count-1].value - sclk)*100) / + pgolden_dpm_table->sclk_table.dpm_levels[pgolden_dpm_table->sclk_table.count-1].value; + + pdpm_table->sclk_table.dpm_levels[i].value = + pgolden_dpm_table->sclk_table.dpm_levels[i].value - + (pgolden_dpm_table->sclk_table.dpm_levels[i].value * clock_percent)/100; + } else + pdpm_table->sclk_table.dpm_levels[i].value = + pgolden_dpm_table->sclk_table.dpm_levels[i].value; + } + } + } + + if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK) { + pdpm_table->mclk_table.dpm_levels[pdpm_table->mclk_table.count-1].value = mclk; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_OD6PlusinACSupport) || + phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_OD6PlusinDCSupport)) { + + PP_ASSERT_WITH_CODE( + (pgolden_dpm_table->mclk_table.dpm_levels[pgolden_dpm_table->mclk_table.count-1].value != 0), + "Divide by 0!", + return -1); + dpm_count = pdpm_table->mclk_table.count < 2? 
0 : pdpm_table->mclk_table.count-2; + for (i = dpm_count; i > 1; i--) { + if (mclk > pgolden_dpm_table->mclk_table.dpm_levels[pgolden_dpm_table->mclk_table.count-1].value) { + clock_percent = ((mclk - pgolden_dpm_table->mclk_table.dpm_levels[pgolden_dpm_table->mclk_table.count-1].value)*100) / + pgolden_dpm_table->mclk_table.dpm_levels[pgolden_dpm_table->mclk_table.count-1].value; + + pdpm_table->mclk_table.dpm_levels[i].value = + pgolden_dpm_table->mclk_table.dpm_levels[i].value + + (pgolden_dpm_table->mclk_table.dpm_levels[i].value * clock_percent)/100; + + } else if (pgolden_dpm_table->mclk_table.dpm_levels[pdpm_table->mclk_table.count-1].value > mclk) { + clock_percent = ((pgolden_dpm_table->mclk_table.dpm_levels[pgolden_dpm_table->mclk_table.count-1].value - mclk)*100) / + pgolden_dpm_table->mclk_table.dpm_levels[pgolden_dpm_table->mclk_table.count-1].value; + + pdpm_table->mclk_table.dpm_levels[i].value = + pgolden_dpm_table->mclk_table.dpm_levels[i].value - + (pgolden_dpm_table->mclk_table.dpm_levels[i].value * clock_percent)/100; + } else + pdpm_table->mclk_table.dpm_levels[i].value = pgolden_dpm_table->mclk_table.dpm_levels[i].value; + } + } + } + + + if (data->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK)) { + result = iceland_populate_all_graphic_levels(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to populate SCLK during PopulateNewDPMClocksStates Function!", + return result); + } + + if (data->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_MCLK + DPMTABLE_UPDATE_MCLK)) { + /*populate MCLK dpm table to SMU7 */ + result = iceland_populate_all_memory_levels(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to populate MCLK during PopulateNewDPMClocksStates Function!", + return result); + } + + return result; +} + +static int iceland_trim_single_dpm_states(struct pp_hwmgr *hwmgr, + struct iceland_single_dpm_table *pdpm_table, + uint32_t low_limit, uint32_t high_limit) +{ + uint32_t i; + + for (i = 0; i < pdpm_table->count; i++) { + if ((pdpm_table->dpm_levels[i].value < low_limit) || + (pdpm_table->dpm_levels[i].value > high_limit)) + pdpm_table->dpm_levels[i].enabled = false; + else + pdpm_table->dpm_levels[i].enabled = true; + } + return 0; +} + +static int iceland_trim_dpm_states(struct pp_hwmgr *hwmgr, const struct iceland_power_state *hw_state) +{ + int result = 0; + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + uint32_t high_limit_count; + + PP_ASSERT_WITH_CODE((hw_state->performance_level_count >= 1), + "power state did not have any performance level", + return -1); + + high_limit_count = (1 == hw_state->performance_level_count) ? 
0: 1; + + iceland_trim_single_dpm_states(hwmgr, &(data->dpm_table.sclk_table), + hw_state->performance_levels[0].engine_clock, + hw_state->performance_levels[high_limit_count].engine_clock); + + iceland_trim_single_dpm_states(hwmgr, &(data->dpm_table.mclk_table), + hw_state->performance_levels[0].memory_clock, + hw_state->performance_levels[high_limit_count].memory_clock); + + return result; +} + +static int iceland_generate_dpm_level_enable_mask(struct pp_hwmgr *hwmgr, const void *input) +{ + int result; + const struct phm_set_power_state_input *states = (const struct phm_set_power_state_input *)input; + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + const struct iceland_power_state *iceland_ps = cast_const_phw_iceland_power_state(states->pnew_state); + + result = iceland_trim_dpm_states(hwmgr, iceland_ps); + if (0 != result) + return result; + + data->dpm_level_enable_mask.sclk_dpm_enable_mask = iceland_get_dpm_level_enable_mask_value(&data->dpm_table.sclk_table); + data->dpm_level_enable_mask.mclk_dpm_enable_mask = iceland_get_dpm_level_enable_mask_value(&data->dpm_table.mclk_table); + data->last_mclk_dpm_enable_mask = data->dpm_level_enable_mask.mclk_dpm_enable_mask; + if (data->uvd_enabled && (data->dpm_level_enable_mask.mclk_dpm_enable_mask & 1)) + data->dpm_level_enable_mask.mclk_dpm_enable_mask &= 0xFFFFFFFE; + + data->dpm_level_enable_mask.pcie_dpm_enable_mask = iceland_get_dpm_level_enable_mask_value(&data->dpm_table.pcie_speed_table); + + return 0; +} + +static int iceland_update_vce_dpm(struct pp_hwmgr *hwmgr, const void *input) +{ + return 0; +} + +int iceland_update_sclk_threshold(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + int result = 0; + uint32_t low_sclk_interrupt_threshold = 0; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkThrottleLowNotification) + && (hwmgr->gfx_arbiter.sclk_threshold != data->low_sclk_interrupt_threshold)) { + data->low_sclk_interrupt_threshold = hwmgr->gfx_arbiter.sclk_threshold; + low_sclk_interrupt_threshold = data->low_sclk_interrupt_threshold; + + CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold); + + result = iceland_copy_bytes_to_smc( + hwmgr->smumgr, + data->dpm_table_start + offsetof(SMU71_Discrete_DpmTable, + LowSclkInterruptThreshold), + (uint8_t *)&low_sclk_interrupt_threshold, + sizeof(uint32_t), + data->sram_end + ); + } + + return result; +} + +static int iceland_update_and_upload_mc_reg_table(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + uint32_t address; + int32_t result; + + if (0 == (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) + return 0; + + + memset(&data->mc_reg_table, 0, sizeof(SMU71_Discrete_MCRegisters)); + + result = iceland_convert_mc_reg_table_to_smc(hwmgr, &(data->mc_reg_table)); + + if(result != 0) + return result; + + + address = data->mc_reg_table_start + (uint32_t)offsetof(SMU71_Discrete_MCRegisters, data[0]); + + return iceland_copy_bytes_to_smc(hwmgr->smumgr, address, + (uint8_t *)&data->mc_reg_table.data[0], + sizeof(SMU71_Discrete_MCRegisterSet) * data->dpm_table.mclk_table.count, + data->sram_end); +} + +static int iceland_program_memory_timing_parameters_conditionally(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + if (data->need_update_smu7_dpm_table & + (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_OD_UPDATE_MCLK)) + return iceland_program_memory_timing_parameters(hwmgr); 
+ + return 0; +} + +static int iceland_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + if (0 == data->need_update_smu7_dpm_table) + return 0; + + if ((0 == data->sclk_dpm_key_disabled) && + (data->need_update_smu7_dpm_table & + (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) { + + PP_ASSERT_WITH_CODE(0 == iceland_is_dpm_running(hwmgr), + "Trying to Unfreeze SCLK DPM when DPM is disabled", + ); + PP_ASSERT_WITH_CODE( + 0 == smum_send_msg_to_smc(hwmgr->smumgr, + PPSMC_MSG_SCLKDPM_UnfreezeLevel), + "Failed to unfreeze SCLK DPM during UnFreezeSclkMclkDPM Function!", + return -1); + } + + if ((0 == data->mclk_dpm_key_disabled) && + (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) { + + PP_ASSERT_WITH_CODE( + 0 == iceland_is_dpm_running(hwmgr), + "Trying to Unfreeze MCLK DPM when DPM is disabled", + ); + PP_ASSERT_WITH_CODE( + 0 == smum_send_msg_to_smc(hwmgr->smumgr, + PPSMC_MSG_MCLKDPM_UnfreezeLevel), + "Failed to unfreeze MCLK DPM during UnFreezeSclkMclkDPM Function!", + return -1); + } + + data->need_update_smu7_dpm_table = 0; + + return 0; +} + +static int iceland_notify_link_speed_change_after_state_change(struct pp_hwmgr *hwmgr, const void *input) +{ + const struct phm_set_power_state_input *states = (const struct phm_set_power_state_input *)input; + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + const struct iceland_power_state *iceland_ps = cast_const_phw_iceland_power_state(states->pnew_state); + uint16_t target_link_speed = iceland_get_maximum_link_speed(hwmgr, iceland_ps); + uint8_t request; + + if (data->pspp_notify_required || + data->pcie_performance_request) { + if (target_link_speed == PP_PCIEGen3) + request = PCIE_PERF_REQ_GEN3; + else if (target_link_speed == PP_PCIEGen2) + request = PCIE_PERF_REQ_GEN2; + else + request = PCIE_PERF_REQ_GEN1; + + if(request == PCIE_PERF_REQ_GEN1 && iceland_get_current_pcie_speed(hwmgr) > 0) { + data->pcie_performance_request = false; + return 0; + } + + if (0 != acpi_pcie_perf_request(hwmgr->device, request, false)) { + if (PP_PCIEGen2 == target_link_speed) + printk("PSPP request to switch to Gen2 from Gen3 Failed!"); + else + printk("PSPP request to switch to Gen1 from Gen2 Failed!"); + } + } + + data->pcie_performance_request = false; + return 0; +} + +int iceland_upload_dpm_level_enable_mask(struct pp_hwmgr *hwmgr) +{ + PPSMC_Result result; + iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); + + if (0 == data->sclk_dpm_key_disabled) { + /* Checking if DPM is running. If we discover hang because of this, we should skip this message.*/ + if (0 != iceland_is_dpm_running(hwmgr)) + printk(KERN_ERR "[ powerplay ] Trying to set Enable Sclk Mask when DPM is disabled \n"); + + if (0 != data->dpm_level_enable_mask.sclk_dpm_enable_mask) { + result = smum_send_msg_to_smc_with_parameter( + hwmgr->smumgr, + (PPSMC_Msg)PPSMC_MSG_SCLKDPM_SetEnabledMask, + data->dpm_level_enable_mask.sclk_dpm_enable_mask); + PP_ASSERT_WITH_CODE((0 == result), + "Set Sclk Dpm enable Mask failed", return -1); + } + } + + if (0 == data->mclk_dpm_key_disabled) { + /* Checking if DPM is running. 
If we discover hang because of this, we should skip this message.*/ + if (0 != iceland_is_dpm_running(hwmgr)) + printk(KERN_ERR "[ powerplay ] Trying to set Enable Mclk Mask when DPM is disabled \n"); + + if (0 != data->dpm_level_enable_mask.mclk_dpm_enable_mask) { + result = smum_send_msg_to_smc_with_parameter( + hwmgr->smumgr, + (PPSMC_Msg)PPSMC_MSG_MCLKDPM_SetEnabledMask, + data->dpm_level_enable_mask.mclk_dpm_enable_mask); + PP_ASSERT_WITH_CODE((0 == result), + "Set Mclk Dpm enable Mask failed", return -1); + } + } + + return 0; +} + +static int iceland_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input) +{ + int tmp_result, result = 0; + + tmp_result = iceland_find_dpm_states_clocks_in_dpm_table(hwmgr, input); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to find DPM states clocks in DPM table!", result = tmp_result); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PCIEPerformanceRequest)) { + tmp_result = iceland_request_link_speed_change_before_state_change(hwmgr, input); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to request link speed change before state change!", result = tmp_result); + } + + tmp_result = iceland_freeze_sclk_mclk_dpm(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to freeze SCLK MCLK DPM!", result = tmp_result); + + tmp_result = iceland_populate_and_upload_sclk_mclk_dpm_levels(hwmgr, input); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to populate and upload SCLK MCLK DPM levels!", result = tmp_result); + + tmp_result = iceland_generate_dpm_level_enable_mask(hwmgr, input); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to generate DPM level enabled mask!", result = tmp_result); + + tmp_result = iceland_update_vce_dpm(hwmgr, input); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to update VCE DPM!", result = tmp_result); + + tmp_result = iceland_update_sclk_threshold(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to update SCLK threshold!", result = tmp_result); + + tmp_result = iceland_update_and_upload_mc_reg_table(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to upload MC reg table!", result = tmp_result); + + tmp_result = iceland_program_memory_timing_parameters_conditionally(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to program memory timing parameters!", result = tmp_result); + + tmp_result = iceland_unfreeze_sclk_mclk_dpm(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to unfreeze SCLK MCLK DPM!", result = tmp_result); + + tmp_result = iceland_upload_dpm_level_enable_mask(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to upload DPM level enabled mask!", result = tmp_result); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PCIEPerformanceRequest)) { + tmp_result = iceland_notify_link_speed_change_after_state_change(hwmgr, input); + PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to notify link speed change after state change!", result = tmp_result); + } + + return result; +} + +static int iceland_get_power_state_size(struct pp_hwmgr *hwmgr) +{ + return sizeof(struct iceland_power_state); +} + +static int iceland_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low) +{ + struct pp_power_state *ps; + struct iceland_power_state *iceland_ps; + + if (hwmgr == NULL) + return -EINVAL; + + ps = hwmgr->request_ps; + + if (ps == NULL) + return -EINVAL; + + iceland_ps = cast_phw_iceland_power_state(&ps->hardware); + + if (low) + return iceland_ps->performance_levels[0].memory_clock; + else + return 
iceland_ps->performance_levels[iceland_ps->performance_level_count-1].memory_clock; +} + +static int iceland_dpm_get_sclk(struct pp_hwmgr *hwmgr, bool low) +{ + struct pp_power_state *ps; + struct iceland_power_state *iceland_ps; + + if (hwmgr == NULL) + return -EINVAL; + + ps = hwmgr->request_ps; + + if (ps == NULL) + return -EINVAL; + + iceland_ps = cast_phw_iceland_power_state(&ps->hardware); + + if (low) + return iceland_ps->performance_levels[0].engine_clock; + else + return iceland_ps->performance_levels[iceland_ps->performance_level_count-1].engine_clock; +} + +static int iceland_get_current_pcie_lane_number( + struct pp_hwmgr *hwmgr) +{ + uint32_t link_width; + + link_width = PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__PCIE, + PCIE_LC_LINK_WIDTH_CNTL, + LC_LINK_WIDTH_RD); + + PP_ASSERT_WITH_CODE((7 >= link_width), + "Invalid PCIe lane width!", return 0); + + return decode_pcie_lane_width(link_width); +} + +static int iceland_dpm_patch_boot_state(struct pp_hwmgr *hwmgr, + struct pp_hw_power_state *hw_ps) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + struct iceland_power_state *ps = (struct iceland_power_state *)hw_ps; + ATOM_FIRMWARE_INFO_V2_2 *fw_info; + uint16_t size; + uint8_t frev, crev; + int index = GetIndexIntoMasterTable(DATA, FirmwareInfo); + + /* First retrieve the Boot clocks and VDDC from the firmware info table. + * We assume here that fw_info is unchanged if this call fails. + */ + fw_info = (ATOM_FIRMWARE_INFO_V2_2 *)cgs_atom_get_data_table( + hwmgr->device, index, + &size, &frev, &crev); + if (!fw_info) + /* During a test, there is no firmware info table. */ + return 0; + + /* Patch the state. */ + data->vbios_boot_state.sclk_bootup_value = le32_to_cpu(fw_info->ulDefaultEngineClock); + data->vbios_boot_state.mclk_bootup_value = le32_to_cpu(fw_info->ulDefaultMemoryClock); + data->vbios_boot_state.mvdd_bootup_value = le16_to_cpu(fw_info->usBootUpMVDDCVoltage); + data->vbios_boot_state.vddc_bootup_value = le16_to_cpu(fw_info->usBootUpVDDCVoltage); + data->vbios_boot_state.vddci_bootup_value = le16_to_cpu(fw_info->usBootUpVDDCIVoltage); + data->vbios_boot_state.pcie_gen_bootup_value = iceland_get_current_pcie_speed(hwmgr); + data->vbios_boot_state.pcie_lane_bootup_value = + (uint16_t)iceland_get_current_pcie_lane_number(hwmgr); + + /* set boot power state */ + ps->performance_levels[0].memory_clock = data->vbios_boot_state.mclk_bootup_value; + ps->performance_levels[0].engine_clock = data->vbios_boot_state.sclk_bootup_value; + ps->performance_levels[0].pcie_gen = data->vbios_boot_state.pcie_gen_bootup_value; + ps->performance_levels[0].pcie_lane = data->vbios_boot_state.pcie_lane_bootup_value; + + return 0; +} + +static int iceland_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr, + struct pp_hw_power_state *power_state, + unsigned int index, const void *clock_info) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + struct iceland_power_state *iceland_power_state = cast_phw_iceland_power_state(power_state); + const ATOM_PPLIB_CI_CLOCK_INFO *visland_clk_info = clock_info; + struct iceland_performance_level *performance_level; + uint32_t engine_clock, memory_clock; + uint16_t pcie_gen_from_bios; + + engine_clock = visland_clk_info->ucEngineClockHigh << 16 | visland_clk_info->usEngineClockLow; + memory_clock = visland_clk_info->ucMemoryClockHigh << 16 | visland_clk_info->usMemoryClockLow; + + if (!(data->mc_micro_code_feature & DISABLE_MC_LOADMICROCODE) && memory_clock > data->highest_mclk) + 
data->highest_mclk = memory_clock; + + performance_level = &(iceland_power_state->performance_levels + [iceland_power_state->performance_level_count++]); + + PP_ASSERT_WITH_CODE( + (iceland_power_state->performance_level_count < SMU71_MAX_LEVELS_GRAPHICS), + "Performance levels exceeds SMC limit!", + return -1); + + PP_ASSERT_WITH_CODE( + (iceland_power_state->performance_level_count <= + hwmgr->platform_descriptor.hardwareActivityPerformanceLevels), + "Performance levels exceeds Driver limit!", + return -1); + + /* Performance levels are arranged from low to high. */ + performance_level->memory_clock = memory_clock; + performance_level->engine_clock = engine_clock; + + pcie_gen_from_bios = visland_clk_info->ucPCIEGen; + + performance_level->pcie_gen = get_pcie_gen_support(data->pcie_gen_cap, pcie_gen_from_bios); + performance_level->pcie_lane = get_pcie_lane_support(data->pcie_lane_cap, visland_clk_info->usPCIELane); + + return 0; +} + +static int iceland_get_pp_table_entry(struct pp_hwmgr *hwmgr, + unsigned long entry_index, struct pp_power_state *state) +{ + int result; + struct iceland_power_state *ps; + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + struct phm_clock_voltage_dependency_table *dep_mclk_table = + hwmgr->dyn_state.vddci_dependency_on_mclk; + + memset(&state->hardware, 0x00, sizeof(struct pp_hw_power_state)); + + state->hardware.magic = PHM_VIslands_Magic; + + ps = (struct iceland_power_state *)(&state->hardware); + + result = pp_tables_get_entry(hwmgr, entry_index, state, + iceland_get_pp_table_entry_callback_func); + + /* + * This is the earliest time we have all the dependency table + * and the VBIOS boot state as + * PP_Tables_GetPowerPlayTableEntry retrieves the VBIOS boot + * state if there is only one VDDCI/MCLK level, check if it's + * the same as VBIOS boot state + */ + if (dep_mclk_table != NULL && dep_mclk_table->count == 1) { + if (dep_mclk_table->entries[0].clk != + data->vbios_boot_state.mclk_bootup_value) + printk(KERN_ERR "Single MCLK entry VDDCI/MCLK dependency table " + "does not match VBIOS boot MCLK level"); + if (dep_mclk_table->entries[0].v != + data->vbios_boot_state.vddci_bootup_value) + printk(KERN_ERR "Single VDDCI entry VDDCI/MCLK dependency table " + "does not match VBIOS boot VDDCI level"); + } + + /* set DC compatible flag if this state supports DC */ + if (!state->validation.disallowOnDC) + ps->dc_compatible = true; + + if (state->classification.flags & PP_StateClassificationFlag_ACPI) + data->acpi_pcie_gen = ps->performance_levels[0].pcie_gen; + else if (0 != (state->classification.flags & PP_StateClassificationFlag_Boot)) { + if (data->bacos.best_match == 0xffff) { + /* For C.I. 
use boot state as base BACO state */ + data->bacos.best_match = PP_StateClassificationFlag_Boot; + data->bacos.performance_level = ps->performance_levels[0]; + } + } + + + ps->uvd_clocks.VCLK = state->uvd_clocks.VCLK; + ps->uvd_clocks.DCLK = state->uvd_clocks.DCLK; + + if (!result) { + uint32_t i; + + switch (state->classification.ui_label) { + case PP_StateUILabel_Performance: + data->use_pcie_performance_levels = true; + + for (i = 0; i < ps->performance_level_count; i++) { + if (data->pcie_gen_performance.max < + ps->performance_levels[i].pcie_gen) + data->pcie_gen_performance.max = + ps->performance_levels[i].pcie_gen; + + if (data->pcie_gen_performance.min > + ps->performance_levels[i].pcie_gen) + data->pcie_gen_performance.min = + ps->performance_levels[i].pcie_gen; + + if (data->pcie_lane_performance.max < + ps->performance_levels[i].pcie_lane) + data->pcie_lane_performance.max = + ps->performance_levels[i].pcie_lane; + + if (data->pcie_lane_performance.min > + ps->performance_levels[i].pcie_lane) + data->pcie_lane_performance.min = + ps->performance_levels[i].pcie_lane; + } + break; + case PP_StateUILabel_Battery: + data->use_pcie_power_saving_levels = true; + + for (i = 0; i < ps->performance_level_count; i++) { + if (data->pcie_gen_power_saving.max < + ps->performance_levels[i].pcie_gen) + data->pcie_gen_power_saving.max = + ps->performance_levels[i].pcie_gen; + + if (data->pcie_gen_power_saving.min > + ps->performance_levels[i].pcie_gen) + data->pcie_gen_power_saving.min = + ps->performance_levels[i].pcie_gen; + + if (data->pcie_lane_power_saving.max < + ps->performance_levels[i].pcie_lane) + data->pcie_lane_power_saving.max = + ps->performance_levels[i].pcie_lane; + + if (data->pcie_lane_power_saving.min > + ps->performance_levels[i].pcie_lane) + data->pcie_lane_power_saving.min = + ps->performance_levels[i].pcie_lane; + } + break; + default: + break; + } + } + return 0; +} + +static void +iceland_print_current_perforce_level(struct pp_hwmgr *hwmgr, struct seq_file *m) +{ + uint32_t sclk, mclk, activity_percent; + uint32_t offset; + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)(PPSMC_MSG_API_GetSclkFrequency)); + + sclk = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + + smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)(PPSMC_MSG_API_GetMclkFrequency)); + + mclk = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + seq_printf(m, "\n [ mclk ]: %u MHz\n\n [ sclk ]: %u MHz\n", mclk/100, sclk/100); + + offset = data->soft_regs_start + offsetof(SMU71_SoftRegisters, AverageGraphicsActivity); + activity_percent = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset); + activity_percent += 0x80; + activity_percent >>= 8; + + seq_printf(m, "\n [GPU load]: %u%%\n\n", activity_percent > 100 ? 100 : activity_percent); + + seq_printf(m, "uvd %sabled\n", data->uvd_power_gated ? "dis" : "en"); + + seq_printf(m, "vce %sabled\n", data->vce_power_gated ? 
"dis" : "en"); +} + +int iceland_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr) +{ + uint32_t num_active_displays = 0; + struct cgs_display_info info = {0}; + info.mode_info = NULL; + + cgs_get_active_displays_info(hwmgr->device, &info); + + num_active_displays = info.display_count; + + if (num_active_displays > 1) /* to do && (pHwMgr->pPECI->displayConfiguration.bMultiMonitorInSync != TRUE)) */ + iceland_notify_smc_display_change(hwmgr, false); + else + iceland_notify_smc_display_change(hwmgr, true); + + return 0; +} + +/** +* Programs the display gap +* +* @param hwmgr the address of the powerplay hardware manager. +* @return always OK +*/ +int iceland_program_display_gap(struct pp_hwmgr *hwmgr) +{ + uint32_t num_active_displays = 0; + uint32_t display_gap = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL); + uint32_t display_gap2; + uint32_t pre_vbi_time_in_us; + uint32_t frame_time_in_us; + uint32_t ref_clock; + uint32_t refresh_rate = 0; + struct cgs_display_info info = {0}; + struct cgs_mode_info mode_info; + + info.mode_info = &mode_info; + + cgs_get_active_displays_info(hwmgr->device, &info); + num_active_displays = info.display_count; + + display_gap = PHM_SET_FIELD(display_gap, CG_DISPLAY_GAP_CNTL, DISP_GAP, (num_active_displays > 0)? DISPLAY_GAP_VBLANK_OR_WM : DISPLAY_GAP_IGNORE); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL, display_gap); + + ref_clock = mode_info.ref_clock; + refresh_rate = mode_info.refresh_rate; + + if(0 == refresh_rate) + refresh_rate = 60; + + frame_time_in_us = 1000000 / refresh_rate; + + pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us; + display_gap2 = pre_vbi_time_in_us * (ref_clock / 100); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL2, display_gap2); + + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SOFT_REGISTERS_TABLE_4, PreVBlankGap, 0x64); + + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SOFT_REGISTERS_TABLE_5, VBlankTimeout, (frame_time_in_us - pre_vbi_time_in_us)); + + if (num_active_displays == 1) + iceland_notify_smc_display_change(hwmgr, true); + + return 0; +} + +int iceland_display_configuration_changed_task(struct pp_hwmgr *hwmgr) +{ + iceland_program_display_gap(hwmgr); + + return 0; +} + +/** +* Set maximum target operating fan output PWM +* +* @param pHwMgr: the address of the powerplay hardware manager. +* @param usMaxFanPwm: max operating fan PWM in percents +* @return The response that came from the SMC. +*/ +static int iceland_set_max_fan_pwm_output(struct pp_hwmgr *hwmgr, uint16_t us_max_fan_pwm) +{ + hwmgr->thermal_controller.advanceFanControlParameters.usMaxFanPWM = us_max_fan_pwm; + + if (phm_is_hw_access_blocked(hwmgr)) + return 0; + + return (0 == smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, PPSMC_MSG_SetFanPwmMax, us_max_fan_pwm) ? 0 : -1); +} + +/** +* Set maximum target operating fan output RPM +* +* @param pHwMgr: the address of the powerplay hardware manager. +* @param usMaxFanRpm: max operating fan RPM value. +* @return The response that came from the SMC. +*/ +static int iceland_set_max_fan_rpm_output(struct pp_hwmgr *hwmgr, uint16_t us_max_fan_pwm) +{ + hwmgr->thermal_controller.advanceFanControlParameters.usMaxFanRPM = us_max_fan_pwm; + + if (phm_is_hw_access_blocked(hwmgr)) + return 0; + + return (0 == smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, PPSMC_MSG_SetFanRpmMax, us_max_fan_pwm) ? 
0 : -1); +} + +static int iceland_dpm_set_interrupt_state(void *private_data, + unsigned src_id, unsigned type, + int enabled) +{ + uint32_t cg_thermal_int; + struct pp_hwmgr *hwmgr = ((struct pp_eventmgr *)private_data)->hwmgr; + + if (hwmgr == NULL) + return -EINVAL; + + switch (type) { + case AMD_THERMAL_IRQ_LOW_TO_HIGH: + if (enabled) { + cg_thermal_int = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_THERMAL_INT); + cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_THERMAL_INT, cg_thermal_int); + } else { + cg_thermal_int = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_THERMAL_INT); + cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_THERMAL_INT, cg_thermal_int); + } + break; + + case AMD_THERMAL_IRQ_HIGH_TO_LOW: + if (enabled) { + cg_thermal_int = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_THERMAL_INT); + cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_THERMAL_INT, cg_thermal_int); + } else { + cg_thermal_int = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_THERMAL_INT); + cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_THERMAL_INT, cg_thermal_int); + } + break; + default: + break; + } + return 0; +} + +static int iceland_register_internal_thermal_interrupt(struct pp_hwmgr *hwmgr, + const void *thermal_interrupt_info) +{ + int result; + const struct pp_interrupt_registration_info *info = + (const struct pp_interrupt_registration_info *)thermal_interrupt_info; + + if (info == NULL) + return -EINVAL; + + result = cgs_add_irq_source(hwmgr->device, 230, AMD_THERMAL_IRQ_LAST, + iceland_dpm_set_interrupt_state, + info->call_back, info->context); + + if (result) + return -EINVAL; + + result = cgs_add_irq_source(hwmgr->device, 231, AMD_THERMAL_IRQ_LAST, + iceland_dpm_set_interrupt_state, + info->call_back, info->context); + + if (result) + return -EINVAL; + + return 0; +} + + +static bool iceland_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + bool is_update_required = false; + struct cgs_display_info info = {0,0,NULL}; + + cgs_get_active_displays_info(hwmgr->device, &info); + + if (data->display_timing.num_existing_displays != info.display_count) + is_update_required = true; +/* TO DO NEED TO GET DEEP SLEEP CLOCK FROM DAL + if (phm_cap_enabled(hwmgr->hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) { + cgs_get_min_clock_settings(hwmgr->device, &min_clocks); + if(min_clocks.engineClockInSR != data->display_timing.minClockInSR) + is_update_required = true; +*/ + return is_update_required; +} + + +static inline bool iceland_are_power_levels_equal(const struct iceland_performance_level *pl1, + const struct iceland_performance_level *pl2) +{ + return ((pl1->memory_clock == pl2->memory_clock) && + (pl1->engine_clock == pl2->engine_clock) && + (pl1->pcie_gen == pl2->pcie_gen) && + (pl1->pcie_lane == pl2->pcie_lane)); +} + +int iceland_check_states_equal(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *pstate1, + const struct pp_hw_power_state *pstate2, bool *equal) +{ + const struct iceland_power_state *psa = cast_const_phw_iceland_power_state(pstate1); + const struct iceland_power_state *psb = 
cast_const_phw_iceland_power_state(pstate2); + int i; + + if (equal == NULL || psa == NULL || psb == NULL) + return -EINVAL; + + /* If the two states don't even have the same number of performance levels they cannot be the same state. */ + if (psa->performance_level_count != psb->performance_level_count) { + *equal = false; + return 0; + } + + for (i = 0; i < psa->performance_level_count; i++) { + if (!iceland_are_power_levels_equal(&(psa->performance_levels[i]), &(psb->performance_levels[i]))) { + /* If we have found even one performance level pair that is different the states are different. */ + *equal = false; + return 0; + } + } + + /* If all performance levels are the same try to use the UVD clocks to break the tie.*/ + *equal = ((psa->uvd_clocks.VCLK == psb->uvd_clocks.VCLK) && (psa->uvd_clocks.DCLK == psb->uvd_clocks.DCLK)); + *equal &= ((psa->vce_clocks.EVCLK == psb->vce_clocks.EVCLK) && (psa->vce_clocks.ECCLK == psb->vce_clocks.ECCLK)); + *equal &= (psa->sclk_threshold == psb->sclk_threshold); + *equal &= (psa->acp_clk == psb->acp_clk); + + return 0; +} + +static int iceland_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) +{ + if (mode) { + /* stop auto-manage */ + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl)) + iceland_fan_ctrl_stop_smc_fan_control(hwmgr); + iceland_fan_ctrl_set_static_mode(hwmgr, mode); + } else + /* restart auto-manage */ + iceland_fan_ctrl_reset_fan_speed_to_default(hwmgr); + + return 0; +} + +static int iceland_get_fan_control_mode(struct pp_hwmgr *hwmgr) +{ + if (hwmgr->fan_ctrl_is_in_default_mode) + return hwmgr->fan_ctrl_default_mode; + else + return PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + CG_FDO_CTRL2, FDO_PWM_MODE); +} + +static int iceland_force_clock_level(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, uint32_t mask) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) + return -EINVAL; + + switch (type) { + case PP_SCLK: + if (!data->sclk_dpm_key_disabled) + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SCLKDPM_SetEnabledMask, + data->dpm_level_enable_mask.sclk_dpm_enable_mask & mask); + break; + case PP_MCLK: + if (!data->mclk_dpm_key_disabled) + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_MCLKDPM_SetEnabledMask, + data->dpm_level_enable_mask.mclk_dpm_enable_mask & mask); + break; + case PP_PCIE: + { + uint32_t tmp = mask & data->dpm_level_enable_mask.pcie_dpm_enable_mask; + uint32_t level = 0; + + while (tmp >>= 1) + level++; + + if (!data->pcie_dpm_key_disabled) + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_PCIeDPM_ForceLevel, + level); + break; + } + default: + break; + } + + return 0; +} + +static int iceland_print_clock_levels(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, char *buf) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + struct iceland_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table); + struct iceland_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table); + struct iceland_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table); + int i, now, size = 0; + uint32_t clock, pcie_speed; + + switch (type) { + case PP_SCLK: + smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetSclkFrequency); + clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + + for (i = 0; i < sclk_table->count; i++) { + if (clock > sclk_table->dpm_levels[i].value) + 
continue; + break; + } + now = i; + + for (i = 0; i < sclk_table->count; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, sclk_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_MCLK: + smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetMclkFrequency); + clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + + for (i = 0; i < mclk_table->count; i++) { + if (clock > mclk_table->dpm_levels[i].value) + continue; + break; + } + now = i; + + for (i = 0; i < mclk_table->count; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, mclk_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_PCIE: + pcie_speed = iceland_get_current_pcie_speed(hwmgr); + for (i = 0; i < pcie_table->count; i++) { + if (pcie_speed != pcie_table->dpm_levels[i].value) + continue; + break; + } + now = i; + + for (i = 0; i < pcie_table->count; i++) + size += sprintf(buf + size, "%d: %s %s\n", i, + (pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x8" : + (pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" : + (pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "", + (i == now) ? "*" : ""); + break; + default: + break; + } + return size; +} + +static int iceland_get_sclk_od(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + struct iceland_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table); + struct iceland_single_dpm_table *golden_sclk_table = + &(data->golden_dpm_table.sclk_table); + int value; + + value = (sclk_table->dpm_levels[sclk_table->count - 1].value - + golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) * + 100 / + golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value; + + return value; +} + +static int iceland_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + struct iceland_single_dpm_table *golden_sclk_table = + &(data->golden_dpm_table.sclk_table); + struct pp_power_state *ps; + struct iceland_power_state *iceland_ps; + + if (value > 20) + value = 20; + + ps = hwmgr->request_ps; + + if (ps == NULL) + return -EINVAL; + + iceland_ps = cast_phw_iceland_power_state(&ps->hardware); + + iceland_ps->performance_levels[iceland_ps->performance_level_count - 1].engine_clock = + golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value * + value / 100 + + golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value; + + return 0; +} + +static int iceland_get_mclk_od(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + struct iceland_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table); + struct iceland_single_dpm_table *golden_mclk_table = + &(data->golden_dpm_table.mclk_table); + int value; + + value = (mclk_table->dpm_levels[mclk_table->count - 1].value - + golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) * + 100 / + golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value; + + return value; +} + +uint32_t iceland_get_xclk(struct pp_hwmgr *hwmgr) +{ + uint32_t reference_clock; + uint32_t tc; + uint32_t divide; + + ATOM_FIRMWARE_INFO *fw_info; + uint16_t size; + uint8_t frev, crev; + int index = GetIndexIntoMasterTable(DATA, FirmwareInfo); + + tc = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_CLKPIN_CNTL_2, MUX_TCLK_TO_XCLK); + + if (tc) + return TCLK; + + fw_info = (ATOM_FIRMWARE_INFO *)cgs_atom_get_data_table(hwmgr->device, index, + &size, &frev, &crev); + + if 
(!fw_info) + return 0; + + reference_clock = le16_to_cpu(fw_info->usReferenceClock); + + divide = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_CLKPIN_CNTL, XTALIN_DIVIDE); + + if (0 != divide) + return reference_clock / 4; + + return reference_clock; +} + +static int iceland_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + struct iceland_single_dpm_table *golden_mclk_table = + &(data->golden_dpm_table.mclk_table); + struct pp_power_state *ps; + struct iceland_power_state *iceland_ps; + + if (value > 20) + value = 20; + + ps = hwmgr->request_ps; + + if (ps == NULL) + return -EINVAL; + + iceland_ps = cast_phw_iceland_power_state(&ps->hardware); + + iceland_ps->performance_levels[iceland_ps->performance_level_count - 1].memory_clock = + golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value * + value / 100 + + golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value; + + return 0; +} + +static const struct pp_hwmgr_func iceland_hwmgr_funcs = { + .backend_init = &iceland_hwmgr_backend_init, + .backend_fini = &iceland_hwmgr_backend_fini, + .asic_setup = &iceland_setup_asic_task, + .dynamic_state_management_enable = &iceland_enable_dpm_tasks, + .apply_state_adjust_rules = iceland_apply_state_adjust_rules, + .force_dpm_level = &iceland_force_dpm_level, + .power_state_set = iceland_set_power_state_tasks, + .get_power_state_size = iceland_get_power_state_size, + .get_mclk = iceland_dpm_get_mclk, + .get_sclk = iceland_dpm_get_sclk, + .patch_boot_state = iceland_dpm_patch_boot_state, + .get_pp_table_entry = iceland_get_pp_table_entry, + .get_num_of_pp_table_entries = iceland_get_num_of_entries, + .print_current_perforce_level = iceland_print_current_perforce_level, + .powerdown_uvd = iceland_phm_powerdown_uvd, + .powergate_uvd = iceland_phm_powergate_uvd, + .powergate_vce = iceland_phm_powergate_vce, + .disable_clock_power_gating = iceland_phm_disable_clock_power_gating, + .update_clock_gatings = iceland_phm_update_clock_gatings, + .notify_smc_display_config_after_ps_adjustment = iceland_notify_smc_display_config_after_ps_adjustment, + .display_config_changed = iceland_display_configuration_changed_task, + .set_max_fan_pwm_output = iceland_set_max_fan_pwm_output, + .set_max_fan_rpm_output = iceland_set_max_fan_rpm_output, + .get_temperature = iceland_thermal_get_temperature, + .stop_thermal_controller = iceland_thermal_stop_thermal_controller, + .get_fan_speed_info = iceland_fan_ctrl_get_fan_speed_info, + .get_fan_speed_percent = iceland_fan_ctrl_get_fan_speed_percent, + .set_fan_speed_percent = iceland_fan_ctrl_set_fan_speed_percent, + .reset_fan_speed_to_default = iceland_fan_ctrl_reset_fan_speed_to_default, + .get_fan_speed_rpm = iceland_fan_ctrl_get_fan_speed_rpm, + .set_fan_speed_rpm = iceland_fan_ctrl_set_fan_speed_rpm, + .uninitialize_thermal_controller = iceland_thermal_ctrl_uninitialize_thermal_controller, + .register_internal_thermal_interrupt = iceland_register_internal_thermal_interrupt, + .check_smc_update_required_for_display_configuration = iceland_check_smc_update_required_for_display_configuration, + .check_states_equal = iceland_check_states_equal, + .set_fan_control_mode = iceland_set_fan_control_mode, + .get_fan_control_mode = iceland_get_fan_control_mode, + .force_clock_level = iceland_force_clock_level, + .print_clock_levels = iceland_print_clock_levels, + .get_sclk_od = iceland_get_sclk_od, + .set_sclk_od = iceland_set_sclk_od, + .get_mclk_od = 
iceland_get_mclk_od, + .set_mclk_od = iceland_set_mclk_od, +}; + +int iceland_hwmgr_init(struct pp_hwmgr *hwmgr) +{ + iceland_hwmgr *data; + + data = kzalloc (sizeof(iceland_hwmgr), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + memset(data, 0x00, sizeof(iceland_hwmgr)); + + hwmgr->backend = data; + hwmgr->hwmgr_func = &iceland_hwmgr_funcs; + hwmgr->pptable_func = &pptable_funcs; + + /* thermal */ + pp_iceland_thermal_initialize(hwmgr); + return 0; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.h new file mode 100644 index 0000000..f253988 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.h @@ -0,0 +1,424 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Author: Huang Rui + * + */ +#ifndef ICELAND_HWMGR_H +#define ICELAND_HWMGR_H + +#include "hwmgr.h" +#include "ppatomctrl.h" +#include "ppinterrupt.h" +#include "ppsmc.h" +#include "iceland_powertune.h" +#include "pp_endian.h" +#include "smu71_discrete.h" + +#define ICELAND_MAX_HARDWARE_POWERLEVELS 2 +#define ICELAND_DYNCLK_NUMBER_OF_TREND_COEFFICIENTS 15 + +struct iceland_performance_level { + uint32_t memory_clock; + uint32_t engine_clock; + uint16_t pcie_gen; + uint16_t pcie_lane; +}; + +struct _phw_iceland_bacos { + uint32_t best_match; + uint32_t baco_flags; + struct iceland_performance_level performance_level; +}; +typedef struct _phw_iceland_bacos phw_iceland_bacos; + +struct _phw_iceland_uvd_clocks { + uint32_t VCLK; + uint32_t DCLK; +}; + +typedef struct _phw_iceland_uvd_clocks phw_iceland_uvd_clocks; + +struct _phw_iceland_vce_clocks { + uint32_t EVCLK; + uint32_t ECCLK; +}; + +typedef struct _phw_iceland_vce_clocks phw_iceland_vce_clocks; + +struct iceland_power_state { + uint32_t magic; + phw_iceland_uvd_clocks uvd_clocks; + phw_iceland_vce_clocks vce_clocks; + uint32_t sam_clk; + uint32_t acp_clk; + uint16_t performance_level_count; + bool dc_compatible; + uint32_t sclk_threshold; + struct iceland_performance_level performance_levels[ICELAND_MAX_HARDWARE_POWERLEVELS]; +}; + +struct _phw_iceland_dpm_level { + bool enabled; + uint32_t value; + uint32_t param1; +}; +typedef struct _phw_iceland_dpm_level phw_iceland_dpm_level; + +#define ICELAND_MAX_DEEPSLEEP_DIVIDER_ID 5 +#define MAX_REGULAR_DPM_NUMBER 8 +#define ICELAND_MINIMUM_ENGINE_CLOCK 5000 + +struct iceland_single_dpm_table { + uint32_t count; + phw_iceland_dpm_level dpm_levels[MAX_REGULAR_DPM_NUMBER]; +}; + +struct iceland_dpm_table { + struct iceland_single_dpm_table sclk_table; + struct iceland_single_dpm_table mclk_table; + struct iceland_single_dpm_table pcie_speed_table; + struct iceland_single_dpm_table vddc_table; + struct iceland_single_dpm_table vdd_gfx_table; + struct iceland_single_dpm_table vdd_ci_table; + struct iceland_single_dpm_table mvdd_table; +}; +typedef struct _phw_iceland_dpm_table phw_iceland_dpm_table; + + +struct _phw_iceland_clock_regisiters { + uint32_t vCG_SPLL_FUNC_CNTL; + uint32_t vCG_SPLL_FUNC_CNTL_2; + uint32_t vCG_SPLL_FUNC_CNTL_3; + uint32_t vCG_SPLL_FUNC_CNTL_4; + uint32_t vCG_SPLL_SPREAD_SPECTRUM; + uint32_t vCG_SPLL_SPREAD_SPECTRUM_2; + uint32_t vDLL_CNTL; + uint32_t vMCLK_PWRMGT_CNTL; + uint32_t vMPLL_AD_FUNC_CNTL; + uint32_t vMPLL_DQ_FUNC_CNTL; + uint32_t vMPLL_FUNC_CNTL; + uint32_t vMPLL_FUNC_CNTL_1; + uint32_t vMPLL_FUNC_CNTL_2; + uint32_t vMPLL_SS1; + uint32_t vMPLL_SS2; +}; +typedef struct _phw_iceland_clock_regisiters phw_iceland_clock_registers; + +struct _phw_iceland_voltage_smio_registers { + uint32_t vs0_vid_lower_smio_cntl; +}; +typedef struct _phw_iceland_voltage_smio_registers phw_iceland_voltage_smio_registers; + + +struct _phw_iceland_mc_reg_entry { + uint32_t mclk_max; + uint32_t mc_data[SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE]; +}; +typedef struct _phw_iceland_mc_reg_entry phw_iceland_mc_reg_entry; + +struct _phw_iceland_mc_reg_table { + uint8_t last; /* number of registers*/ + uint8_t num_entries; /* number of entries in mc_reg_table_entry used*/ + uint16_t validflag; /* indicate the corresponding register is valid or not. 1: valid, 0: invalid. 
bit0->address[0], bit1->address[1], etc.*/ + phw_iceland_mc_reg_entry mc_reg_table_entry[MAX_AC_TIMING_ENTRIES]; + SMU71_Discrete_MCRegisterAddress mc_reg_address[SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE]; +}; +typedef struct _phw_iceland_mc_reg_table phw_iceland_mc_reg_table; + +#define DISABLE_MC_LOADMICROCODE 1 +#define DISABLE_MC_CFGPROGRAMMING 2 + + +/*Ultra Low Voltage parameter structure */ +struct phw_iceland_ulv_parm{ + bool ulv_supported; + uint32_t ch_ulv_parameter; + uint32_t ulv_volt_change_delay; + struct iceland_performance_level ulv_power_level; +}; + +#define ICELAND_MAX_LEAKAGE_COUNT 8 + +struct phw_iceland_leakage_voltage { + uint16_t count; + uint16_t leakage_id[ICELAND_MAX_LEAKAGE_COUNT]; + uint16_t actual_voltage[ICELAND_MAX_LEAKAGE_COUNT]; +}; + +struct _phw_iceland_display_timing { + uint32_t min_clock_insr; + uint32_t num_existing_displays; +}; +typedef struct _phw_iceland_display_timing phw_iceland_display_timing; + + +struct phw_iceland_thermal_temperature_setting +{ + long temperature_low; + long temperature_high; + long temperature_shutdown; +}; + +struct _phw_iceland_dpmlevel_enable_mask { + uint32_t uvd_dpm_enable_mask; + uint32_t vce_dpm_enable_mask; + uint32_t acp_dpm_enable_mask; + uint32_t samu_dpm_enable_mask; + uint32_t sclk_dpm_enable_mask; + uint32_t mclk_dpm_enable_mask; + uint32_t pcie_dpm_enable_mask; +}; +typedef struct _phw_iceland_dpmlevel_enable_mask phw_iceland_dpmlevel_enable_mask; + +struct _phw_iceland_pcie_perf_range { + uint16_t max; + uint16_t min; +}; +typedef struct _phw_iceland_pcie_perf_range phw_iceland_pcie_perf_range; + +struct _phw_iceland_vbios_boot_state { + uint16_t mvdd_bootup_value; + uint16_t vddc_bootup_value; + uint16_t vddci_bootup_value; + uint16_t vddgfx_bootup_value; + uint32_t sclk_bootup_value; + uint32_t mclk_bootup_value; + uint16_t pcie_gen_bootup_value; + uint16_t pcie_lane_bootup_value; +}; +typedef struct _phw_iceland_vbios_boot_state phw_iceland_vbios_boot_state; + +#define DPMTABLE_OD_UPDATE_SCLK 0x00000001 +#define DPMTABLE_OD_UPDATE_MCLK 0x00000002 +#define DPMTABLE_UPDATE_SCLK 0x00000004 +#define DPMTABLE_UPDATE_MCLK 0x00000008 + +/* We need to review which fields are needed. */ +/* This is mostly a copy of the RV7xx/Evergreen structure which is close, but not identical to the N.Islands one. 
*/ +struct iceland_hwmgr { + struct iceland_dpm_table dpm_table; + struct iceland_dpm_table golden_dpm_table; + + uint32_t voting_rights_clients0; + uint32_t voting_rights_clients1; + uint32_t voting_rights_clients2; + uint32_t voting_rights_clients3; + uint32_t voting_rights_clients4; + uint32_t voting_rights_clients5; + uint32_t voting_rights_clients6; + uint32_t voting_rights_clients7; + uint32_t static_screen_threshold_unit; + uint32_t static_screen_threshold; + uint32_t voltage_control; + uint32_t vdd_gfx_control; + + uint32_t vddc_vddci_delta; + uint32_t vddc_vddgfx_delta; + + struct pp_interrupt_registration_info internal_high_thermal_interrupt_info; + struct pp_interrupt_registration_info internal_low_thermal_interrupt_info; + struct pp_interrupt_registration_info smc_to_host_interrupt_info; + uint32_t active_auto_throttle_sources; + + struct pp_interrupt_registration_info external_throttle_interrupt; + irq_handler_func_t external_throttle_callback; + void *external_throttle_context; + + struct pp_interrupt_registration_info ctf_interrupt_info; + irq_handler_func_t ctf_callback; + void *ctf_context; + + phw_iceland_clock_registers clock_registers; + phw_iceland_voltage_smio_registers voltage_smio_registers; + + bool is_memory_GDDR5; + uint16_t acpi_vddc; + bool pspp_notify_required; /* Flag to indicate if PSPP notification to SBIOS is required */ + uint16_t force_pcie_gen; /* The forced PCI-E speed if not 0xffff */ + uint16_t acpi_pcie_gen; /* The PCI-E speed at ACPI time */ + uint32_t pcie_gen_cap; /* The PCI-E speed capabilities bitmap from CAIL */ + uint32_t pcie_lane_cap; /* The PCI-E lane capabilities bitmap from CAIL */ + uint32_t pcie_spc_cap; /* Symbol Per Clock Capabilities from registry */ + struct phw_iceland_leakage_voltage vddc_leakage; /* The Leakage VDDC supported (based on leakage ID).*/ + struct phw_iceland_leakage_voltage vddcgfx_leakage; /* The Leakage VDDC supported (based on leakage ID). */ + struct phw_iceland_leakage_voltage vddci_leakage; /* The Leakage VDDCI supported (based on leakage ID). */ + + uint32_t mvdd_control; + uint32_t vddc_mask_low; + uint32_t mvdd_mask_low; + uint16_t max_vddc_in_pp_table; /* the maximum VDDC value in the powerplay table*/ + uint16_t min_vddc_in_pp_table; + uint16_t max_vddci_in_pp_table; /* the maximum VDDCI value in the powerplay table */ + uint16_t min_vddci_in_pp_table; + uint32_t mclk_strobe_mode_threshold; + uint32_t mclk_stutter_mode_threshold; + uint32_t mclk_edc_enable_threshold; + uint32_t mclk_edc_wr_enable_threshold; + bool is_uvd_enabled; + bool is_xdma_enabled; + phw_iceland_vbios_boot_state vbios_boot_state; + + bool battery_state; + bool is_tlu_enabled; + bool pcie_performance_request; + + /* -------------- SMC SRAM Address of firmware header tables ----------------*/ + uint32_t sram_end; /* The first address after the SMC SRAM. */ + uint32_t dpm_table_start; /* The start of the dpm table in the SMC SRAM. */ + uint32_t soft_regs_start; /* The start of the soft registers in the SMC SRAM. */ + uint32_t mc_reg_table_start; /* The start of the mc register table in the SMC SRAM. */ + uint32_t fan_table_start; /* The start of the fan table in the SMC SRAM. */ + uint32_t arb_table_start; /* The start of the ARB setting table in the SMC SRAM. */ + uint32_t ulv_settings_start; + SMU71_Discrete_DpmTable smc_state_table; /* The carbon copy of the SMC state table. */ + SMU71_Discrete_MCRegisters mc_reg_table; + SMU71_Discrete_Ulv ulv_setting; /* The carbon copy of ULV setting. 
*/ + + /* -------------- Stuff originally coming from Evergreen --------------------*/ + phw_iceland_mc_reg_table iceland_mc_reg_table; + uint32_t vdd_ci_control; + pp_atomctrl_voltage_table vddc_voltage_table; + pp_atomctrl_voltage_table vddci_voltage_table; + pp_atomctrl_voltage_table vddgfx_voltage_table; + pp_atomctrl_voltage_table mvdd_voltage_table; + + uint32_t mgcg_cgtt_local2; + uint32_t mgcg_cgtt_local3; + uint32_t gpio_debug; + uint32_t mc_micro_code_feature; + uint32_t highest_mclk; + uint16_t acpi_vdd_ci; + uint8_t mvdd_high_index; + uint8_t mvdd_low_index; + bool dll_defaule_on; + bool performance_request_registered; + + /* ----------------- Low Power Features ---------------------*/ + phw_iceland_bacos bacos; + struct phw_iceland_ulv_parm ulv; + + /* ----------------- CAC Stuff ---------------------*/ + uint32_t cac_table_start; + bool cac_configuration_required; /* TRUE if PP_CACConfigurationRequired == 1 */ + bool driver_calculate_cac_leakage; /* TRUE if PP_DriverCalculateCACLeakage == 1 */ + bool cac_enabled; + + /* ----------------- DPM2 Parameters ---------------------*/ + uint32_t power_containment_features; + bool enable_bapm_feature; + bool enable_dte_feature; + bool enable_tdc_limit_feature; + bool enable_pkg_pwr_tracking_feature; + bool disable_uvd_power_tune_feature; + struct iceland_pt_defaults *power_tune_defaults; + SMU71_Discrete_PmFuses power_tune_table; + uint32_t ul_dte_tj_offset; /* Fudge factor in DPM table to correct HW DTE errors */ + uint32_t fast_watermark_threshold; /* use fast watermark if clock is equal or above this. In percentage of the target high sclk. */ + + /* ----------------- Phase Shedding ---------------------*/ + bool vddc_phase_shed_control; + + /* --------------------- DI/DT --------------------------*/ + phw_iceland_display_timing display_timing; + + /* --------- ReadRegistry data for memory and engine clock margins ---- */ + uint32_t engine_clock_data; + uint32_t memory_clock_data; + + /* -------- Thermal Temperature Setting --------------*/ + struct phw_iceland_thermal_temperature_setting thermal_temp_setting; + phw_iceland_dpmlevel_enable_mask dpm_level_enable_mask; + + uint32_t need_update_smu7_dpm_table; + uint32_t sclk_dpm_key_disabled; + uint32_t mclk_dpm_key_disabled; + uint32_t pcie_dpm_key_disabled; + /* used to store the previous dal min sclock */ + uint32_t min_engine_clocks; + phw_iceland_pcie_perf_range pcie_gen_performance; + phw_iceland_pcie_perf_range pcie_lane_performance; + phw_iceland_pcie_perf_range pcie_gen_power_saving; + phw_iceland_pcie_perf_range pcie_lane_power_saving; + bool use_pcie_performance_levels; + bool use_pcie_power_saving_levels; + /* percentage value from 0-100, default 50 */ + uint32_t activity_target[SMU71_MAX_LEVELS_GRAPHICS]; + uint32_t mclk_activity_target; + uint32_t low_sclk_interrupt_threshold; + uint32_t last_mclk_dpm_enable_mask; + bool uvd_enabled; + uint32_t pcc_monitor_enabled; + + /* --------- Power Gating States ------------*/ + bool uvd_power_gated; /* 1: gated, 0:not gated */ + bool vce_power_gated; /* 1: gated, 0:not gated */ + bool samu_power_gated; /* 1: gated, 0:not gated */ + bool acp_power_gated; /* 1: gated, 0:not gated */ + bool pg_acp_init; + + /* soft pptable for re-uploading into smu */ + void *soft_pp_table; +}; + +typedef struct iceland_hwmgr iceland_hwmgr; + +int iceland_hwmgr_init(struct pp_hwmgr *hwmgr); +int iceland_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate); +uint32_t iceland_get_xclk(struct pp_hwmgr *hwmgr); +int 
iceland_populate_bapm_vddc_vid_sidd(struct pp_hwmgr *hwmgr); +int iceland_populate_vddc_vid(struct pp_hwmgr *hwmgr); + +#define ICELAND_DPM2_NEAR_TDP_DEC 10 +#define ICELAND_DPM2_ABOVE_SAFE_INC 5 +#define ICELAND_DPM2_BELOW_SAFE_INC 20 + +/* + * Log2 of the LTA window size (l2numWin_TDP). Eg. If LTA windows size + * is 128, then this value should be Log2(128) = 7. + */ +#define ICELAND_DPM2_LTA_WINDOW_SIZE 7 + +#define ICELAND_DPM2_LTS_TRUNCATE 0 + +#define ICELAND_DPM2_TDP_SAFE_LIMIT_PERCENT 80 // Maximum 100 + +#define ICELAND_DPM2_MAXPS_PERCENT_H 90 // Maximum 0xFF +#define ICELAND_DPM2_MAXPS_PERCENT_M 90 // Maximum 0xFF + +#define ICELAND_DPM2_PWREFFICIENCYRATIO_MARGIN 50 + +#define ICELAND_DPM2_SQ_RAMP_MAX_POWER 0x3FFF +#define ICELAND_DPM2_SQ_RAMP_MIN_POWER 0x12 +#define ICELAND_DPM2_SQ_RAMP_MAX_POWER_DELTA 0x15 +#define ICELAND_DPM2_SQ_RAMP_SHORT_TERM_INTERVAL_SIZE 0x1E +#define ICELAND_DPM2_SQ_RAMP_LONG_TERM_INTERVAL_RATIO 0xF + +#define ICELAND_VOLTAGE_CONTROL_NONE 0x0 +#define ICELAND_VOLTAGE_CONTROL_BY_GPIO 0x1 +#define ICELAND_VOLTAGE_CONTROL_BY_SVID2 0x2 + +/* convert to Q8.8 format for firmware */ +#define ICELAND_Q88_FORMAT_CONVERSION_UNIT 256 + +#define ICELAND_UNUSED_GPIO_PIN 0x7F + +#endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_powertune.c new file mode 100644 index 0000000..d10cd9f --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_powertune.c @@ -0,0 +1,491 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Author: Huang Rui + * + */ + +#include "amdgpu.h" +#include "hwmgr.h" +#include "smumgr.h" +#include "iceland_hwmgr.h" +#include "iceland_powertune.h" +#include "iceland_smumgr.h" +#include "smu71_discrete.h" +#include "smu71.h" +#include "pp_debug.h" +#include "cgs_common.h" +#include "pp_endian.h" + +#include "bif/bif_5_0_d.h" +#include "bif/bif_5_0_sh_mask.h" + +#define VOLTAGE_SCALE 4 +#define POWERTUNE_DEFAULT_SET_MAX 1 + +#define DEVICE_ID_VI_ICELAND_M_6900 0x6900 +#define DEVICE_ID_VI_ICELAND_M_6901 0x6901 +#define DEVICE_ID_VI_ICELAND_M_6902 0x6902 +#define DEVICE_ID_VI_ICELAND_M_6903 0x6903 + + +struct iceland_pt_defaults defaults_iceland = +{ + /* + * sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, + * TDC_MAWt, TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT + */ + 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000, + { 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61 }, + { 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } +}; + +/* 35W - XT, XTL */ +struct iceland_pt_defaults defaults_icelandxt = +{ + /* + * sviLoadLIneEn, SviLoadLineVddC, + * TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, + * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, + * BAPM_TEMP_GRADIENT + */ + 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x0, + { 0xA7, 0x0, 0x0, 0xB5, 0x0, 0x0, 0x9F, 0x0, 0x0, 0xD6, 0x0, 0x0, 0xD7, 0x0, 0x0}, + { 0x1EA, 0x0, 0x0, 0x224, 0x0, 0x0, 0x25E, 0x0, 0x0, 0x28E, 0x0, 0x0, 0x2AB, 0x0, 0x0} +}; + +/* 25W - PRO, LE */ +struct iceland_pt_defaults defaults_icelandpro = +{ + /* + * sviLoadLIneEn, SviLoadLineVddC, + * TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, + * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, + * BAPM_TEMP_GRADIENT + */ + 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x0, + { 0xB7, 0x0, 0x0, 0xC3, 0x0, 0x0, 0xB5, 0x0, 0x0, 0xEA, 0x0, 0x0, 0xE6, 0x0, 0x0}, + { 0x1EA, 0x0, 0x0, 0x224, 0x0, 0x0, 0x25E, 0x0, 0x0, 0x28E, 0x0, 0x0, 0x2AB, 0x0, 0x0} +}; + +void iceland_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + uint32_t tmp = 0; + struct cgs_system_info sys_info = {0}; + uint32_t pdev_id; + + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_PCIE_DEV; + cgs_query_system_info(hwmgr->device, &sys_info); + pdev_id = (uint32_t)sys_info.value; + + switch (pdev_id) { + case DEVICE_ID_VI_ICELAND_M_6900: + case DEVICE_ID_VI_ICELAND_M_6903: + data->power_tune_defaults = &defaults_icelandxt; + break; + + case DEVICE_ID_VI_ICELAND_M_6901: + case DEVICE_ID_VI_ICELAND_M_6902: + data->power_tune_defaults = &defaults_icelandpro; + break; + default: + /* TODO: need to assign valid defaults */ + data->power_tune_defaults = &defaults_iceland; + pr_warning("Unknown V.I. 
Device ID.\n"); + break; + } + + /* Assume disabled */ + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_CAC); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SQRamping); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DBRamping); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_TDRamping); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_TCPRamping); + + data->ul_dte_tj_offset = tmp; + + if (!tmp) { + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_CAC); + + data->fast_watermark_threshold = 100; + + if (hwmgr->powercontainment_enabled) { + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment); + tmp = 1; + data->enable_dte_feature = tmp ? false : true; + data->enable_tdc_limit_feature = tmp ? true : false; + data->enable_pkg_pwr_tracking_feature = tmp ? true : false; + } + } +} + +int iceland_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + struct iceland_pt_defaults *defaults = data->power_tune_defaults; + SMU71_Discrete_DpmTable *dpm_table = &(data->smc_state_table); + struct phm_cac_tdp_table *cac_dtp_table = hwmgr->dyn_state.cac_dtp_table; + struct phm_ppm_table *ppm = hwmgr->dyn_state.ppm_parameter_table; + uint16_t *def1, *def2; + int i, j, k; + + /* + * TDP number of fraction bits are changed from 8 to 7 for Iceland + * as requested by SMC team + */ + dpm_table->DefaultTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 256)); + dpm_table->TargetTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usConfigurableTDP * 256)); + + dpm_table->DTETjOffset = (uint8_t)data->ul_dte_tj_offset; + + dpm_table->GpuTjMax = (uint8_t)(data->thermal_temp_setting.temperature_high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES); + dpm_table->GpuTjHyst = 8; + + dpm_table->DTEAmbientTempBase = defaults->dte_ambient_temp_base; + + /* The following are for new Iceland Multi-input fan/thermal control */ + if(NULL != ppm) { + dpm_table->PPM_PkgPwrLimit = (uint16_t)ppm->dgpu_tdp * 256 / 1000; + dpm_table->PPM_TemperatureLimit = (uint16_t)ppm->tj_max * 256; + } else { + dpm_table->PPM_PkgPwrLimit = 0; + dpm_table->PPM_TemperatureLimit = 0; + } + + CONVERT_FROM_HOST_TO_SMC_US(dpm_table->PPM_PkgPwrLimit); + CONVERT_FROM_HOST_TO_SMC_US(dpm_table->PPM_TemperatureLimit); + + dpm_table->BAPM_TEMP_GRADIENT = PP_HOST_TO_SMC_UL(defaults->bamp_temp_gradient); + def1 = defaults->bapmti_r; + def2 = defaults->bapmti_rc; + + for (i = 0; i < SMU71_DTE_ITERATIONS; i++) { + for (j = 0; j < SMU71_DTE_SOURCES; j++) { + for (k = 0; k < SMU71_DTE_SINKS; k++) { + dpm_table->BAPMTI_R[i][j][k] = PP_HOST_TO_SMC_US(*def1); + dpm_table->BAPMTI_RC[i][j][k] = PP_HOST_TO_SMC_US(*def2); + def1++; + def2++; + } + } + } + + return 0; +} + +static int iceland_populate_svi_load_line(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + const struct iceland_pt_defaults *defaults = data->power_tune_defaults; + + data->power_tune_table.SviLoadLineEn = defaults->svi_load_line_en; + data->power_tune_table.SviLoadLineVddC = defaults->svi_load_line_vddc; + data->power_tune_table.SviLoadLineTrimVddC = 3; + data->power_tune_table.SviLoadLineOffsetVddC = 0; + + return 0; +} + +static int iceland_populate_tdc_limit(struct pp_hwmgr 
*hwmgr) +{ + uint16_t tdc_limit; + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + const struct iceland_pt_defaults *defaults = data->power_tune_defaults; + + /* TDC number of fraction bits are changed from 8 to 7 + * for Iceland as requested by SMC team + */ + tdc_limit = (uint16_t)(hwmgr->dyn_state.cac_dtp_table->usTDC * 256); + data->power_tune_table.TDC_VDDC_PkgLimit = + CONVERT_FROM_HOST_TO_SMC_US(tdc_limit); + data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc = + defaults->tdc_vddc_throttle_release_limit_perc; + data->power_tune_table.TDC_MAWt = defaults->tdc_mawt; + + return 0; +} + +static int iceland_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + const struct iceland_pt_defaults *defaults = data->power_tune_defaults; + uint32_t temp; + + if (iceland_read_smc_sram_dword(hwmgr->smumgr, + fuse_table_offset + + offsetof(SMU71_Discrete_PmFuses, TdcWaterfallCtl), + (uint32_t *)&temp, data->sram_end)) + PP_ASSERT_WITH_CODE(false, + "Attempt to read PmFuses.DW6 (SviLoadLineEn) from SMC Failed!", + return -EINVAL); + else + data->power_tune_table.TdcWaterfallCtl = defaults->tdc_waterfall_ctl; + + return 0; +} + +static int iceland_populate_temperature_scaler(struct pp_hwmgr *hwmgr) +{ + return 0; +} + +static int iceland_populate_gnb_lpml(struct pp_hwmgr *hwmgr) +{ + int i; + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + /* Currently not used. Set all to zero. */ + for (i = 0; i < 8; i++) + data->power_tune_table.GnbLPML[i] = 0; + + return 0; +} + +static int iceland_min_max_vgnb_lpml_id_from_bapm_vddc(struct pp_hwmgr *hwmgr) +{ + return 0; +} + +static int iceland_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + uint16_t HiSidd = data->power_tune_table.BapmVddCBaseLeakageHiSidd; + uint16_t LoSidd = data->power_tune_table.BapmVddCBaseLeakageLoSidd; + struct phm_cac_tdp_table *cac_table = hwmgr->dyn_state.cac_dtp_table; + + HiSidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256); + LoSidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256); + + data->power_tune_table.BapmVddCBaseLeakageHiSidd = + CONVERT_FROM_HOST_TO_SMC_US(HiSidd); + data->power_tune_table.BapmVddCBaseLeakageLoSidd = + CONVERT_FROM_HOST_TO_SMC_US(LoSidd); + + return 0; +} + +int iceland_populate_pm_fuses(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + uint32_t pm_fuse_table_offset; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment)) { + if (iceland_read_smc_sram_dword(hwmgr->smumgr, + SMU71_FIRMWARE_HEADER_LOCATION + + offsetof(SMU71_Firmware_Header, PmFuseTable), + &pm_fuse_table_offset, data->sram_end)) + PP_ASSERT_WITH_CODE(false, + "Attempt to get pm_fuse_table_offset Failed!", + return -EINVAL); + + /* DW0 - DW3 */ + if (iceland_populate_bapm_vddc_vid_sidd(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate bapm vddc vid Failed!", + return -EINVAL); + + /* DW4 - DW5 */ + if (iceland_populate_vddc_vid(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate vddc vid Failed!", + return -EINVAL); + + /* DW6 */ + if (iceland_populate_svi_load_line(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate SviLoadLine Failed!", + return -EINVAL); + /* DW7 */ + if (iceland_populate_tdc_limit(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TDCLimit 
Failed!", return -EINVAL); + /* DW8 */ + if (iceland_populate_dw8(hwmgr, pm_fuse_table_offset)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TdcWaterfallCtl, " + "LPMLTemperature Min and Max Failed!", + return -EINVAL); + + /* DW9-DW12 */ + if (0 != iceland_populate_temperature_scaler(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate LPMLTemperatureScaler Failed!", + return -EINVAL); + + /* DW13-DW16 */ + if (iceland_populate_gnb_lpml(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate GnbLPML Failed!", + return -EINVAL); + + /* DW17 */ + if (iceland_min_max_vgnb_lpml_id_from_bapm_vddc(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate GnbLPML Min and Max Vid Failed!", + return -EINVAL); + + /* DW18 */ + if (iceland_populate_bapm_vddc_base_leakage_sidd(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate BapmVddCBaseLeakage Hi and Lo Sidd Failed!", + return -EINVAL); + + if (iceland_copy_bytes_to_smc(hwmgr->smumgr, pm_fuse_table_offset, + (uint8_t *)&data->power_tune_table, + sizeof(struct SMU71_Discrete_PmFuses), data->sram_end)) + PP_ASSERT_WITH_CODE(false, + "Attempt to download PmFuseTable Failed!", + return -EINVAL); + } + return 0; +} + +int iceland_enable_smc_cac(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + int result = 0; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_CAC)) { + int smc_result; + smc_result = smum_send_msg_to_smc(hwmgr->smumgr, + (uint16_t)(PPSMC_MSG_EnableCac)); + PP_ASSERT_WITH_CODE((0 == smc_result), + "Failed to enable CAC in SMC.", result = -1); + + data->cac_enabled = (0 == smc_result) ? true : false; + } + return result; +} + +static int iceland_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + + if(data->power_containment_features & + POWERCONTAINMENT_FEATURE_PkgPwrLimit) + return smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_PkgPwrSetLimit, n); + return 0; +} + +static int iceland_set_overdriver_target_tdp(struct pp_hwmgr *pHwMgr, uint32_t target_tdp) +{ + return smum_send_msg_to_smc_with_parameter(pHwMgr->smumgr, + PPSMC_MSG_OverDriveSetTargetTdp, target_tdp); +} + +int iceland_enable_power_containment(struct pp_hwmgr *hwmgr) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + SMU71_Discrete_DpmTable *dpm_table = &data->smc_state_table; + int smc_result; + int result = 0; + uint32_t is_asic_kicker; + + data->power_containment_features = 0; + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment)) { + is_asic_kicker = cgs_read_register(hwmgr->device, mmCC_BIF_BX_STRAP2); + is_asic_kicker = (is_asic_kicker >> 12) & 0x01; + + if (data->enable_bapm_feature && + (!is_asic_kicker || + phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DisableUsingActualTemperatureForPowerCalc))) { + smc_result = smum_send_msg_to_smc(hwmgr->smumgr, + (uint16_t)(PPSMC_MSG_EnableDTE)); + PP_ASSERT_WITH_CODE((0 == smc_result), + "Failed to enable BAPM in SMC.", result = -1;); + if (0 == smc_result) + data->power_containment_features |= POWERCONTAINMENT_FEATURE_BAPM; + } + + if (is_asic_kicker && !phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DisableUsingActualTemperatureForPowerCalc)) + dpm_table->DTEMode = 2; + + if (data->enable_tdc_limit_feature) { + smc_result = smum_send_msg_to_smc(hwmgr->smumgr, + 
(uint16_t)(PPSMC_MSG_TDCLimitEnable)); + PP_ASSERT_WITH_CODE((0 == smc_result), + "Failed to enable TDCLimit in SMC.", result = -1;); + if (0 == smc_result) + data->power_containment_features |= + POWERCONTAINMENT_FEATURE_TDCLimit; + } + + if (data->enable_pkg_pwr_tracking_feature) { + smc_result = smum_send_msg_to_smc(hwmgr->smumgr, + (uint16_t)(PPSMC_MSG_PkgPwrLimitEnable)); + PP_ASSERT_WITH_CODE((0 == smc_result), + "Failed to enable PkgPwrTracking in SMC.", result = -1;); + if (0 == smc_result) { + struct phm_cac_tdp_table *cac_table = + hwmgr->dyn_state.cac_dtp_table; + uint32_t default_limit = + (uint32_t)(cac_table->usMaximumPowerDeliveryLimit * 256); + + data->power_containment_features |= + POWERCONTAINMENT_FEATURE_PkgPwrLimit; + + if (iceland_set_power_limit(hwmgr, default_limit)) + printk(KERN_ERR "Failed to set Default Power Limit in SMC!"); + } + } + } + return result; +} + +int iceland_power_control_set_level(struct pp_hwmgr *hwmgr) +{ + struct phm_cac_tdp_table *cac_table = hwmgr->dyn_state.cac_dtp_table; + int adjust_percent, target_tdp; + int result = 0; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment)) { + /* adjustment percentage has already been validated */ + adjust_percent = hwmgr->platform_descriptor.TDPAdjustmentPolarity ? + hwmgr->platform_descriptor.TDPAdjustment : + (-1 * hwmgr->platform_descriptor.TDPAdjustment); + /* + * SMC requested that target_tdp to be 7 bit fraction in DPM table + * but message to be 8 bit fraction for messages + */ + target_tdp = ((100 + adjust_percent) * (int)(cac_table->usTDP * 256)) / 100; + result = iceland_set_overdriver_target_tdp(hwmgr, (uint32_t)target_tdp); + } + + return result; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_powertune.h b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_powertune.h new file mode 100644 index 0000000..6c25ee1 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_powertune.h @@ -0,0 +1,74 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Author: Huang Rui + * + */ +#ifndef ICELAND_POWERTUNE_H +#define ICELAND_POWERTUNE_H + +#include "smu71.h" + +enum iceland_pt_config_reg_type { + ICELAND_CONFIGREG_MMR = 0, + ICELAND_CONFIGREG_SMC_IND, + ICELAND_CONFIGREG_DIDT_IND, + ICELAND_CONFIGREG_CACHE, + ICELAND_CONFIGREG_MAX +}; + +/* PowerContainment Features */ +#define POWERCONTAINMENT_FEATURE_DTE 0x00000001 +#define POWERCONTAINMENT_FEATURE_TDCLimit 0x00000002 +#define POWERCONTAINMENT_FEATURE_PkgPwrLimit 0x00000004 +#define POWERCONTAINMENT_FEATURE_BAPM 0x00000001 + +struct iceland_pt_config_reg { + uint32_t offset; + uint32_t mask; + uint32_t shift; + uint32_t value; + enum iceland_pt_config_reg_type type; +}; + +struct iceland_pt_defaults +{ + uint8_t svi_load_line_en; + uint8_t svi_load_line_vddc; + uint8_t tdc_vddc_throttle_release_limit_perc; + uint8_t tdc_mawt; + uint8_t tdc_waterfall_ctl; + uint8_t dte_ambient_temp_base; + uint32_t display_cac; + uint32_t bamp_temp_gradient; + uint16_t bapmti_r[SMU71_DTE_ITERATIONS * SMU71_DTE_SOURCES * SMU71_DTE_SINKS]; + uint16_t bapmti_rc[SMU71_DTE_ITERATIONS * SMU71_DTE_SOURCES * SMU71_DTE_SINKS]; +}; + +void iceland_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr); +int iceland_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr); +int iceland_populate_pm_fuses(struct pp_hwmgr *hwmgr); +int iceland_enable_smc_cac(struct pp_hwmgr *hwmgr); +int iceland_enable_power_containment(struct pp_hwmgr *hwmgr); +int iceland_power_control_set_level(struct pp_hwmgr *hwmgr); + +#endif /* ICELAND_POWERTUNE_H */ + diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_thermal.c new file mode 100644 index 0000000..527f370 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_thermal.c @@ -0,0 +1,595 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ +#include +#include "iceland_thermal.h" +#include "iceland_hwmgr.h" +#include "iceland_smumgr.h" +#include "atombios.h" +#include "ppsmc.h" + +#include "gmc/gmc_8_1_d.h" +#include "gmc/gmc_8_1_sh_mask.h" + +#include "bif/bif_5_0_d.h" +#include "bif/bif_5_0_sh_mask.h" + +#include "smu/smu_7_1_1_d.h" +#include "smu/smu_7_1_1_sh_mask.h" + + +/** +* Get Fan Speed Control Parameters. +* @param hwmgr the address of the powerplay hardware manager. +* @param pSpeed is the address of the structure where the result is to be placed. 
+* @exception Always succeeds except if we cannot zero out the output structure. +*/ +int iceland_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr, + struct phm_fan_speed_info *fan_speed_info) +{ + + if (hwmgr->thermal_controller.fanInfo.bNoFan) + return 0; + + fan_speed_info->supports_percent_read = true; + fan_speed_info->supports_percent_write = true; + fan_speed_info->min_percent = 0; + fan_speed_info->max_percent = 100; + + if (0 != hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution) { + fan_speed_info->supports_rpm_read = true; + fan_speed_info->supports_rpm_write = true; + fan_speed_info->min_rpm = hwmgr->thermal_controller.fanInfo.ulMinRPM; + fan_speed_info->max_rpm = hwmgr->thermal_controller.fanInfo.ulMaxRPM; + } else { + fan_speed_info->min_rpm = 0; + fan_speed_info->max_rpm = 0; + } + + return 0; +} + +/** +* Get Fan Speed in percent. +* @param hwmgr the address of the powerplay hardware manager. +* @param pSpeed is the address of the structure where the result is to be placed. +* @exception Fails is the 100% setting appears to be 0. +*/ +int iceland_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, uint32_t *speed) +{ + uint32_t duty100; + uint32_t duty; + uint64_t tmp64; + + if (hwmgr->thermal_controller.fanInfo.bNoFan) + return 0; + + duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL1, FMAX_DUTY100); + duty = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_THERMAL_STATUS, FDO_PWM_DUTY); + + if (0 == duty100) + return -EINVAL; + + + tmp64 = (uint64_t)duty * 100; + do_div(tmp64, duty100); + *speed = (uint32_t)tmp64; + + if (*speed > 100) + *speed = 100; + + return 0; +} + +/** +* Get Fan Speed in RPM. +* @param hwmgr the address of the powerplay hardware manager. +* @param speed is the address of the structure where the result is to be placed. +* @exception Returns not supported if no fan is found or if pulses per revolution are not set +*/ +int iceland_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed) +{ + return 0; +} + +/** +* Set Fan Speed Control to static mode, so that the user can decide what speed to use. +* @param hwmgr the address of the powerplay hardware manager. +* mode the fan control mode, 0 default, 1 by percent, 5, by RPM +* @exception Should always succeed. +*/ +int iceland_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode) +{ + + if (hwmgr->fan_ctrl_is_in_default_mode) { + hwmgr->fan_ctrl_default_mode = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL2, FDO_PWM_MODE); + hwmgr->tmin = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL2, TMIN); + hwmgr->fan_ctrl_is_in_default_mode = false; + } + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL2, TMIN, 0); + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL2, FDO_PWM_MODE, mode); + + return 0; +} + +/** +* Reset Fan Speed Control to default mode. +* @param hwmgr the address of the powerplay hardware manager. +* @exception Should always succeed. 
+*/ +static int iceland_fan_ctrl_set_default_mode(struct pp_hwmgr *hwmgr) +{ + if (!hwmgr->fan_ctrl_is_in_default_mode) { + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL2, FDO_PWM_MODE, hwmgr->fan_ctrl_default_mode); + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL2, TMIN, hwmgr->tmin); + hwmgr->fan_ctrl_is_in_default_mode = true; + } + + return 0; +} + +int iceland_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr) +{ + return (smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_StartFanControl) == 0) ? 0 : -EINVAL; +} + + +int iceland_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr) +{ + return (smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_StopFanControl) == 0) ? 0 : -EINVAL; +} + +/** +* Set Fan Speed in percent. +* @param hwmgr the address of the powerplay hardware manager. +* @param speed is the percentage value (0% - 100%) to be set. +* @exception Fails is the 100% setting appears to be 0. +*/ +int iceland_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, uint32_t speed) +{ + uint32_t duty100; + uint32_t duty; + uint64_t tmp64; + + if (hwmgr->thermal_controller.fanInfo.bNoFan) + return -EINVAL; + + if (speed > 100) { + pr_warning("Cannot set more than 100%% duty cycle. Set it to 100.\n"); + speed = 100; + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl)) + iceland_fan_ctrl_stop_smc_fan_control(hwmgr); + + duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL1, FMAX_DUTY100); + + if (0 == duty100) + return -EINVAL; + + tmp64 = (uint64_t)speed * duty100; + do_div(tmp64, 100); + duty = (uint32_t)tmp64; + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL0, FDO_STATIC_DUTY, duty); + + return iceland_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC); +} + +/** +* Reset Fan Speed to default. +* @param hwmgr the address of the powerplay hardware manager. +* @exception Always succeeds. +*/ +int iceland_fan_ctrl_reset_fan_speed_to_default(struct pp_hwmgr *hwmgr) +{ + int result; + + if (hwmgr->thermal_controller.fanInfo.bNoFan) + return 0; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl)) { + result = iceland_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC); + if (0 == result) + result = iceland_fan_ctrl_start_smc_fan_control(hwmgr); + } else + result = iceland_fan_ctrl_set_default_mode(hwmgr); + + return result; +} + +/** +* Set Fan Speed in RPM. +* @param hwmgr the address of the powerplay hardware manager. +* @param speed is the percentage value (min - max) to be set. +* @exception Fails is the speed not lie between min and max. +*/ +int iceland_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) +{ + return 0; +} + +/** +* Reads the remote temperature from the SIslands thermal controller. +* +* @param hwmgr The address of the hardware manager. +*/ +int iceland_thermal_get_temperature(struct pp_hwmgr *hwmgr) +{ + int temp; + + temp = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_MULT_THERMAL_STATUS, CTF_TEMP); + + /* + * Bit 9 means the reading is lower than the lowest usable + * value. + */ + if (0 != (0x200 & temp)) + temp = ICELAND_THERMAL_MAXIMUM_TEMP_READING; + else + temp = (temp & 0x1ff); + + temp = temp * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + + return temp; +} + +/** +* Set the requested temperature range for high and low alert signals +* +* @param hwmgr The address of the hardware manager. 
+* @param range Temperature range to be programmed for high and low alert signals +* @exception PP_Result_BadInput if the input data is not valid. +*/ +static int iceland_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, uint32_t low_temp, uint32_t high_temp) +{ + uint32_t low = ICELAND_THERMAL_MINIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + uint32_t high = ICELAND_THERMAL_MAXIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + + if (low < low_temp) + low = low_temp; + if (high > high_temp) + high = high_temp; + + if (low > high) + return -EINVAL; + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_THERMAL_INT, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_THERMAL_INT, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_THERMAL_CTRL, DIG_THERM_DPM, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + + return 0; +} + +/** +* Programs thermal controller one-time setting registers +* +* @param hwmgr The address of the hardware manager. +*/ +static int iceland_thermal_initialize(struct pp_hwmgr *hwmgr) +{ + if (0 != hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution) + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + CG_TACH_CTRL, EDGE_PER_REV, + hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution - 1); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL2, TACH_PWM_RESP_RATE, 0x28); + + return 0; +} + +/** +* Enable thermal alerts on the RV770 thermal controller. +* +* @param hwmgr The address of the hardware manager. +*/ +static int iceland_thermal_enable_alert(struct pp_hwmgr *hwmgr) +{ + uint32_t alert; + + alert = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_THERMAL_INT, THERM_INT_MASK); + alert &= ~(ICELAND_THERMAL_HIGH_ALERT_MASK | ICELAND_THERMAL_LOW_ALERT_MASK); + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_THERMAL_INT, THERM_INT_MASK, alert); + + /* send message to SMU to enable internal thermal interrupts */ + return (smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_Thermal_Cntl_Enable) == 0) ? 0 : -1; +} + +/** +* Disable thermal alerts on the RV770 thermal controller. +* @param hwmgr The address of the hardware manager. +*/ +static int iceland_thermal_disable_alert(struct pp_hwmgr *hwmgr) +{ + uint32_t alert; + + alert = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_THERMAL_INT, THERM_INT_MASK); + alert |= (ICELAND_THERMAL_HIGH_ALERT_MASK | ICELAND_THERMAL_LOW_ALERT_MASK); + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_THERMAL_INT, THERM_INT_MASK, alert); + + /* send message to SMU to disable internal thermal interrupts */ + return (smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_Thermal_Cntl_Disable) == 0) ? 0 : -1; +} + +/** +* Uninitialize the thermal controller. +* Currently just disables alerts. +* @param hwmgr The address of the hardware manager. +*/ +int iceland_thermal_stop_thermal_controller(struct pp_hwmgr *hwmgr) +{ + int result = iceland_thermal_disable_alert(hwmgr); + + if (result) + pr_warning("Failed to disable thermal alerts!\n"); + + if (hwmgr->thermal_controller.fanInfo.bNoFan) + iceland_fan_ctrl_set_default_mode(hwmgr); + + return result; +} + +/** +* Set up the fan table to control the fan using the SMC. +* @param hwmgr the address of the powerplay hardware manager. 
+* @param pInput the pointer to input data +* @param pOutput the pointer to output data +* @param pStorage the pointer to temporary storage +* @param Result the last failure code +* @return result from set temperature range routine +*/ +int tf_iceland_thermal_setup_fan_table(struct pp_hwmgr *hwmgr, void *input, void *output, void *storage, int result) +{ + struct iceland_hwmgr *data = (struct iceland_hwmgr *)(hwmgr->backend); + SMU71_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE }; + uint32_t duty100; + uint32_t t_diff1, t_diff2, pwm_diff1, pwm_diff2; + uint16_t fdo_min, slope1, slope2; + uint32_t reference_clock; + int res; + uint64_t tmp64; + + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl)) + return 0; + + if (0 == data->fan_table_start) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL1, FMAX_DUTY100); + + if (0 == duty100) { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); + return 0; + } + + tmp64 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin * duty100; + do_div(tmp64, 10000); + fdo_min = (uint16_t)tmp64; + + t_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usTMed - hwmgr->thermal_controller.advanceFanControlParameters.usTMin; + t_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usTHigh - hwmgr->thermal_controller.advanceFanControlParameters.usTMed; + + pwm_diff1 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin; + pwm_diff2 = hwmgr->thermal_controller.advanceFanControlParameters.usPWMHigh - hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed; + + slope1 = (uint16_t)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100); + slope2 = (uint16_t)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100); + + fan_table.TempMin = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMin) / 100); + fan_table.TempMed = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMed) / 100); + fan_table.TempMax = cpu_to_be16((50 + hwmgr->thermal_controller.advanceFanControlParameters.usTMax) / 100); + + fan_table.Slope1 = cpu_to_be16(slope1); + fan_table.Slope2 = cpu_to_be16(slope2); + + fan_table.FdoMin = cpu_to_be16(fdo_min); + + fan_table.HystDown = cpu_to_be16(hwmgr->thermal_controller.advanceFanControlParameters.ucTHyst); + + fan_table.HystUp = cpu_to_be16(1); + + fan_table.HystSlope = cpu_to_be16(1); + + fan_table.TempRespLim = cpu_to_be16(5); + + reference_clock = iceland_get_xclk(hwmgr); + + fan_table.RefreshPeriod = cpu_to_be32((hwmgr->thermal_controller.advanceFanControlParameters.ulCycleDelay * reference_clock) / 1600); + + fan_table.FdoMax = cpu_to_be16((uint16_t)duty100); + + fan_table.TempSrc = (uint8_t)PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_MULT_THERMAL_CTRL, TEMP_SEL); + + //fan_table.FanControl_GL_Flag = 1; + + res = iceland_copy_bytes_to_smc(hwmgr->smumgr, data->fan_table_start, (uint8_t *)&fan_table, (uint32_t)sizeof(fan_table), data->sram_end); +/* TO DO FOR SOME DEVICE ID 0X692b, send this msg return invalid command. 
+ if (res == 0 && hwmgr->thermal_controller.advanceFanControlParameters.ucMinimumPWMLimit != 0) + res = (0 == smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, PPSMC_MSG_SetFanMinPwm, \ + hwmgr->thermal_controller.advanceFanControlParameters.ucMinimumPWMLimit) ? 0 : -1); + + if (res == 0 && hwmgr->thermal_controller.advanceFanControlParameters.ulMinFanSCLKAcousticLimit != 0) + res = (0 == smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, PPSMC_MSG_SetFanSclkTarget, \ + hwmgr->thermal_controller.advanceFanControlParameters.ulMinFanSCLKAcousticLimit) ? 0 : -1); + + if (0 != res) + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl); +*/ + return 0; +} + +/** +* Start the fan control on the SMC. +* @param hwmgr the address of the powerplay hardware manager. +* @param pInput the pointer to input data +* @param pOutput the pointer to output data +* @param pStorage the pointer to temporary storage +* @param Result the last failure code +* @return result from set temperature range routine +*/ +int tf_iceland_thermal_start_smc_fan_control(struct pp_hwmgr *hwmgr, void *input, void *output, void *storage, int result) +{ +/* If the fantable setup has failed we could have disabled PHM_PlatformCaps_MicrocodeFanControl even after this function was included in the table. + * Make sure that we still think controlling the fan is OK. +*/ + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl)) { + iceland_fan_ctrl_start_smc_fan_control(hwmgr); + iceland_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC); + } + + return 0; +} + +/** +* Set temperature range for high and low alerts +* @param hwmgr the address of the powerplay hardware manager. +* @param pInput the pointer to input data +* @param pOutput the pointer to output data +* @param pStorage the pointer to temporary storage +* @param Result the last failure code +* @return result from set temperature range routine +*/ +static int tf_iceland_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, + void *input, void *output, void *storage, int result) +{ + struct PP_TemperatureRange *range = (struct PP_TemperatureRange *)input; + + if (range == NULL) + return -EINVAL; + + return iceland_thermal_set_temperature_range(hwmgr, range->min, range->max); +} + +/** +* Programs one-time setting registers +* @param hwmgr the address of the powerplay hardware manager. +* @param pInput the pointer to input data +* @param pOutput the pointer to output data +* @param pStorage the pointer to temporary storage +* @param Result the last failure code +* @return result from initialize thermal controller routine +*/ +static int tf_iceland_thermal_initialize(struct pp_hwmgr *hwmgr, void *input, + void *output, void *storage, int result) +{ + return iceland_thermal_initialize(hwmgr); +} + +/** +* Enable high and low alerts +* @param hwmgr the address of the powerplay hardware manager. +* @param pInput the pointer to input data +* @param pOutput the pointer to output data +* @param pStorage the pointer to temporary storage +* @param Result the last failure code +* @return result from enable alert routine +*/ +static int tf_iceland_thermal_enable_alert(struct pp_hwmgr *hwmgr, + void *input, void *output, void *storage, int result) +{ + return iceland_thermal_enable_alert(hwmgr); +} + +/** +* Disable high and low alerts +* @param hwmgr the address of the powerplay hardware manager. 
+* @param pInput the pointer to input data +* @param pOutput the pointer to output data +* @param pStorage the pointer to temporary storage +* @param Result the last failure code +* @return result from disable alert routine +*/ +static int tf_iceland_thermal_disable_alert(struct pp_hwmgr *hwmgr, void *input, void *output, void *storage, int result) +{ + return iceland_thermal_disable_alert(hwmgr); +} + +static const struct phm_master_table_item iceland_thermal_start_thermal_controller_master_list[] = { + { NULL, tf_iceland_thermal_initialize }, + { NULL, tf_iceland_thermal_set_temperature_range }, + { NULL, tf_iceland_thermal_enable_alert }, + /* + * We should restrict performance levels to low before we halt + * the SMC. On the other hand we are still in boot state when + * we do this so it would be pointless. If this assumption + * changes we have to revisit this table. + */ + { NULL, tf_iceland_thermal_setup_fan_table}, + { NULL, tf_iceland_thermal_start_smc_fan_control}, + { NULL, NULL } +}; + +static const struct phm_master_table_header iceland_thermal_start_thermal_controller_master = { + 0, + PHM_MasterTableFlag_None, + iceland_thermal_start_thermal_controller_master_list +}; + +static const struct phm_master_table_item iceland_thermal_set_temperature_range_master_list[] = { + { NULL, tf_iceland_thermal_disable_alert}, + { NULL, tf_iceland_thermal_set_temperature_range}, + { NULL, tf_iceland_thermal_enable_alert}, + { NULL, NULL } +}; + +static const struct phm_master_table_header iceland_thermal_set_temperature_range_master = { + 0, + PHM_MasterTableFlag_None, + iceland_thermal_set_temperature_range_master_list +}; + +int iceland_thermal_ctrl_uninitialize_thermal_controller(struct pp_hwmgr *hwmgr) +{ + if (!hwmgr->thermal_controller.fanInfo.bNoFan) + iceland_fan_ctrl_set_default_mode(hwmgr); + return 0; +} + +/** +* Initializes the thermal controller related functions in the Hardware Manager structure. +* @param hwmgr The address of the hardware manager. +* @exception Any error code from the low-level communication. +*/ +int pp_iceland_thermal_initialize(struct pp_hwmgr *hwmgr) +{ + int result; + + result = phm_construct_table(hwmgr, &iceland_thermal_set_temperature_range_master, &(hwmgr->set_temperature_range)); + + if (0 == result) { + result = phm_construct_table(hwmgr, + &iceland_thermal_start_thermal_controller_master, + &(hwmgr->start_thermal_controller)); + if (0 != result) + phm_destroy_table(hwmgr, &(hwmgr->set_temperature_range)); + } + + if (0 == result) + hwmgr->fan_ctrl_is_in_default_mode = true; + return result; +} + diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_thermal.h b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_thermal.h new file mode 100644 index 0000000..267945f --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_thermal.h @@ -0,0 +1,58 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ + +#ifndef ICELAND_THERMAL_H +#define ICELAND_THERMAL_H + +#include "hwmgr.h" + +#define ICELAND_THERMAL_HIGH_ALERT_MASK 0x1 +#define ICELAND_THERMAL_LOW_ALERT_MASK 0x2 + +#define ICELAND_THERMAL_MINIMUM_TEMP_READING -256 +#define ICELAND_THERMAL_MAXIMUM_TEMP_READING 255 + +#define ICELAND_THERMAL_MINIMUM_ALERT_TEMP 0 +#define ICELAND_THERMAL_MAXIMUM_ALERT_TEMP 255 + +#define FDO_PWM_MODE_STATIC 1 +#define FDO_PWM_MODE_STATIC_RPM 5 + + +extern int iceland_thermal_get_temperature(struct pp_hwmgr *hwmgr); +extern int iceland_thermal_stop_thermal_controller(struct pp_hwmgr *hwmgr); +extern int iceland_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr, struct phm_fan_speed_info *fan_speed_info); +extern int iceland_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, uint32_t *speed); +extern int iceland_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode); +extern int iceland_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, uint32_t speed); +extern int iceland_fan_ctrl_reset_fan_speed_to_default(struct pp_hwmgr *hwmgr); +extern int pp_iceland_thermal_initialize(struct pp_hwmgr *hwmgr); +extern int iceland_thermal_ctrl_uninitialize_thermal_controller(struct pp_hwmgr *hwmgr); +extern int iceland_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed); +extern int iceland_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed); +extern int iceland_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr); + +#endif + -- cgit v0.10.2 From 54c825a9166f0c13aa4044500910633105a7e6a7 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Mon, 20 Jun 2016 13:35:28 +0800 Subject: drm/amdgpu: enable iceland powerplay manually It's able to enable iceland powerplay manually via the module parameter. The default state is disabled. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index c5738a22..260da02 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -112,13 +112,15 @@ static int amdgpu_pp_early_init(void *handle) case CHIP_STONEY: adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false; break; + case CHIP_TOPAZ: + adev->pp_enabled = (amdgpu_powerplay > 0) ? 
true : false; + break; /* These chips don't have powerplay implemenations */ case CHIP_BONAIRE: case CHIP_HAWAII: case CHIP_KABINI: case CHIP_MULLINS: case CHIP_KAVERI: - case CHIP_TOPAZ: default: adev->pp_enabled = false; break; -- cgit v0.10.2 From 9f7aab6fd396dcb7dacee152784ddf9c2f9b0e8b Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 26 Jul 2016 16:42:11 +0800 Subject: drm/amd/powerplay: fix the incorrect checking condition Signed-off-by: Huang Rui Reviewed-by: Ken Wang Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c index 9c6d7e3..7ffbbef 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c @@ -4170,7 +4170,7 @@ int iceland_unforce_dpm_levels(struct pp_hwmgr *hwmgr) return -1); } - if (0 == data->pcie_dpm_key_disabled) { + if (0 == data->mclk_dpm_key_disabled) { PP_ASSERT_WITH_CODE((0 == smum_send_msg_to_smc( hwmgr->smumgr, PPSMC_MSG_MCLKDPM_NoForcedLevel)), -- cgit v0.10.2 From e4e2c8f29a343953f0ff31bd2775b73cd8c07a4c Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 26 Jul 2016 15:57:07 +0800 Subject: drm/amd/powerplay: add deep sleep initialization This patch adds the deep sleep initialization at DPM, it needs send a message to SMC to enable this feature before enable voltage controller. Signed-off-by: Huang Rui Reviewed-by: Ken Wang Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c index 7ffbbef..7a9749f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c @@ -3069,6 +3069,36 @@ static int iceland_tf_start_smc(struct pp_hwmgr *hwmgr) return ret; } +/** +* Programs the Deep Sleep registers +* +* @param pHwMgr the address of the powerplay hardware manager. 
+* @param pInput the pointer to input data (PhwEvergreen_DisplayConfiguration) +* @param pOutput the pointer to output data (unused) +* @param pStorage the pointer to temporary storage (unused) +* @param Result the last failure code (unused) +* @return always 0 +*/ +static int iceland_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr) +{ + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkDeepSleep)) { + if (smum_send_msg_to_smc(hwmgr->smumgr, + PPSMC_MSG_MASTER_DeepSleep_ON) != 0) + PP_ASSERT_WITH_CODE(false, + "Attempt to enable Master Deep Sleep switch failed!", + return -EINVAL); + } else { + if (smum_send_msg_to_smc(hwmgr->smumgr, + PPSMC_MSG_MASTER_DeepSleep_OFF) != 0) + PP_ASSERT_WITH_CODE(false, + "Attempt to disable Master Deep Sleep switch failed!", + return -EINVAL); + } + + return 0; +} + static int iceland_enable_dpm_tasks(struct pp_hwmgr *hwmgr) { int tmp_result, result = 0; @@ -3133,6 +3163,10 @@ static int iceland_enable_dpm_tasks(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to enable SCLK control!", return tmp_result); + tmp_result = iceland_enable_deep_sleep_master_switch(hwmgr); + PP_ASSERT_WITH_CODE((tmp_result == 0), + "Failed to enable deep sleep!", return tmp_result); + /* enable DPM */ tmp_result = iceland_start_dpm(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), -- cgit v0.10.2 From f6bf6dcaab35ba6de4a41de05271bf567ea4cadd Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 26 Jul 2016 16:25:57 +0800 Subject: drm/amd/powerplay: set the platform capability flags for iceland Signed-off-by: Huang Rui Reviewed-by: Ken Wang Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c index 7a9749f..6075050 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c @@ -3801,6 +3801,57 @@ static int iceland_hwmgr_backend_init(struct pp_hwmgr *hwmgr) stay_in_boot = phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StayInBootState); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DynamicPowerManagement); + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ActivityReporting); + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_GFXClockGatingSupport); + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MemorySpreadSpectrumSupport); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_EngineSpreadSpectrumSupport); + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DynamicPCIEGen2Support); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SMC); + + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DisablePowerGating); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_BACO); + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ThermalAutoThrottling); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DisableLSClockGating); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SamuDPM); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AcpDPM); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_OD6inACSupport); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_EnablePlatformPowerManagement); + + 
phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PauseMMSessions); + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_OD6PlusinACSupport); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PauseMMSessions); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_GFXClockGatingManagedInCAIL); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_IcelandULPSSWWorkAround); + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment); + /* iceland doesn't support UVD and VCE */ phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_UVDPowerGating); -- cgit v0.10.2 From db7992d8205cb5fce806dac5d818e75669b35f11 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 26 Jul 2016 16:33:41 +0800 Subject: drm/amd/powerplay: add enabling voltage controller back Signed-off-by: Huang Rui Reviewed-by: Ken Wang Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c index 6075050..d538d28 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c @@ -2970,13 +2970,11 @@ int iceland_start_dpm(struct pp_hwmgr *hwmgr) PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__PCIE, SWRST_COMMAND_1, RESETLC, 0x0); -#if 0 PP_ASSERT_WITH_CODE( (0 == smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_Voltage_Cntl_Enable)), "Failed to enable voltage DPM during DPM Start Function!", return -1); -#endif if (0 != iceland_enable_sclk_mclk_dpm(hwmgr)) { PP_ASSERT_WITH_CODE(0, "Failed to enable Sclk DPM and Mclk DPM!", return -1); -- cgit v0.10.2 From 3c18266cffc9741e1982005034a0a3494acbdf86 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 26 Jul 2016 16:38:36 +0800 Subject: drm/amd/powerplay: add DPM running checking back This patch adds DPM running checking back, because the DPM issue is fixed. Signed-off-by: Huang Rui Reviewed-by: Ken Wang Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c index d538d28..d8ca59b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c @@ -4239,11 +4239,9 @@ int iceland_unforce_dpm_levels(struct pp_hwmgr *hwmgr) { iceland_hwmgr *data = (iceland_hwmgr *)(hwmgr->backend); -#if 0 PP_ASSERT_WITH_CODE (0 == iceland_is_dpm_running(hwmgr), "Trying to Unforce DPM when DPM is disabled. Returning without sending SMC message.", return -1); -#endif if (0 == data->sclk_dpm_key_disabled) { PP_ASSERT_WITH_CODE((0 == smum_send_msg_to_smc( -- cgit v0.10.2 From 70bb246154229550e5c9095d484b39fb82047907 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 28 Jul 2016 13:35:42 -0400 Subject: drm/amdgpu/powerplay: enable powerplay by default on TOPAZ Now that the implementation is complete. Acked-by: Huang Rui Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 260da02..57aa342 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -106,15 +106,13 @@ static int amdgpu_pp_early_init(void *handle) break; case CHIP_TONGA: case CHIP_FIJI: + case CHIP_TOPAZ: adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true; break; case CHIP_CARRIZO: case CHIP_STONEY: adev->pp_enabled = (amdgpu_powerplay > 0) ? 
true : false; break; - case CHIP_TOPAZ: - adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false; - break; /* These chips don't have powerplay implemenations */ case CHIP_BONAIRE: case CHIP_HAWAII: -- cgit v0.10.2 From e95a14a9094a8e6396fba22cbdea9201824d6fd1 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 2016 09:40:07 -0400 Subject: drm/amd/amdgpu: add mutex locking for both DPM and PP based powergating for UVD/VCE This adds a mutex lock for both DPM/PP around the changes in power gating state so that userspace can poll registers without a race condition on power state. Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index ff63b88..411965f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1106,54 +1106,46 @@ force: void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) { - if (adev->pp_enabled) + if (adev->pp_enabled || adev->pm.funcs->powergate_uvd) { + /* enable/disable UVD */ + mutex_lock(&adev->pm.mutex); amdgpu_dpm_powergate_uvd(adev, !enable); - else { - if (adev->pm.funcs->powergate_uvd) { + mutex_unlock(&adev->pm.mutex); + } else { + if (enable) { mutex_lock(&adev->pm.mutex); - /* enable/disable UVD */ - amdgpu_dpm_powergate_uvd(adev, !enable); + adev->pm.dpm.uvd_active = true; + adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; mutex_unlock(&adev->pm.mutex); } else { - if (enable) { - mutex_lock(&adev->pm.mutex); - adev->pm.dpm.uvd_active = true; - adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; - mutex_unlock(&adev->pm.mutex); - } else { - mutex_lock(&adev->pm.mutex); - adev->pm.dpm.uvd_active = false; - mutex_unlock(&adev->pm.mutex); - } - amdgpu_pm_compute_clocks(adev); + mutex_lock(&adev->pm.mutex); + adev->pm.dpm.uvd_active = false; + mutex_unlock(&adev->pm.mutex); } - + amdgpu_pm_compute_clocks(adev); } } void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) { - if (adev->pp_enabled) + if (adev->pp_enabled || adev->pm.funcs->powergate_vce) { + /* enable/disable VCE */ + mutex_lock(&adev->pm.mutex); amdgpu_dpm_powergate_vce(adev, !enable); - else { - if (adev->pm.funcs->powergate_vce) { + mutex_unlock(&adev->pm.mutex); + } else { + if (enable) { mutex_lock(&adev->pm.mutex); - amdgpu_dpm_powergate_vce(adev, !enable); + adev->pm.dpm.vce_active = true; + /* XXX select vce level based on ring/task */ + adev->pm.dpm.vce_level = AMDGPU_VCE_LEVEL_AC_ALL; mutex_unlock(&adev->pm.mutex); } else { - if (enable) { - mutex_lock(&adev->pm.mutex); - adev->pm.dpm.vce_active = true; - /* XXX select vce level based on ring/task */ - adev->pm.dpm.vce_level = AMDGPU_VCE_LEVEL_AC_ALL; - mutex_unlock(&adev->pm.mutex); - } else { - mutex_lock(&adev->pm.mutex); - adev->pm.dpm.vce_active = false; - mutex_unlock(&adev->pm.mutex); - } - amdgpu_pm_compute_clocks(adev); + mutex_lock(&adev->pm.mutex); + adev->pm.dpm.vce_active = false; + mutex_unlock(&adev->pm.mutex); } + amdgpu_pm_compute_clocks(adev); } } -- cgit v0.10.2 From bd12267dd896ff8e3dfd25ac31cbc255e9184715 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 2016 09:39:22 -0400 Subject: drm/amd/amdgpu: add pm lock to debugfs mmio entry Adds support for PM locks around access to registers that might have race conditions on PG transistions. 
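
A rough user-space sketch of how a tool might drive this interface (the debugfs path and the register offset are illustrative assumptions; only the offset bit layout — PG-lock flag in bit 23, bank select via bit 62 with SE/SH/instance in bits 24-53, register byte offset in the low 18 bits — follows from the hunk below):

/*
 * Hypothetical user-space sketch, not part of the original commit: read one
 * MMIO register through the amdgpu register debugfs file while requesting
 * the new PM/PG lock, so the read cannot race a powergating transition.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/dri/0/amdgpu_regs"; /* assumed path */
	uint64_t reg = 0x1234;                  /* placeholder dword register index */
	uint64_t pos = (reg * 4) & 0x3FFFF;     /* low bits: register byte offset */
	uint32_t value;
	int fd;

	pos |= 1ULL << 23;                      /* ask for the read under adev->pm.mutex */
	/* To target a specific SE/SH/instance, a tool would also set bit 62 and
	 * place the bank indices in bits 24-33, 34-43 and 44-53. */

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (pread(fd, &value, sizeof(value), (off_t)pos) != (ssize_t)sizeof(value)) {
		perror("pread");
		close(fd);
		return 1;
	}
	printf("reg 0x%04llx = 0x%08x\n", (unsigned long long)reg, value);
	close(fd);
	return 0;
}
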
Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 309e58c..9b358df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2282,22 +2282,26 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, struct amdgpu_device *adev = f->f_inode->i_private; ssize_t result = 0; int r; - bool use_bank; + bool pm_pg_lock, use_bank; unsigned instance_bank, sh_bank, se_bank; if (size & 0x3 || *pos & 0x3) return -EINVAL; + /* are we reading registers for which a PG lock is necessary? */ + pm_pg_lock = (*pos >> 23) & 1; + if (*pos & (1ULL << 62)) { se_bank = (*pos >> 24) & 0x3FF; sh_bank = (*pos >> 34) & 0x3FF; instance_bank = (*pos >> 44) & 0x3FF; use_bank = 1; - *pos &= 0xFFFFFF; } else { use_bank = 0; } + *pos &= 0x3FFFF; + if (use_bank) { if (sh_bank >= adev->gfx.config.max_sh_per_se || se_bank >= adev->gfx.config.max_shader_engines) @@ -2307,6 +2311,9 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, sh_bank, instance_bank); } + if (pm_pg_lock) + mutex_lock(&adev->pm.mutex); + while (size) { uint32_t value; @@ -2332,6 +2339,9 @@ end: mutex_unlock(&adev->grbm_idx_mutex); } + if (pm_pg_lock) + mutex_unlock(&adev->pm.mutex); + return result; } -- cgit v0.10.2 From 7acc83d4df77d2ed0153db2583f00a34e31fce92 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 2016 09:44:14 -0400 Subject: drm/amd/amdgpu: don't set clockgating in uvd_v6_0_start() This is handled properly by both DPM and PP externally. Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 4fa5091..4dbd5ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -396,15 +396,10 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) uvd_v6_0_mc_resume(adev); - /* Set dynamic clock gating in S/W control mode */ - if (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG) { - uvd_v6_0_set_sw_clock_gating(adev); - } else { - /* disable clock gating */ - uint32_t data = RREG32(mmUVD_CGC_CTRL); - data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; - WREG32(mmUVD_CGC_CTRL, data); - } + /* disable clock gating */ + tmp = RREG32(mmUVD_CGC_CTRL); + tmp &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + WREG32(mmUVD_CGC_CTRL, tmp); /* disable interupt */ WREG32_P(mmUVD_MASTINT_EN, 0, ~UVD_MASTINT_EN__VCPU_EN_MASK); -- cgit v0.10.2 From 2875125c3184c4b05f2c2c67540339e5b90813ad Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 2016 09:44:49 -0400 Subject: drm/amd/amdgpu: don't track state in UVD clockgating There's no need to track CG state anymore. Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 4dbd5ab..7f2b5de 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -959,21 +959,15 @@ static int uvd_v6_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; - static int curstate = -1; - - if (adev->asic_type == CHIP_FIJI || - adev->asic_type == CHIP_POLARIS10) - uvd_v6_set_bypass_mode(adev, enable); if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) return 0; - if (curstate == state) - return 0; + if (adev->asic_type == CHIP_FIJI || + adev->asic_type == CHIP_POLARIS10) + uvd_v6_set_bypass_mode(adev, state == AMD_CG_STATE_GATE ? true : false); - curstate = state; - if (enable) { + if (state == AMD_CG_STATE_GATE) { /* disable HW gating and enable Sw gating */ uvd_v6_0_set_sw_clock_gating(adev); } else { -- cgit v0.10.2 From fa5d2e0c103a7763b140746d396dcb8c723e74f2 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 2016 09:45:07 -0400 Subject: drm/amd/amdgpu: enable PG_EN bit in powergating UVD Enable the PG_EN bit just before the SMU would be tasked with the PG transition. Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 7f2b5de..391457f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -997,6 +997,8 @@ static int uvd_v6_0_set_powergating_state(void *handle, if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) return 0; + WREG32(mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); + if (state == AMD_PG_STATE_GATE) { uvd_v6_0_stop(adev); return 0; -- cgit v0.10.2 From 0da31ffce99ea2867e78f2b88c35c55ca15f033d Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 2016 09:46:00 -0400 Subject: drm/amd/amdgpu: Add error messages to UVD PG in DPM Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index 2a11413..bfb6b01 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -2108,29 +2108,58 @@ static void cz_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate) /* disable clockgating so we can properly shut down the block */ ret = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, AMD_CG_STATE_UNGATE); + if (ret) { + DRM_ERROR("UVD DPM Power Gating failed to set clockgating state\n"); + return; + } + /* shutdown the UVD block */ ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, AMD_PG_STATE_GATE); - /* XXX: check for errors */ + + if (ret) { + DRM_ERROR("UVD DPM Power Gating failed to set powergating state\n"); + return; + } } cz_update_uvd_dpm(adev, gate); - if (pi->caps_uvd_pg) + if (pi->caps_uvd_pg) { /* power off the UVD block */ - cz_send_msg_to_smc(adev, PPSMC_MSG_UVDPowerOFF); + ret = cz_send_msg_to_smc(adev, PPSMC_MSG_UVDPowerOFF); + if (ret) { + DRM_ERROR("UVD DPM Power Gating failed to send SMU PowerOFF message\n"); + return; + } + } } else { if (pi->caps_uvd_pg) { /* power on the UVD block */ if (pi->uvd_dynamic_pg) - cz_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_UVDPowerON, 1); + ret = cz_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_UVDPowerON, 1); else - cz_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_UVDPowerON, 0); + ret = cz_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_UVDPowerON, 0); + + if (ret) { + DRM_ERROR("UVD DPM Power Gating Failed to send SMU PowerON message\n"); + return; + } + /* re-init the UVD block */ ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, AMD_PG_STATE_UNGATE); + + if (ret) { + DRM_ERROR("UVD DPM Power Gating Failed to set powergating state\n"); + return; + } + /* enable clockgating. 
hw will dynamically gate/ungate clocks on the fly */ ret = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, AMD_CG_STATE_GATE); - /* XXX: check for errors */ + if (ret) { + DRM_ERROR("UVD DPM Power Gating Failed to set clockgating state\n"); + return; + } } cz_update_uvd_dpm(adev, gate); } -- cgit v0.10.2 From f6ade30418a753e3b52329fa067250091944733f Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 2016 09:33:56 -0400 Subject: drm/amd/amdgpu: Enable carrizo GFX PG Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 03a31c5..90b3ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1249,7 +1249,13 @@ static int vi_common_early_init(void *handle) AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_SDMA_MGCG | AMD_CG_SUPPORT_SDMA_LS; + /* rev0 hardware requires workarounds to support PG */ adev->pg_flags = 0; + if (adev->rev_id != 0x00) { + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_PIPELINE; + } adev->external_rev_id = adev->rev_id + 0x1; break; case CHIP_STONEY: -- cgit v0.10.2 From 65b426225b6f9d48a7250b0bd0b9141a9a6e7312 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 2016 09:35:57 -0400 Subject: drm/amd/amdgpu: Enable carrizo UVD PG Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 90b3ca8..0088171 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1254,7 +1254,8 @@ static int vi_common_early_init(void *handle) if (adev->rev_id != 0x00) { adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | - AMD_PG_SUPPORT_GFX_PIPELINE; + AMD_PG_SUPPORT_GFX_PIPELINE | + AMD_PG_SUPPORT_UVD; } adev->external_rev_id = adev->rev_id + 0x1; break; -- cgit v0.10.2 From 2ed0936de5ccbfd670ebf558770bb89bf9fe3b96 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 2016 09:36:26 -0400 Subject: drm/amd/amdgpu: Enable carrizo VCE PG Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 0088171..66aa1ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1255,7 +1255,8 @@ static int vi_common_early_init(void *handle) adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_PIPELINE | - AMD_PG_SUPPORT_UVD; + AMD_PG_SUPPORT_UVD | + AMD_PG_SUPPORT_VCE; } adev->external_rev_id = adev->rev_id + 0x1; break; -- cgit v0.10.2 From 4e86be75c69cac5ccb599ba5b38df60336f247c9 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 2016 09:38:13 -0400 Subject: drm/amd/amdgpu: Enable stoney GFX PG Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 66aa1ee..c1e4284 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1276,6 +1276,9 @@ static int vi_common_early_init(void *handle) AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_SDMA_MGCG | AMD_CG_SUPPORT_SDMA_LS; + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_PIPELINE; adev->external_rev_id = adev->rev_id + 0x1; break; default: -- cgit v0.10.2 From c2cdb0428026dc79b0a902ca2c4371b88e130846 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 
2016 09:38:29 -0400 Subject: drm/amd/amdgpu: Enable stoney UVD PG Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index c1e4284..9c3f7b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1278,7 +1278,8 @@ static int vi_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS; adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | - AMD_PG_SUPPORT_GFX_PIPELINE; + AMD_PG_SUPPORT_GFX_PIPELINE | + AMD_PG_SUPPORT_UVD; adev->external_rev_id = adev->rev_id + 0x1; break; default: -- cgit v0.10.2 From 75419c437851ab28d75bbf14502342c97cf47eb4 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 28 Jul 2016 09:38:45 -0400 Subject: drm/amd/amdgpu: Enable stoney VCE PG Signed-off-by: Tom St Denis Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 9c3f7b7..aaee251 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1279,7 +1279,8 @@ static int vi_common_early_init(void *handle) adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_PIPELINE | - AMD_PG_SUPPORT_UVD; + AMD_PG_SUPPORT_UVD | + AMD_PG_SUPPORT_VCE; adev->external_rev_id = adev->rev_id + 0x1; break; default: -- cgit v0.10.2 From a94cbfc3773b90cf7ea02d4c079ec54eeabb42d6 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 7 Jul 2016 19:39:15 +0800 Subject: drm/amd/powerplay: fix typos of volt/millivolt symbols in comment. Signed-off-by: Rex Zhu Reviewed-by: Alexandre Demers Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 769636a..a8085e7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -2905,8 +2905,8 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr) continue; } - /* need to make sure vddc is less than 2v or else, it could burn the ASIC. - * real voltage level in unit of 0.01mv */ + /* need to make sure vddc is less than 2V or else, it could burn the ASIC. 
+ * real voltage level in unit of 0.01mV */ PP_ASSERT_WITH_CODE((vddc < 200000 && vddc != 0), "Invalid VDDC value", result = -EINVAL;); -- cgit v0.10.2 From af223dfaf0d93e7a0ed75bed4f69e5db198b741e Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 28 Jul 2016 16:51:47 +0800 Subject: drm/amdgpu: add module parameters to ctrl powerplay feature Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 9aa533c..44fda31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -84,6 +84,7 @@ int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; int amdgpu_powerplay = -1; int amdgpu_powercontainment = 1; +int amdgpu_sclk_deep_sleep_en = 1; unsigned amdgpu_pcie_gen_cap = 0; unsigned amdgpu_pcie_lane_cap = 0; unsigned amdgpu_cg_mask = 0xffffffff; @@ -170,6 +171,9 @@ MODULE_PARM_DESC(powercontainment, "Power Containment (1 = enable (default), 0 = module_param_named(powercontainment, amdgpu_powercontainment, int, 0444); #endif +MODULE_PARM_DESC(sclkdeepsleep, "SCLK Deep Sleep (1 = enable (default), 0 = disable)"); +module_param_named(sclkdeepsleep, amdgpu_sclk_deep_sleep_en, int, 0444); + MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))"); module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 57aa342..a9b3adc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -52,7 +52,6 @@ static int amdgpu_powerplay_init(struct amdgpu_device *adev) pp_init->chip_family = adev->family; pp_init->chip_id = adev->asic_type; pp_init->device = amdgpu_cgs_create_device(adev); - pp_init->powercontainment_enabled = amdgpu_powercontainment; ret = amd_powerplay_init(pp_init, amd_pp); kfree(pp_init); diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index abbb658..12d3333 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -31,6 +31,7 @@ #include "eventmanager.h" #include "pp_debug.h" + #define PP_CHECK(handle) \ do { \ if ((handle) == NULL || (handle)->pp_valid != PP_VALID) \ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c index 120a9e2..e93492b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c @@ -619,9 +619,6 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr) phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TablelessHardwareInterface); - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SclkDeepSleep); - data->gpio_debug = 0; phm_cap_set(hwmgr->platform_descriptor.platformCaps, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c index 4465845..c3b2f51 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c @@ -77,9 +77,8 @@ void fiji_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) fiji_hwmgr->fast_watermark_threshold = 100; - if (hwmgr->powercontainment_enabled) { - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_PowerContainment); + if 
(phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment)) { tmp = 1; fiji_hwmgr->enable_dte_feature = tmp ? false : true; fiji_hwmgr->enable_tdc_limit_feature = tmp ? true : false; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 65408dd..384b425 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -41,6 +41,25 @@ extern int fiji_hwmgr_init(struct pp_hwmgr *hwmgr); extern int polaris10_hwmgr_init(struct pp_hwmgr *hwmgr); extern int iceland_hwmgr_init(struct pp_hwmgr *hwmgr); +static int hwmgr_set_features_platform_caps(struct pp_hwmgr *hwmgr) +{ + if (amdgpu_sclk_deep_sleep_en) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkDeepSleep); + else + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkDeepSleep); + + if (amdgpu_powercontainment) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment); + else + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment); + + return 0; +} + int hwmgr_init(struct amd_pp_init *pp_init, struct pp_instance *handle) { struct pp_hwmgr *hwmgr; @@ -60,7 +79,8 @@ int hwmgr_init(struct amd_pp_init *pp_init, struct pp_instance *handle) hwmgr->hw_revision = pp_init->rev_id; hwmgr->usec_timeout = AMD_MAX_USEC_TIMEOUT; hwmgr->power_source = PP_PowerSource_AC; - hwmgr->powercontainment_enabled = pp_init->powercontainment_enabled; + + hwmgr_set_features_platform_caps(hwmgr); switch (hwmgr->chip_family) { case AMDGPU_FAMILY_CZ: diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_powertune.c index d10cd9f..041e964 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_powertune.c @@ -137,9 +137,8 @@ void iceland_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) data->fast_watermark_threshold = 100; - if (hwmgr->powercontainment_enabled) { - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_PowerContainment); + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment)) { tmp = 1; data->enable_dte_feature = tmp ? false : true; data->enable_tdc_limit_feature = tmp ? 
true : false; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index a8085e7..60cfcbd 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -2773,9 +2773,6 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr) struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SclkDeepSleep); - - phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DynamicPatchPowerState); if (data->mvdd_control == POLARIS10_VOLTAGE_CONTROL_NONE) @@ -2820,13 +2817,6 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr) phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TCPRamping); - if (hwmgr->powercontainment_enabled) - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_PowerContainment); - else - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_PowerContainment); - phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_CAC); diff --git a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h index b764c8c..ba29a5b 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h @@ -132,8 +132,8 @@ struct amd_pp_init { uint32_t chip_family; uint32_t chip_id; uint32_t rev_id; - bool powercontainment_enabled; }; + enum amd_pp_display_config_type{ AMD_PP_DisplayConfigType_None = 0, AMD_PP_DisplayConfigType_DP54 , diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index bf0d2ac..911081c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -41,6 +41,9 @@ struct phm_fan_speed_info; struct pp_atomctrl_voltage_table; +extern int amdgpu_powercontainment; +extern int amdgpu_sclk_deep_sleep_en; + enum DISPLAY_GAP { DISPLAY_GAP_VBLANK_OR_WM = 0, /* Wait for vblank or MCHG watermark. */ DISPLAY_GAP_VBLANK = 1, /* Wait for vblank. 
*/ @@ -614,7 +617,6 @@ struct pp_hwmgr { uint32_t num_ps; struct pp_thermal_controller_info thermal_controller; bool fan_ctrl_is_in_default_mode; - bool powercontainment_enabled; uint32_t fan_ctrl_default_mode; uint32_t tmin; struct phm_microcode_version_info microcode_version_info; -- cgit v0.10.2 From 66bc3f7f03d5457f6c65790319b636bbab53e3cd Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 28 Jul 2016 17:36:35 +0800 Subject: drm/amdgpu: use modules parameter to ctrl deep sleep feature in dpm Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e936468..389322a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -91,6 +91,7 @@ extern unsigned amdgpu_pcie_lane_cap; extern unsigned amdgpu_cg_mask; extern unsigned amdgpu_pg_mask; extern char *amdgpu_disable_cu; +extern int amdgpu_sclk_deep_sleep_en; #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index e2f0e5d..7bd6174 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -5873,7 +5873,10 @@ static int ci_dpm_init(struct amdgpu_device *adev) pi->pcie_dpm_key_disabled = 0; pi->thermal_sclk_dpm_enabled = 0; - pi->caps_sclk_ds = true; + if (amdgpu_sclk_deep_sleep_en) + pi->caps_sclk_ds = true; + else + pi->caps_sclk_ds = false; pi->mclk_strobe_mode_threshold = 40000; pi->mclk_stutter_mode_threshold = 40000; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index bfb6b01..794c5f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -435,7 +435,11 @@ static int cz_dpm_init(struct amdgpu_device *adev) pi->caps_td_ramping = true; pi->caps_tcp_ramping = true; } - pi->caps_sclk_ds = true; + if (amdgpu_sclk_deep_sleep_en) + pi->caps_sclk_ds = true; + else + pi->caps_sclk_ds = false; + pi->voting_clients = 0x00c00033; pi->auto_thermal_throttling_enabled = true; pi->bapm_enabled = false; diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index a845e88..f8618a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -2845,7 +2845,11 @@ static int kv_dpm_init(struct amdgpu_device *adev) pi->caps_tcp_ramping = true; } - pi->caps_sclk_ds = true; + if (amdgpu_sclk_deep_sleep_en) + pi->caps_sclk_ds = true; + else + pi->caps_sclk_ds = false; + pi->enable_auto_thermal_throttling = true; pi->disable_nb_ps3_in_battery = false; if (amdgpu_bapm == 0) -- cgit v0.10.2 From b62774fc2f731f430b8618c1d067b06cce71cbe5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 29 Jul 2016 18:03:42 -0400 Subject: drm/amdgpu/gmc7: add missing mullins case Looks like this got missed when we ported the code from radeon. 
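[Editorial note on the sclkdeepsleep/powercontainment module parameters wired into powerplay and DPM a couple of patches above: judging from the module_param_named() declarations (this usage is an inference, not stated in the patches), the options would be toggled as, e.g.,

    modprobe amdgpu sclkdeepsleep=0 powercontainment=0

or as amdgpu.sclkdeepsleep=0 amdgpu.powercontainment=0 on the kernel command line; both default to 1 (enabled).]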
Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index d24a82b..0b0f086 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -144,6 +144,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) break; case CHIP_KAVERI: case CHIP_KABINI: + case CHIP_MULLINS: return 0; default: BUG(); } -- cgit v0.10.2 From b9a8be95b1149b74a9455f6bbe633aa241ed5204 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 29 Jul 2016 18:14:39 -0400 Subject: drm/amdgpu/ci: add mullins to default case for smc ucode It's already covered by the default case, but add it for consistency. Reviewed-by: Alexandre Demers Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 7bd6174..a0d63a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -5779,6 +5779,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev) break; case CHIP_KAVERI: case CHIP_KABINI: + case CHIP_MULLINS: default: BUG(); } -- cgit v0.10.2 From 1684d3ba488529266ce6f97b88076bd185f4790e Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Thu, 28 Jul 2016 17:25:01 -0400 Subject: drm/amd/amdgpu: change pptable output format from ASCII to binary Reviewed-by: Edward O'Callaghan Reviewed-by: Alex Deucher Signed-off-by: Eric Huang Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 411965f..d4ec3cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -305,7 +305,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; char *table = NULL; - int size, i; + int size; if (adev->pp_enabled) size = amdgpu_dpm_get_pp_table(adev, &table); @@ -315,10 +315,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, if (size >= PAGE_SIZE) size = PAGE_SIZE - 1; - for (i = 0; i < size; i++) { - sprintf(buf + i, "%02x", table[i]); - } - sprintf(buf + i, "\n"); + memcpy(buf, table, size); return size; } -- cgit v0.10.2 From 309889098c6095cf4c1247724c22c2943807da20 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 1 Aug 2016 14:29:15 -0400 Subject: drm/amdgpu/gfx8: remove stale function declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This got leftover somehow when I cleaned this up. 
Reviewed-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h index bc82c79..ebed1f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h @@ -26,6 +26,4 @@ extern const struct amd_ip_funcs gfx_v8_0_ip_funcs; -void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); - #endif -- cgit v0.10.2 From 59b4a97742888d9d3c4daa96bd15157ebd496f81 Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Tue, 19 Jul 2016 16:48:22 +0800 Subject: drm/amdgpu: implement amdgpu_fill_buffer() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit so that bo could be set to some pattern Signed-off-by: Flora Cui Reviewed-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 389322a..9f71ff7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -435,6 +435,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint32_t byte_count, struct reservation_object *resv, struct fence **fence); +int amdgpu_fill_buffer(struct amdgpu_bo *bo, + uint32_t src_data, + struct reservation_object *resv, + struct fence **fence); + int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); struct amdgpu_bo_list_entry { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b7742e6..5dc9c4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1207,6 +1207,68 @@ error_free: return r; } +int amdgpu_fill_buffer(struct amdgpu_bo *bo, + uint32_t src_data, + struct reservation_object *resv, + struct fence **fence) +{ + struct amdgpu_device *adev = bo->adev; + struct amdgpu_job *job; + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + + uint32_t max_bytes, byte_count; + uint64_t dst_offset; + unsigned int num_loops, num_dw; + unsigned int i; + int r; + + byte_count = bo->tbo.num_pages << PAGE_SHIFT; + max_bytes = adev->mman.buffer_funcs->fill_max_bytes; + num_loops = DIV_ROUND_UP(byte_count, max_bytes); + num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; + + /* for IB padding */ + while (num_dw & 0x7) + num_dw++; + + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); + if (r) + return r; + + if (resv) { + r = amdgpu_sync_resv(adev, &job->sync, resv, + AMDGPU_FENCE_OWNER_UNDEFINED); + if (r) { + DRM_ERROR("sync failed (%d).\n", r); + goto error_free; + } + } + + dst_offset = bo->tbo.mem.start << PAGE_SHIFT; + for (i = 0; i < num_loops; i++) { + uint32_t cur_size_in_bytes = min(byte_count, max_bytes); + + amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, + dst_offset, cur_size_in_bytes); + + dst_offset += cur_size_in_bytes; + byte_count -= cur_size_in_bytes; + } + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + r = amdgpu_job_submit(job, ring, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, fence); + if (r) + goto error_free; + + return 0; + +error_free: + amdgpu_job_free(job); + return r; +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_mm_dump_table(struct seq_file *m, void *data) -- cgit v0.10.2 From 4fea83ff0f61676389b17803365c1e8d2b652183 Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Wed, 20 Jul 2016 14:44:38 +0800 Subject: drm/amdgpu: expose AMDGPU_GEM_CREATE_VRAM_CLEARED to user space MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit V2: fix the return value for fill failure and validate bo before filling data Reviewed-by: Christian König Signed-off-by: Flora Cui Reviewed-by: Chunming Zhou Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 67de19c..d8e69a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -340,11 +340,44 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, if (unlikely(r != 0)) { return r; } + + if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && + bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { + struct fence *fence; + + if (adev->mman.buffer_funcs_ring == NULL || + !adev->mman.buffer_funcs_ring->ready) { + r = -EBUSY; + goto fail_free; + } + + r = amdgpu_bo_reserve(bo, false); + if (unlikely(r != 0)) + goto fail_free; + + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + if (unlikely(r != 0)) + goto fail_unreserve; + + amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); + amdgpu_bo_fence(bo, fence, false); + amdgpu_bo_unreserve(bo); + fence_put(bo->tbo.moving); + bo->tbo.moving = fence_get(fence); + fence_put(fence); + } *bo_ptr = bo; trace_amdgpu_bo_create(bo); return 0; + +fail_unreserve: + amdgpu_bo_unreserve(bo); +fail_free: + amdgpu_bo_unref(&bo); + return r; } int amdgpu_bo_create(struct amdgpu_device *adev, diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 462246a..a902a60 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -77,6 +77,8 @@ extern "C" { #define AMDGPU_GEM_CREATE_NO_CPU_ACCESS (1 << 1) /* Flag that USWC attributes should be used for GTT */ #define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) +/* Flag that the memory should be in VRAM and cleared */ +#define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v0.10.2 From 08bd8b9f6f11e5b4f13580805ef3a38848e7fed7 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 29 Jul 2016 16:30:04 +0800 Subject: drm/amd/powerplay: delete useless code in hwmgr. 
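[Editorial note, for illustration only; it refers back to the AMDGPU_GEM_CREATE_VRAM_CLEARED flag exposed to userspace a couple of patches above. Below is a hedged sketch of how an application might request a zero-filled VRAM buffer with the new flag through the raw GEM_CREATE ioctl. The device node, sizes, and header path are assumptions; a real application would normally go through libdrm instead.]

/* Hypothetical sketch: allocate a 1 MiB VRAM BO and ask the kernel to
 * clear it, using the new AMDGPU_GEM_CREATE_VRAM_CLEARED flag.
 * Header path may vary (kernel uapi vs. libdrm install location).
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/amdgpu_drm.h>

int main(void)
{
        union drm_amdgpu_gem_create args;
        int fd = open("/dev/dri/renderD128", O_RDWR);   /* assumed render node */

        if (fd < 0)
                return 1;

        memset(&args, 0, sizeof(args));
        args.in.bo_size = 1 << 20;                      /* 1 MiB */
        args.in.alignment = 4096;
        args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
        args.in.domain_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;

        if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args) == 0)
                printf("cleared VRAM BO handle %u\n", args.out.handle);

        close(fd);
        return 0;
}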
Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 384b425..ba8f57a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -206,29 +206,7 @@ int phm_wait_on_register(struct pp_hwmgr *hwmgr, uint32_t index, return 0; } -int phm_wait_for_register_unequal(struct pp_hwmgr *hwmgr, - uint32_t index, uint32_t value, uint32_t mask) -{ - uint32_t i; - uint32_t cur_value; - - if (hwmgr == NULL || hwmgr->device == NULL) { - printk(KERN_ERR "[ powerplay ] Invalid Hardware Manager!"); - return -EINVAL; - } - - for (i = 0; i < hwmgr->usec_timeout; i++) { - cur_value = cgs_read_register(hwmgr->device, index); - if ((cur_value & mask) != (value & mask)) - break; - udelay(1); - } - /* timeout means wrong logic*/ - if (i == hwmgr->usec_timeout) - return -1; - return 0; -} /** @@ -251,21 +229,7 @@ void phm_wait_on_indirect_register(struct pp_hwmgr *hwmgr, phm_wait_on_register(hwmgr, indirect_port + 1, mask, value); } -void phm_wait_for_indirect_register_unequal(struct pp_hwmgr *hwmgr, - uint32_t indirect_port, - uint32_t index, - uint32_t value, - uint32_t mask) -{ - if (hwmgr == NULL || hwmgr->device == NULL) { - printk(KERN_ERR "[ powerplay ] Invalid Hardware Manager!"); - return; - } - cgs_write_register(hwmgr->device, indirect_port, index); - phm_wait_for_register_unequal(hwmgr, indirect_port + 1, - value, mask); -} bool phm_cf_want_uvd_power_gating(struct pp_hwmgr *hwmgr) { diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 911081c..36b4ec9 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -639,16 +639,7 @@ extern int hw_init_power_state_table(struct pp_hwmgr *hwmgr); extern int phm_wait_on_register(struct pp_hwmgr *hwmgr, uint32_t index, uint32_t value, uint32_t mask); -extern int phm_wait_for_register_unequal(struct pp_hwmgr *hwmgr, - uint32_t index, uint32_t value, uint32_t mask); -extern uint32_t phm_read_indirect_register(struct pp_hwmgr *hwmgr, - uint32_t indirect_port, uint32_t index); - -extern void phm_write_indirect_register(struct pp_hwmgr *hwmgr, - uint32_t indirect_port, - uint32_t index, - uint32_t value); extern void phm_wait_on_indirect_register(struct pp_hwmgr *hwmgr, uint32_t indirect_port, @@ -656,12 +647,7 @@ extern void phm_wait_on_indirect_register(struct pp_hwmgr *hwmgr, uint32_t value, uint32_t mask); -extern void phm_wait_for_indirect_register_unequal( - struct pp_hwmgr *hwmgr, - uint32_t indirect_port, - uint32_t index, - uint32_t value, - uint32_t mask); + extern bool phm_cf_want_uvd_power_gating(struct pp_hwmgr *hwmgr); extern bool phm_cf_want_vce_power_gating(struct pp_hwmgr *hwmgr); @@ -699,43 +685,7 @@ extern void phm_apply_dal_min_voltage_request(struct pp_hwmgr *hwmgr); PHM_FIELD_SHIFT(reg, field)) -#define PHM_WAIT_REGISTER_GIVEN_INDEX(hwmgr, index, value, mask) \ - phm_wait_on_register(hwmgr, index, value, mask) - -#define PHM_WAIT_REGISTER_UNEQUAL_GIVEN_INDEX(hwmgr, index, value, mask) \ - phm_wait_for_register_unequal(hwmgr, index, value, mask) - -#define PHM_WAIT_INDIRECT_REGISTER_GIVEN_INDEX(hwmgr, port, index, value, mask) \ - phm_wait_on_indirect_register(hwmgr, mm##port##_INDEX, index, value, mask) - -#define PHM_WAIT_INDIRECT_REGISTER_UNEQUAL_GIVEN_INDEX(hwmgr, port, index, value, mask) \ - phm_wait_for_indirect_register_unequal(hwmgr, mm##port##_INDEX, index, value, 
mask) -#define PHM_WAIT_VFPF_INDIRECT_REGISTER_GIVEN_INDEX(hwmgr, port, index, value, mask) \ - phm_wait_on_indirect_register(hwmgr, mm##port##_INDEX_0, index, value, mask) - -#define PHM_WAIT_VFPF_INDIRECT_REGISTER_UNEQUAL_GIVEN_INDEX(hwmgr, port, index, value, mask) \ - phm_wait_for_indirect_register_unequal(hwmgr, mm##port##_INDEX_0, index, value, mask) - -/* Operations on named registers. */ - -#define PHM_WAIT_REGISTER(hwmgr, reg, value, mask) \ - PHM_WAIT_REGISTER_GIVEN_INDEX(hwmgr, mm##reg, value, mask) - -#define PHM_WAIT_REGISTER_UNEQUAL(hwmgr, reg, value, mask) \ - PHM_WAIT_REGISTER_UNEQUAL_GIVEN_INDEX(hwmgr, mm##reg, value, mask) - -#define PHM_WAIT_INDIRECT_REGISTER(hwmgr, port, reg, value, mask) \ - PHM_WAIT_INDIRECT_REGISTER_GIVEN_INDEX(hwmgr, port, ix##reg, value, mask) - -#define PHM_WAIT_INDIRECT_REGISTER_UNEQUAL(hwmgr, port, reg, value, mask) \ - PHM_WAIT_INDIRECT_REGISTER_UNEQUAL_GIVEN_INDEX(hwmgr, port, ix##reg, value, mask) - -#define PHM_WAIT_VFPF_INDIRECT_REGISTER(hwmgr, port, reg, value, mask) \ - PHM_WAIT_VFPF_INDIRECT_REGISTER_GIVEN_INDEX(hwmgr, port, ix##reg, value, mask) - -#define PHM_WAIT_VFPF_INDIRECT_REGISTER_UNEQUAL(hwmgr, port, reg, value, mask) \ - PHM_WAIT_VFPF_INDIRECT_REGISTER_UNEQUAL_GIVEN_INDEX(hwmgr, port, ix##reg, value, mask) /* Operations on named fields. */ @@ -764,60 +714,16 @@ extern void phm_apply_dal_min_voltage_request(struct pp_hwmgr *hwmgr); PHM_SET_FIELD(cgs_read_ind_register(device, port, ix##reg), \ reg, field, fieldval)) -#define PHM_WAIT_FIELD(hwmgr, reg, field, fieldval) \ - PHM_WAIT_REGISTER(hwmgr, reg, (fieldval) \ - << PHM_FIELD_SHIFT(reg, field), PHM_FIELD_MASK(reg, field)) - -#define PHM_WAIT_INDIRECT_FIELD(hwmgr, port, reg, field, fieldval) \ - PHM_WAIT_INDIRECT_REGISTER(hwmgr, port, reg, (fieldval) \ - << PHM_FIELD_SHIFT(reg, field), PHM_FIELD_MASK(reg, field)) +#define PHM_WAIT_INDIRECT_REGISTER_GIVEN_INDEX(hwmgr, port, index, value, mask) \ + phm_wait_on_indirect_register(hwmgr, mm##port##_INDEX, index, value, mask) -#define PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, port, reg, field, fieldval) \ - PHM_WAIT_VFPF_INDIRECT_REGISTER(hwmgr, port, reg, (fieldval) \ - << PHM_FIELD_SHIFT(reg, field), PHM_FIELD_MASK(reg, field)) -#define PHM_WAIT_FIELD_UNEQUAL(hwmgr, reg, field, fieldval) \ - PHM_WAIT_REGISTER_UNEQUAL(hwmgr, reg, (fieldval) \ - << PHM_FIELD_SHIFT(reg, field), PHM_FIELD_MASK(reg, field)) - -#define PHM_WAIT_INDIRECT_FIELD_UNEQUAL(hwmgr, port, reg, field, fieldval) \ - PHM_WAIT_INDIRECT_REGISTER_UNEQUAL(hwmgr, port, reg, (fieldval) \ - << PHM_FIELD_SHIFT(reg, field), PHM_FIELD_MASK(reg, field)) +#define PHM_WAIT_INDIRECT_REGISTER(hwmgr, port, reg, value, mask) \ + PHM_WAIT_INDIRECT_REGISTER_GIVEN_INDEX(hwmgr, port, ix##reg, value, mask) -#define PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(hwmgr, port, reg, field, fieldval) \ - PHM_WAIT_VFPF_INDIRECT_REGISTER_UNEQUAL(hwmgr, port, reg, (fieldval) \ +#define PHM_WAIT_INDIRECT_FIELD(hwmgr, port, reg, field, fieldval) \ + PHM_WAIT_INDIRECT_REGISTER(hwmgr, port, reg, (fieldval) \ << PHM_FIELD_SHIFT(reg, field), PHM_FIELD_MASK(reg, field)) -/* Operations on arrays of registers & fields. 
*/ - -#define PHM_READ_ARRAY_REGISTER(device, reg, offset) \ - cgs_read_register(device, mm##reg + (offset)) - -#define PHM_WRITE_ARRAY_REGISTER(device, reg, offset, value) \ - cgs_write_register(device, mm##reg + (offset), value) - -#define PHM_WAIT_ARRAY_REGISTER(hwmgr, reg, offset, value, mask) \ - PHM_WAIT_REGISTER_GIVEN_INDEX(hwmgr, mm##reg + (offset), value, mask) - -#define PHM_WAIT_ARRAY_REGISTER_UNEQUAL(hwmgr, reg, offset, value, mask) \ - PHM_WAIT_REGISTER_UNEQUAL_GIVEN_INDEX(hwmgr, mm##reg + (offset), value, mask) - -#define PHM_READ_ARRAY_FIELD(hwmgr, reg, offset, field) \ - PHM_GET_FIELD(PHM_READ_ARRAY_REGISTER(hwmgr->device, reg, offset), reg, field) - -#define PHM_WRITE_ARRAY_FIELD(hwmgr, reg, offset, field, fieldvalue) \ - PHM_WRITE_ARRAY_REGISTER(hwmgr->device, reg, offset, \ - PHM_SET_FIELD(PHM_READ_ARRAY_REGISTER(hwmgr->device, reg, offset), \ - reg, field, fieldvalue)) - -#define PHM_WAIT_ARRAY_FIELD(hwmgr, reg, offset, field, fieldvalue) \ - PHM_WAIT_REGISTER_GIVEN_INDEX(hwmgr, mm##reg + (offset), \ - (fieldvalue) << PHM_FIELD_SHIFT(reg, field), \ - PHM_FIELD_MASK(reg, field)) - -#define PHM_WAIT_ARRAY_FIELD_UNEQUAL(hwmgr, reg, offset, field, fieldvalue) \ - PHM_WAIT_REGISTER_UNEQUAL_GIVEN_INDEX(hwmgr, mm##reg + (offset), \ - (fieldvalue) << PHM_FIELD_SHIFT(reg, field), \ - PHM_FIELD_MASK(reg, field)) #endif /* _HWMGR_H_ */ -- cgit v0.10.2 From 93bdacd1bd7b81921c165e94bbe3dcefd6f88dc1 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 29 Jul 2016 19:15:52 +0800 Subject: drm/amd/powerplay: pass sub_device_id and sub_vendor_id to powerplay. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index a9b3adc..8528850 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -52,6 +52,9 @@ static int amdgpu_powerplay_init(struct amdgpu_device *adev) pp_init->chip_family = adev->family; pp_init->chip_id = adev->asic_type; pp_init->device = amdgpu_cgs_create_device(adev); + pp_init->rev_id = adev->pdev->revision; + pp_init->sub_sys_id = adev->pdev->subsystem_device; + pp_init->sub_vendor_id = adev->pdev->subsystem_vendor; ret = amd_powerplay_init(pp_init, amd_pp); kfree(pp_init); diff --git a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h index ba29a5b..3f8172f 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h @@ -132,6 +132,8 @@ struct amd_pp_init { uint32_t chip_family; uint32_t chip_id; uint32_t rev_id; + uint16_t sub_sys_id; + uint16_t sub_vendor_id; }; enum amd_pp_display_config_type{ -- cgit v0.10.2 From 5765a36d4c88d7ea2ff53e35cccca751c74716d8 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 29 Jul 2016 19:19:26 +0800 Subject: drm/amdgpu: AMD SW workaround for certain partner boards Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index ce0929b..7e8cddb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -699,7 +699,10 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) polaris10_golden_common_all, (const u32)ARRAY_SIZE(polaris10_golden_common_all)); WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); - if (adev->pdev->revision == 0xc7) { + if (adev->pdev->revision == 0xc7 && + 
((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) || + (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) || + (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) { amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD); amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0); } -- cgit v0.10.2 From 6391b6ab3246fe23840ed3635b17ad83f0fd0942 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 29 Jul 2016 19:26:36 +0800 Subject: drm/amd/powrplay: workaround Memory EDC Error for certain partner boards. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index ba8f57a..d829076 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -77,6 +77,8 @@ int hwmgr_init(struct amd_pp_init *pp_init, struct pp_instance *handle) hwmgr->chip_family = pp_init->chip_family; hwmgr->chip_id = pp_init->chip_id; hwmgr->hw_revision = pp_init->rev_id; + hwmgr->sub_sys_id = pp_init->sub_sys_id; + hwmgr->sub_vendor_id = pp_init->sub_vendor_id; hwmgr->usec_timeout = AMD_MAX_USEC_TIMEOUT; hwmgr->power_source = PP_PowerSource_AC; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 60cfcbd..7d55296 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -3133,7 +3133,10 @@ int polaris10_patch_voltage_workaround(struct pp_hwmgr *hwmgr) table_info->vddc_lookup_table; uint32_t i; - if (hwmgr->chip_id == CHIP_POLARIS10 && hwmgr->hw_revision == 0xC7) { + if (hwmgr->chip_id == CHIP_POLARIS10 && hwmgr->hw_revision == 0xC7 && + ((hwmgr->sub_sys_id == 0xb37 && hwmgr->sub_vendor_id == 0x1002) || + (hwmgr->sub_sys_id == 0x4a8 && hwmgr->sub_vendor_id == 0x1043) || + (hwmgr->sub_sys_id == 0x9480 && hwmgr->sub_vendor_id == 0x1682))) { if (lookup_table->entries[dep_mclk_table->entries[dep_mclk_table->count-1].vddInd].us_vdd >= 1000) return 0; -- cgit v0.10.2 From 8e9fc800ec9e1a50a1fa7c38af8bf80d29dd85b4 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Sun, 31 Jul 2016 13:46:01 +0800 Subject: drm/amd/powrplay: delete code no longer in use on Polaris. 
Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 7d55296..98e01d1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -98,19 +98,6 @@ #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) - -static const uint16_t polaris10_clock_stretcher_lookup_table[2][4] = -{ {600, 1050, 3, 0}, {600, 1050, 6, 1} }; - -/* [FF, SS] type, [] 4 voltage ranges, and [Floor Freq, Boundary Freq, VID min , VID max] */ -static const uint32_t polaris10_clock_stretcher_ddt_table[2][4][4] = -{ { {265, 529, 120, 128}, {325, 650, 96, 119}, {430, 860, 32, 95}, {0, 0, 0, 31} }, - { {275, 550, 104, 112}, {319, 638, 96, 103}, {360, 720, 64, 95}, {384, 768, 32, 63} } }; - -/* [Use_For_Low_freq] value, [0%, 5%, 10%, 7.14%, 14.28%, 20%] (coming from PWR_CKS_CNTL.stretch_amount reg spec) */ -static const uint8_t polaris10_clock_stretch_amount_conversion[2][6] = -{ {0, 1, 3, 2, 4, 5}, {0, 2, 4, 5, 6, 5} }; - /** Values for the CG_THERMAL_CTRL::DPM_EVENT_SRC field. */ enum DPM_EVENT_SRC { DPM_EVENT_SRC_ANALOG = 0, -- cgit v0.10.2 From ecf6a637090c8034bc0d843c48905bfa5e2a4e0c Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Wed, 27 Jul 2016 19:12:15 +0800 Subject: drm/ttm: Add interface to export kernel_zone max memory size in ttm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ken Wang Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index a1803fb..29855be 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -600,3 +600,9 @@ size_t ttm_round_pot(size_t size) return 0; } EXPORT_SYMBOL(ttm_round_pot); + +uint64_t ttm_get_kernel_zone_memory_size(struct ttm_mem_global *glob) +{ + return glob->zone_kernel->max_mem; +} +EXPORT_SYMBOL(ttm_get_kernel_zone_memory_size); diff --git a/include/drm/ttm/ttm_memory.h b/include/drm/ttm/ttm_memory.h index 72dcbe8..c452089 100644 --- a/include/drm/ttm/ttm_memory.h +++ b/include/drm/ttm/ttm_memory.h @@ -155,4 +155,5 @@ extern int ttm_mem_global_alloc_page(struct ttm_mem_global *glob, extern void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page); extern size_t ttm_round_pot(size_t size); +extern uint64_t ttm_get_kernel_zone_memory_size(struct ttm_mem_global *glob); #endif -- cgit v0.10.2 From a693e050edfe794fea81d7cfe72429a406aa380b Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Wed, 27 Jul 2016 19:18:01 +0800 Subject: drm/amdgpu: use the kernel zone memory size as the max remote memory in amdgpu Signed-off-by: Ken Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9f71ff7..12112cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2428,6 +2428,8 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base); void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc); void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size); +u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev); +int amdgpu_ttm_global_init(struct amdgpu_device *adev); void 
amdgpu_program_register_sequence(struct amdgpu_device *adev, const u32 *registers, const u32 array_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5dc9c4c..819c62d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -74,7 +75,7 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) ttm_mem_global_release(ref->object); } -static int amdgpu_ttm_global_init(struct amdgpu_device *adev) +int amdgpu_ttm_global_init(struct amdgpu_device *adev) { struct drm_global_reference *global_ref; struct amdgpu_ring *ring; @@ -987,10 +988,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) unsigned i, j; int r; - r = amdgpu_ttm_global_init(adev); - if (r) { - return r; - } /* No others user of address space so set it to 0 */ r = ttm_bo_device_init(&adev->mman.bdev, adev->mman.bo_global_ref.ref.object, @@ -1449,3 +1446,8 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev) #endif } + +u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev) +{ + return ttm_get_kernel_zone_memory_size(adev->mman.mem_global_ref.object); +} diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 0b0f086..bca50e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -393,7 +393,7 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * size equal to the 1024 or vram, whichever is larger. */ if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + adev->mc.gtt_size = amdgpu_ttm_get_gtt_mem_size(adev); else adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; @@ -953,6 +953,11 @@ static int gmc_v7_0_sw_init(void *handle) return r; } + r = amdgpu_ttm_global_init(adev); + if (r) { + return r; + } + r = gmc_v7_0_mc_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 0a23b83..3cbbf53 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -469,7 +469,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * size equal to the 1024 or vram, whichever is larger. */ if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + adev->mc.gtt_size = amdgpu_ttm_get_gtt_mem_size(adev); else adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; @@ -949,6 +949,11 @@ static int gmc_v8_0_sw_init(void *handle) return r; } + r = amdgpu_ttm_global_init(adev); + if (r) { + return r; + } + r = gmc_v8_0_mc_init(adev); if (r) return r; -- cgit v0.10.2 From f10379503e2b1814e27957899bcdfd0132a1915e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 30 Jul 2016 00:48:39 +0200 Subject: drm/amdgpu: print more accurate error messages on IB submission failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's useful for debugging. 
Signed-off-by: Marek Olšák Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 0307ff5..8eb93df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -386,8 +386,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates); - if (unlikely(r != 0)) + if (unlikely(r != 0)) { + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); goto error_free_pages; + } /* Without a BO list we don't have userptr BOs */ if (!p->bo_list) @@ -427,9 +429,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, /* Unreserve everything again. */ ttm_eu_backoff_reservation(&p->ticket, &p->validated); - /* We tried to often, just abort */ + /* We tried too many times, just abort */ if (!--tries) { r = -EDEADLK; + DRM_ERROR("deadlock in %s\n", __func__); goto error_free_pages; } @@ -441,11 +444,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, sizeof(struct page*)); if (!e->user_pages) { r = -ENOMEM; + DRM_ERROR("calloc failure in %s\n", __func__); goto error_free_pages; } r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); if (r) { + DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); drm_free_large(e->user_pages); e->user_pages = NULL; goto error_free_pages; @@ -462,12 +467,16 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, p->bytes_moved = 0; r = amdgpu_cs_list_validate(p, &duplicates); - if (r) + if (r) { + DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n"); goto error_validate; + } r = amdgpu_cs_list_validate(p, &p->validated); - if (r) + if (r) { + DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n"); goto error_validate; + } fpriv->vm.last_eviction_counter = atomic64_read(&p->adev->num_evictions); -- cgit v0.10.2 From 713c0021979a525b9e87dce8fd9d79dd0b99db4c Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Wed, 3 Aug 2016 09:25:59 -0400 Subject: drm/amdgpu: remove the check for sessions being closed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will make clock and power gated when no block decoded, for example when paused during the playback. 
Signed-off-by: Leo Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index c22b64e..bf59354 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1089,15 +1089,9 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, uvd.idle_work.work); - unsigned i, fences, handles = 0; + unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring); - fences = amdgpu_fence_count_emitted(&adev->uvd.ring); - - for (i = 0; i < adev->uvd.max_handles; ++i) - if (atomic_read(&adev->uvd.handles[i])) - ++handles; - - if (fences == 0 && handles == 0) { + if (fences == 0) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, false); } else { -- cgit v0.10.2 From f16fe6d303855b609618d5db8705a8ee4634e4bf Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 3 Aug 2016 10:14:44 -0400 Subject: drm/amd/amdgpu: fix indentation in vce3 CG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Christian König Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index e5b18ad..26fb606 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -126,6 +126,7 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev, bool gated) { u32 tmp, data; + /* Set Override to disable Clock Gating */ vce_v3_0_override_vce_clock_gating(adev, true); @@ -165,9 +166,9 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev, /* Force VCE_UENC_DMA_DCLK_CTRL Clock ON */ tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL); data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK | - VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | - VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK | - 0x8; + VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | + VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK | + 0x8; if (tmp != data) WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data); } else { @@ -201,9 +202,9 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev, /* Set VCE_UENC_DMA_DCLK_CTRL CG always in dynamic mode */ tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL); data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK | - VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | - VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK | - 0x8); + VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | + VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK | + 0x8); if (tmp != data) WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data); } -- cgit v0.10.2 From 6f906814a5a8045874910f9c21c3ffd758a22f65 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 3 Aug 2016 10:15:24 -0400 Subject: drm/amd/amdgpu: Fix VCE CG order and resume defaults CG was being enabled in reverse sense from dpm/powerplay. Also fix the default CLK_EN signal to enable all of the blocks. 
Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 26fb606..a9db5cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -130,40 +130,35 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev, /* Set Override to disable Clock Gating */ vce_v3_0_override_vce_clock_gating(adev, true); - if (!gated) { - /* Force CLOCK ON for VCE_CLOCK_GATING_B, - * {*_FORCE_ON, *_FORCE_OFF} = {1, 0} - * VREG can be FORCE ON or set to Dynamic, but can't be OFF - */ + /* This function enables MGCG which is controlled by firmware. + With the clocks in the gated state the core is still + accessible but the firmware will throttle the clocks on the + fly as necessary. + */ + if (gated) { tmp = data = RREG32(mmVCE_CLOCK_GATING_B); data |= 0x1ff; data &= ~0xef0000; if (tmp != data) WREG32(mmVCE_CLOCK_GATING_B, data); - /* Force CLOCK ON for VCE_UENC_CLOCK_GATING, - * {*_FORCE_ON, *_FORCE_OFF} = {1, 0} - */ tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING); data |= 0x3ff000; data &= ~0xffc00000; if (tmp != data) WREG32(mmVCE_UENC_CLOCK_GATING, data); - /* set VCE_UENC_CLOCK_GATING_2 */ tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING_2); data |= 0x2; - data &= ~0x2; + data &= ~0x00010000; if (tmp != data) WREG32(mmVCE_UENC_CLOCK_GATING_2, data); - /* Force CLOCK ON for VCE_UENC_REG_CLOCK_GATING */ tmp = data = RREG32(mmVCE_UENC_REG_CLOCK_GATING); data |= 0x37f; if (tmp != data) WREG32(mmVCE_UENC_REG_CLOCK_GATING, data); - /* Force VCE_UENC_DMA_DCLK_CTRL Clock ON */ tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL); data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK | VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | @@ -172,34 +167,27 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev, if (tmp != data) WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data); } else { - /* Force CLOCK OFF for VCE_CLOCK_GATING_B, - * {*, *_FORCE_OFF} = {*, 1} - * set VREG to Dynamic, as it can't be OFF - */ tmp = data = RREG32(mmVCE_CLOCK_GATING_B); data &= ~0x80010; data |= 0xe70008; if (tmp != data) WREG32(mmVCE_CLOCK_GATING_B, data); - /* Force CLOCK OFF for VCE_UENC_CLOCK_GATING, - * Force ClOCK OFF takes precedent over Force CLOCK ON setting. 
- * {*_FORCE_ON, *_FORCE_OFF} = {*, 1} - */ + tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING); data |= 0xffc00000; if (tmp != data) WREG32(mmVCE_UENC_CLOCK_GATING, data); - /* Set VCE_UENC_CLOCK_GATING_2 */ + tmp = data = RREG32(mmVCE_UENC_CLOCK_GATING_2); data |= 0x10000; if (tmp != data) WREG32(mmVCE_UENC_CLOCK_GATING_2, data); - /* Set VCE_UENC_REG_CLOCK_GATING to dynamic */ + tmp = data = RREG32(mmVCE_UENC_REG_CLOCK_GATING); data &= ~0xffc00000; if (tmp != data) WREG32(mmVCE_UENC_REG_CLOCK_GATING, data); - /* Set VCE_UENC_DMA_DCLK_CTRL CG always in dynamic mode */ + tmp = data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL); data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK | VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | @@ -538,7 +526,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx) WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16)); WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000); WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F); - WREG32(mmVCE_CLOCK_GATING_B, 0xf7); + WREG32(mmVCE_CLOCK_GATING_B, 0x1FF); WREG32(mmVCE_LMI_CTRL, 0x00398000); WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1); -- cgit v0.10.2 From 13d58d2a492269880e85d89e124edb814753fa37 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 3 Aug 2016 10:15:48 -0400 Subject: drm/amd/amdgpu: Don't set VCE CG in startup It's handled by DPM/PP properly. Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index a9db5cb..311b384 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -285,10 +285,6 @@ static int vce_v3_0_start(struct amdgpu_device *adev) /* clear BUSY flag */ WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK); - /* Set Clock-Gating off */ - if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG) - vce_v3_0_set_vce_sw_clock_gating(adev, false); - if (r) { DRM_ERROR("VCE not responding, giving up!!!\n"); mutex_unlock(&adev->grbm_idx_mutex); -- cgit v0.10.2 From 1af69a2c355e9a6d3d9384697be923e34265f79c Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 3 Aug 2016 10:16:17 -0400 Subject: drm/amd/amdgpu: Enable VCE MGCG for Carrizo Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index aaee251..de959a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1248,7 +1248,8 @@ static int vi_common_early_init(void *handle) AMD_CG_SUPPORT_HDP_MGCG | AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_SDMA_MGCG | - AMD_CG_SUPPORT_SDMA_LS; + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_VCE_MGCG; /* rev0 hardware requires workarounds to support PG */ adev->pg_flags = 0; if (adev->rev_id != 0x00) { -- cgit v0.10.2 From 8ef583e9f9df9b6371e3faecb3955350f0588a4c Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 3 Aug 2016 11:34:35 -0400 Subject: drm/amd/amdgpu: Enable VCE MGCG for Stoney Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index de959a7..8f37066 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1276,7 +1276,8 @@ static int vi_common_early_init(void *handle) AMD_CG_SUPPORT_HDP_MGCG | AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_SDMA_MGCG | - AMD_CG_SUPPORT_SDMA_LS; + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_VCE_MGCG; adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 
AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_PIPELINE | -- cgit v0.10.2 From 568ba157cdb561c8e52985d81bdabed3f6248f77 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 3 Aug 2016 12:37:23 -0400 Subject: drm/amd/amdgpu: Partially revert change to UVD v3 CG Partially undo changes made by commit: drm/amd/amdgpu: don't track state in UVD clockgating To keep bypass even if CG flags are not set. Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 391457f..c11b97f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -960,13 +960,13 @@ static int uvd_v6_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) - return 0; - if (adev->asic_type == CHIP_FIJI || adev->asic_type == CHIP_POLARIS10) uvd_v6_set_bypass_mode(adev, state == AMD_CG_STATE_GATE ? true : false); + if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) + return 0; + if (state == AMD_CG_STATE_GATE) { /* disable HW gating and enable Sw gating */ uvd_v6_0_set_sw_clock_gating(adev); -- cgit v0.10.2 From e72cfd58998834570b0ea9cd1961e4abe84945e1 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 27 Jul 2016 13:15:20 +0800 Subject: drm/amdgpu: irq resume should be immediately after gpu resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9b358df..c715f1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2094,6 +2094,7 @@ retry: amdgpu_atombios_scratch_regs_restore(adev); } if (!r) { + amdgpu_irq_gpu_reset_resume_helper(adev); r = amdgpu_ib_ring_tests(adev); if (r) { dev_err(adev->dev, "ib ring test failed (%d).\n", r); @@ -2124,7 +2125,6 @@ retry: /* bad news, how to tell it to userspace ? 
*/ dev_info(adev->dev, "GPU reset failed\n"); } - amdgpu_irq_gpu_reset_resume_helper(adev); return r; } -- cgit v0.10.2 From 9c0171b41248805d928863fc87190e6530b71773 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 3 Aug 2016 14:55:46 +0800 Subject: drm/amdgpu: fix harvest config checking in vce3 check_soft_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 311b384..516882b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -631,9 +631,6 @@ static int vce_v3_0_check_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, GRBM_GFX_INDEX, INSTANCE_INDEX, 0); WREG32(mmGRBM_GFX_INDEX, tmp); - if (adev->vce.harvest_config & (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) - srbm_soft_reset = 0; - if (srbm_soft_reset) { adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = true; adev->vce.srbm_soft_reset = srbm_soft_reset; -- cgit v0.10.2 From ac8e3f304047e8f0229e217afaee25be918f8ae7 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 3 Aug 2016 15:10:28 +0800 Subject: drm/amdgpu: remove RB1_BUSY bit checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a workaround to let VCE soft reset work. RB1_BUSY bit is always set, so remove its checking now, and we will depend on RB0_BUSY currently. After we find the root cause of RB1_BUSY, we can add it back. Signed-off-by: Rex Zhu Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 516882b..76d5d68 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -589,7 +589,11 @@ static int vce_v3_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -#define AMDGPU_VCE_STATUS_BUSY_MASK 0x78 +#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */ +#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */ +#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */ +#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \ + VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK) static int vce_v3_0_check_soft_reset(void *handle) { -- cgit v0.10.2 From bde1d8b228cfaca212bcf5f093b5c8c928af2bff Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 3 Aug 2016 15:15:37 +0800 Subject: drm/amdgpu: enable VCE soft reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c715f1b..72d5d09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1977,7 +1977,6 @@ static bool amdgpu_need_full_reset(struct amdgpu_device *adev) { if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang || - adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_ACP].hang || adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) { DRM_INFO("Some block need full reset!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 76d5d68..7e6bb45 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -601,9 +601,6 @@ static int vce_v3_0_check_soft_reset(void *handle) u32 srbm_soft_reset = 0; u32 tmp; - /* VCE BUG: it is always busy, so skip its checking now */ - return 0; - /* According to VCE team , we should use VCE_STATUS instead * SRBM_STATUS.VCE_BUSY bit for busy status checking. * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE -- cgit v0.10.2 From 29425288ba76f2476b2a9375e1d171f631aa01dd Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 3 Aug 2016 14:37:19 +0800 Subject: drm/amd/powerplay: delete code set/unset deep_sleep/power_containment. they were controled by module parameter. Signed-off-by: Rex Zhu Reviewed-by: Ken Wang Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c index 8cc0df9..4015384 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c @@ -221,9 +221,6 @@ static int cz_initialize_dpm_defaults(struct pp_hwmgr *hwmgr) phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_NonABMSupportInPPLib); - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SclkDeepSleep); - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DynamicM3Arbiter); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c index c3b2f51..f5992ea 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c @@ -57,8 +57,6 @@ void fiji_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) /* Assume disabled */ phm_cap_unset(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_PowerContainment); - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_CAC); phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SQRamping); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c index d8ca59b..f48f3ea 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/iceland_hwmgr.c @@ -3621,9 +3621,6 @@ static int iceland_hwmgr_backend_init(struct pp_hwmgr *hwmgr) data->engine_clock_data = 0; data->memory_clock_data = 0; - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SclkDeepSleep); - phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleepAboveLow); @@ -3847,8 +3844,7 @@ static int iceland_hwmgr_backend_init(struct pp_hwmgr *hwmgr) PHM_PlatformCaps_GFXClockGatingManagedInCAIL); phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_IcelandULPSSWWorkAround); - phm_cap_set(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_PowerContainment); + /* iceland doesn't support UVD and VCE */ phm_cap_unset(hwmgr->platform_descriptor.platformCaps, -- cgit v0.10.2 From 173247569a95a63daa1a28b4c7e2c7d02a89d1a3 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 1 Aug 2016 10:49:00 +0800 Subject: drm/amd/powerplay: change structure variable name. 
Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 5dba7c5..704ff4c 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -978,7 +978,7 @@ static int polaris10_smu_init(struct pp_smumgr *smumgr) return 0; } -static const struct pp_smumgr_func ellsemere_smu_funcs = { +static const struct pp_smumgr_func polaris10_smu_funcs = { .smu_init = polaris10_smu_init, .smu_fini = polaris10_smu_fini, .start_smu = polaris10_start_smu, @@ -1001,7 +1001,7 @@ int polaris10_smum_init(struct pp_smumgr *smumgr) return -1; smumgr->backend = polaris10_smu; - smumgr->smumgr_funcs = &ellsemere_smu_funcs; + smumgr->smumgr_funcs = &polaris10_smu_funcs; return 0; } -- cgit v0.10.2 From fad2af195f1abaada473f4f9e9a554c1e4db768b Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Mon, 18 Apr 2016 23:29:32 +0800 Subject: drm/amd/powerplay: enable powerplay as default on CZ/ST Enable powerplay as default on Carrizo and Stoney. And it can be disabled with amdgpu.powerplay=0. Reviewed-by: Alex Deucher Signed-off-by: Huang Rui Cc: Rex Zhu Cc: Flora Cui Cc: Ken Wang Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 8528850..5450744 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -109,11 +109,9 @@ static int amdgpu_pp_early_init(void *handle) case CHIP_TONGA: case CHIP_FIJI: case CHIP_TOPAZ: - adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true; - break; case CHIP_CARRIZO: case CHIP_STONEY: - adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false; + adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true; break; /* These chips don't have powerplay implemenations */ case CHIP_BONAIRE: -- cgit v0.10.2 From fdba11f4079ec0b3037a0e3ce6014b47135db431 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 31 Mar 2016 18:18:28 -0400 Subject: drm/amdgpu: move all Kconfig options to amdgpu/Kconfig For consistency. Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index fc35731..64f2a44 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -171,9 +171,6 @@ config DRM_AMDGPU If M is selected, the module will be called amdgpu. source "drivers/gpu/drm/amd/amdgpu/Kconfig" -source "drivers/gpu/drm/amd/powerplay/Kconfig" - -source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/nouveau/Kconfig" diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 7335c04..f3cb69d 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -25,3 +25,5 @@ config DRM_AMDGPU_GART_DEBUGFS Selecting this option creates a debugfs file to inspect the mapped pages. Uses more memory for housekeeping, enable only for debugging. +source "drivers/gpu/drm/amd/powerplay/Kconfig" +source "drivers/gpu/drm/amd/acp/Kconfig" -- cgit v0.10.2 From 7b8082bc0774b75a8a3f457f2399a85d4d515c56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 5 Aug 2016 18:36:10 +0900 Subject: drm/ttm: Wait for a BO to become idle before unbinding it from GTT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes hangs under memory pressure, e.g. 
running the piglit test tex3d-maxsize concurrently with other tests. Fixes: 17d33bc9d6ef ("drm/ttm: drop waiting for idle in ttm_bo_evict.") Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 819c62d..105bd22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -336,7 +336,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; @@ -369,7 +369,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 528bdef..6190035 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1151,7 +1151,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr, if (ret) goto out; - ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem); + ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, new_mem); out: ttm_bo_mem_put(bo, &tmp_mem); return ret; @@ -1179,7 +1179,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr, if (ret) return ret; - ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem); + ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, &tmp_mem); if (ret) goto out; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index ffdad81..0c00e19 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -346,7 +346,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; @@ -379,7 +379,7 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo, if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 4054d80..42c074a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -354,7 +354,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) && !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) - ret = ttm_bo_move_ttm(bo, evict, no_wait_gpu, mem); + ret = ttm_bo_move_ttm(bo, evict, interruptible, no_wait_gpu, + mem); else if (bdev->driver->move) ret = bdev->driver->move(bo, evict, interruptible, no_wait_gpu, mem); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2df602a..f157a9e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -45,7 +45,7 @@ void ttm_bo_free_old_node(struct ttm_buffer_object *bo) } int ttm_bo_move_ttm(struct ttm_buffer_object *bo, - bool evict, + bool evict, bool interruptible, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { struct ttm_tt 
*ttm = bo->ttm; @@ -53,6 +53,14 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo, int ret; if (old_mem->mem_type != TTM_PL_SYSTEM) { + ret = ttm_bo_wait(bo, interruptible, no_wait_gpu); + + if (unlikely(ret != 0)) { + if (ret != -ERESTARTSYS) + pr_err("Failed to expire sync object before unbinding TTM\n"); + return ret; + } + ttm_tt_unbind(ttm); ttm_bo_free_old_node(bo); ttm_flag_masked(&old_mem->placement, TTM_PL_FLAG_SYSTEM, diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 4348d6d..99c6d01 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -962,6 +962,7 @@ void ttm_mem_io_free(struct ttm_bo_device *bdev, * * @bo: A pointer to a struct ttm_buffer_object. * @evict: 1: This is an eviction. Don't try to pipeline. + * @interruptible: Sleep interruptible if waiting. * @no_wait_gpu: Return immediately if the GPU is busy. * @new_mem: struct ttm_mem_reg indicating where to move. * @@ -976,7 +977,7 @@ void ttm_mem_io_free(struct ttm_bo_device *bdev, */ extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo, - bool evict, bool no_wait_gpu, + bool evict, bool interruptible, bool no_wait_gpu, struct ttm_mem_reg *new_mem); /** -- cgit v0.10.2 From 4e2f0caa3960c1890ca4a7650d5e6bebbcc8ca04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 8 Aug 2016 12:28:25 +0900 Subject: drm/ttm: Remove unused parameter evict from ttm_bo_move_ttm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 105bd22..42c9066 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -336,7 +336,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; @@ -369,7 +369,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 6190035..01460d7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1151,7 +1151,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr, if (ret) goto out; - ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, new_mem); + ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, new_mem); out: ttm_bo_mem_put(bo, &tmp_mem); return ret; @@ -1179,7 +1179,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr, if (ret) return ret; - ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, &tmp_mem); + ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, &tmp_mem); if (ret) goto out; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 0c00e19..f8f474d 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -346,7 +346,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, true, interruptible, 
no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; @@ -379,7 +379,7 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo, if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 42c074a..2a750d4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -354,8 +354,7 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) && !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) - ret = ttm_bo_move_ttm(bo, evict, interruptible, no_wait_gpu, - mem); + ret = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, mem); else if (bdev->driver->move) ret = bdev->driver->move(bo, evict, interruptible, no_wait_gpu, mem); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index f157a9e..f87162f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -45,8 +45,8 @@ void ttm_bo_free_old_node(struct ttm_buffer_object *bo) } int ttm_bo_move_ttm(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, struct ttm_mem_reg *new_mem) + bool interruptible, bool no_wait_gpu, + struct ttm_mem_reg *new_mem) { struct ttm_tt *ttm = bo->ttm; struct ttm_mem_reg *old_mem = &bo->mem; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 99c6d01..12d348f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -961,7 +961,6 @@ void ttm_mem_io_free(struct ttm_bo_device *bdev, * ttm_bo_move_ttm * * @bo: A pointer to a struct ttm_buffer_object. - * @evict: 1: This is an eviction. Don't try to pipeline. * @interruptible: Sleep interruptible if waiting. * @no_wait_gpu: Return immediately if the GPU is busy. * @new_mem: struct ttm_mem_reg indicating where to move. 
@@ -977,7 +976,7 @@ void ttm_mem_io_free(struct ttm_bo_device *bdev, */ extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo, - bool evict, bool interruptible, bool no_wait_gpu, + bool interruptible, bool no_wait_gpu, struct ttm_mem_reg *new_mem); /** -- cgit v0.10.2 From 4499f2acd57b568e8fa2c7908a8a3cf2802629c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 8 Aug 2016 12:28:26 +0900 Subject: drm/ttm: Remove unused parameter evict from ttm_bo_move_memcpy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 42c9066..3cf54c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -436,8 +436,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, evict, interruptible, - no_wait_gpu, new_mem); + r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem); if (r) { return r; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 01460d7..8ab9ce5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1297,7 +1297,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr, /* Fallback to software copy. */ ret = ttm_bo_wait(bo, intr, no_wait_gpu); if (ret == 0) - ret = ttm_bo_move_memcpy(bo, evict, intr, no_wait_gpu, new_mem); + ret = ttm_bo_move_memcpy(bo, intr, no_wait_gpu, new_mem); out: if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) { diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index d50c967..6a22de0 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -361,8 +361,8 @@ static int qxl_bo_move(struct ttm_buffer_object *bo, qxl_move_null(bo, new_mem); return 0; } - return ttm_bo_move_memcpy(bo, evict, interruptible, - no_wait_gpu, new_mem); + return ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, + new_mem); } static void qxl_bo_move_notify(struct ttm_buffer_object *bo, diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index f8f474d..93414ac 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -444,8 +444,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, evict, interruptible, - no_wait_gpu, new_mem); + r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem); if (r) { return r; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 2a750d4..c2a30bd 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -359,8 +359,7 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, ret = bdev->driver->move(bo, evict, interruptible, no_wait_gpu, mem); else - ret = ttm_bo_move_memcpy(bo, evict, interruptible, - no_wait_gpu, mem); + ret = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, mem); if (ret) { if (bdev->driver->move_notify) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index f87162f..bf6e216 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -329,8 +329,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst, } int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, - bool evict, bool 
interruptible, - bool no_wait_gpu, + bool interruptible, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { struct ttm_bo_device *bdev = bo->bdev; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 12d348f..c986fa7 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -983,7 +983,6 @@ extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo, * ttm_bo_move_memcpy * * @bo: A pointer to a struct ttm_buffer_object. - * @evict: 1: This is an eviction. Don't try to pipeline. * @interruptible: Sleep interruptible if waiting. * @no_wait_gpu: Return immediately if the GPU is busy. * @new_mem: struct ttm_mem_reg indicating where to move. @@ -999,8 +998,7 @@ extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo, */ extern int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, + bool interruptible, bool no_wait_gpu, struct ttm_mem_reg *new_mem); /** -- cgit v0.10.2 From 1969753907286e5bf5b37c477113f77fbb6159e2 Mon Sep 17 00:00:00 2001 From: Lyude Date: Fri, 5 Aug 2016 20:30:34 -0400 Subject: drm/radeon: Don't print error on aux transaction timeouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since it's normal for DRM to retry our aux transaction helpers multiple times in a row, up to 32 times for each attempted transaction, we're making a lot of noise that is no longer necessary now that DRM will just print the return code we give it. Reviewed-by: Christian König Signed-off-by: Lyude Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_dp_auxch.c b/drivers/gpu/drm/radeon/radeon_dp_auxch.c index db64e00..2d46564 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_auxch.c +++ b/drivers/gpu/drm/radeon/radeon_dp_auxch.c @@ -164,7 +164,6 @@ radeon_dp_aux_transfer_native(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg } if (tmp & AUX_SW_RX_TIMEOUT) { - DRM_DEBUG_KMS("dp_aux_ch timed out\n"); ret = -ETIMEDOUT; goto done; } -- cgit v0.10.2 From 84cefe18e3951d985b1179b35d14fae9e4ae2229 Mon Sep 17 00:00:00 2001 From: Lyude Date: Fri, 5 Aug 2016 20:30:35 -0400 Subject: drm/radeon: Don't retry 7 times in radeon_dp_dpcd() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When this code was written, we didn't retry DP aux transactions on any error, which required retrying important transactions like this in individual drivers. Since that's no longer the case, retrying here is not necessary. As well, we retry any aux transaction on any error 32 times. 7 * 32 = 224, which means this loop causes us to retry grabbing the dpcd 224 times. This is definitely far more then we actually need to do. 
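For context on the numbers quoted above: the per-transaction retries live in the shared DP AUX helpers, so a single drm_dp_dpcd_read() call already hides a retry loop. A simplified sketch of that core behaviour, illustrative only and not the exact drm_dp_dpcd_access() source:

    /* Roughly what the DRM core does for each DPCD read/write: retry
     * the driver's ->transfer() hook until it succeeds or the retry
     * budget (32 attempts) is exhausted.  Callers therefore no longer
     * need loops of their own, which is what this patch removes.
     */
    for (retry = 0; retry < 32; retry++) {
            ret = aux->transfer(aux, &msg);
            if (ret >= 0 && msg.reply == DP_AUX_NATIVE_REPLY_ACK)
                    return ret;            /* success, bytes transferred */
            /* NACK/DEFER/errors: fall through and try again */
    }
    return ret < 0 ? ret : -EIO;
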
Reviewed-by: Christian König Signed-off-by: Lyude Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index cead089a..432cb46 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -389,22 +389,21 @@ bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector) { struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv; u8 msg[DP_DPCD_SIZE]; - int ret, i; + int ret; - for (i = 0; i < 7; i++) { - ret = drm_dp_dpcd_read(&radeon_connector->ddc_bus->aux, DP_DPCD_REV, msg, - DP_DPCD_SIZE); - if (ret == DP_DPCD_SIZE) { - memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE); + ret = drm_dp_dpcd_read(&radeon_connector->ddc_bus->aux, DP_DPCD_REV, msg, + DP_DPCD_SIZE); + if (ret == DP_DPCD_SIZE) { + memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE); - DRM_DEBUG_KMS("DPCD: %*ph\n", (int)sizeof(dig_connector->dpcd), - dig_connector->dpcd); + DRM_DEBUG_KMS("DPCD: %*ph\n", (int)sizeof(dig_connector->dpcd), + dig_connector->dpcd); - radeon_dp_probe_oui(radeon_connector); + radeon_dp_probe_oui(radeon_connector); - return true; - } + return true; } + dig_connector->dpcd[0] = 0; return false; } -- cgit v0.10.2 From aff802954e06c435f582e8725964d66e21c8d573 Mon Sep 17 00:00:00 2001 From: Lyude Date: Fri, 5 Aug 2016 20:30:36 -0400 Subject: drm/amdgpu: Don't print error on aux transaction timeouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since it's normal for DRM to retry our aux transaction helpers multiple times in a row, up to 32 times for each attempted transaction, we're making a lot of noise that is no longer necessary now that DRM will just print the return code we give it. Reviewed-by: Christian König Signed-off-by: Lyude Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index 7f85c2c..166dc7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -88,7 +88,6 @@ static int amdgpu_atombios_dp_process_aux_ch(struct amdgpu_i2c_chan *chan, /* timeout */ if (args.v2.ucReplyStatus == 1) { - DRM_DEBUG_KMS("dp_aux_ch timeout\n"); r = -ETIMEDOUT; goto done; } -- cgit v0.10.2 From 14a2fb487bed51d6020ed5d950e9f2cf17b5421b Mon Sep 17 00:00:00 2001 From: Lyude Date: Fri, 5 Aug 2016 20:30:37 -0400 Subject: drm/amdgpu: Don't retry 7 times in amdgpu_atombios_dp_get_dpcd() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When this code was written, we didn't retry DP aux transactions on any error, which required retrying important transactions like this in individual drivers. Since that's no longer the case, retrying here is not necessary. As well, we retry any aux transaction on any error 32 times. 7 * 32 = 224, which means this loop causes us to retry grabbing the dpcd 224 times. This is definitely far more then we actually need to do. 
Reviewed-by: Christian König Signed-off-by: Lyude Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index 166dc7b..f81068b 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -338,22 +338,21 @@ int amdgpu_atombios_dp_get_dpcd(struct amdgpu_connector *amdgpu_connector) { struct amdgpu_connector_atom_dig *dig_connector = amdgpu_connector->con_priv; u8 msg[DP_DPCD_SIZE]; - int ret, i; + int ret; - for (i = 0; i < 7; i++) { - ret = drm_dp_dpcd_read(&amdgpu_connector->ddc_bus->aux, DP_DPCD_REV, msg, - DP_DPCD_SIZE); - if (ret == DP_DPCD_SIZE) { - memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE); + ret = drm_dp_dpcd_read(&amdgpu_connector->ddc_bus->aux, DP_DPCD_REV, + msg, DP_DPCD_SIZE); + if (ret == DP_DPCD_SIZE) { + memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE); - DRM_DEBUG_KMS("DPCD: %*ph\n", (int)sizeof(dig_connector->dpcd), - dig_connector->dpcd); + DRM_DEBUG_KMS("DPCD: %*ph\n", (int)sizeof(dig_connector->dpcd), + dig_connector->dpcd); - amdgpu_atombios_dp_probe_oui(amdgpu_connector); + amdgpu_atombios_dp_probe_oui(amdgpu_connector); - return 0; - } + return 0; } + dig_connector->dpcd[0] = 0; return -EINVAL; } -- cgit v0.10.2 From 16925c92dbd97524655525b6816625e1f0063d12 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:30:34 +0800 Subject: drm/amdgpu: Add virtual connector and encoder macros. For virtual display feature, add virtual connector and encoder macros. Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h index 0619269..b8d6667 100644 --- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h +++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h @@ -90,6 +90,7 @@ #define ENCODER_OBJECT_ID_INTERNAL_VCE 0x24 #define ENCODER_OBJECT_ID_INTERNAL_UNIPHY3 0x25 #define ENCODER_OBJECT_ID_INTERNAL_AMCLK 0x27 +#define ENCODER_OBJECT_ID_VIRTUAL 0x28 #define ENCODER_OBJECT_ID_GENERAL_EXTERNAL_DVO 0xFF @@ -119,6 +120,7 @@ #define CONNECTOR_OBJECT_ID_eDP 0x14 #define CONNECTOR_OBJECT_ID_MXM 0x15 #define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16 +#define CONNECTOR_OBJECT_ID_VIRTUAL 0x17 /* deleted */ @@ -147,6 +149,7 @@ #define GRAPH_OBJECT_ENUM_ID5 0x05 #define GRAPH_OBJECT_ENUM_ID6 0x06 #define GRAPH_OBJECT_ENUM_ID7 0x07 +#define GRAPH_OBJECT_ENUM_VIRTUAL 0x08 /****************************************************/ /* Graphics Object ID Bit definition */ @@ -408,6 +411,10 @@ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\ ENCODER_OBJECT_ID_HDMI_ANX9805 << OBJECT_ID_SHIFT) +#define ENCODER_VIRTUAL_ENUM_VIRTUAL ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\ + GRAPH_OBJECT_ENUM_VIRTUAL << ENUM_ID_SHIFT |\ + ENCODER_OBJECT_ID_VIRTUAL << OBJECT_ID_SHIFT) + /****************************************************/ /* Connector Object ID definition - Shared with BIOS */ /****************************************************/ -- cgit v0.10.2 From c6e14f40ba1c2d23be03f09ace4b4d78bfc066e4 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:30:50 +0800 Subject: drm/amdgpu: Initialize dce_virtual_ip_funcs For virtual display feature, first need to initialize dce_virtual_ip_funcs, which will be used when set ip blocks. 
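"Set ip blocks" refers to the per-ASIC IP block tables that amdgpu walks during init. A hedged sketch of how the new funcs table would be referenced from such a table; the array name and version numbers below are placeholders, and the real hook-up lands in a later patch:

    /* Illustrative entry only: an IP block table of this era pairs a
     * block type and version with an amd_ip_funcs implementation, so
     * the virtual DCE is wired in by pointing .funcs at the new table.
     */
    static const struct amdgpu_ip_block_version example_ip_blocks[] = {
            /* ... GMC, IH, SMC, GFX, SDMA entries ... */
            {
                    .type  = AMD_IP_BLOCK_TYPE_DCE,
                    .major = 1,          /* placeholder version */
                    .minor = 0,
                    .rev   = 0,
                    .funcs = &dce_virtual_ip_funcs,
            },
    };
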
Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index c7fcdce..21dd7c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -58,7 +58,8 @@ amdgpu-y += \ # add DCE block amdgpu-y += \ dce_v10_0.o \ - dce_v11_0.o + dce_v11_0.o \ + dce_virtual.o # add GFX block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 76f9602..abeb6aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -335,7 +335,7 @@ int amdgpu_crtc_set_config(struct drm_mode_set *set) return ret; } -static const char *encoder_names[38] = { +static const char *encoder_names[41] = { "NONE", "INTERNAL_LVDS", "INTERNAL_TMDS1", @@ -374,6 +374,9 @@ static const char *encoder_names[38] = { "TRAVIS", "INTERNAL_VCE", "INTERNAL_UNIPHY3", + "HDMI_ANX9805", + "INTERNAL_AMCLK", + "VIRTUAL", }; static const char *hpd_names[6] = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c new file mode 100644 index 0000000..953ee1a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -0,0 +1,308 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "drmP.h" +#include "amdgpu.h" +#include "amdgpu_pm.h" +#include "amdgpu_i2c.h" +#include "atom.h" +#include "amdgpu_atombios.h" +#include "atombios_crtc.h" +#include "atombios_encoders.h" +#include "amdgpu_pll.h" +#include "amdgpu_connectors.h" + +static void dce_virtual_set_display_funcs(struct amdgpu_device *adev); +static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev); + +static const struct drm_crtc_funcs dce_virtual_crtc_funcs = { + .cursor_set2 = NULL, + .cursor_move = NULL, + .gamma_set = NULL, + .set_config = NULL, + .destroy = NULL, + .page_flip = NULL, +}; + +static const struct drm_crtc_helper_funcs dce_virtual_crtc_helper_funcs = { + .dpms = NULL, + .mode_fixup = NULL, + .mode_set = NULL, + .mode_set_base = NULL, + .mode_set_base_atomic = NULL, + .prepare = NULL, + .commit = NULL, + .load_lut = NULL, + .disable = NULL, +}; + +static int dce_virtual_crtc_init(struct amdgpu_device *adev, int index) +{ + struct amdgpu_crtc *amdgpu_crtc; + int i; + + amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + + (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); + if (amdgpu_crtc == NULL) + return -ENOMEM; + + drm_crtc_init(adev->ddev, &amdgpu_crtc->base, &dce_virtual_crtc_funcs); + + drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); + amdgpu_crtc->crtc_id = index; + adev->mode_info.crtcs[index] = amdgpu_crtc; + + for (i = 0; i < 256; i++) { + amdgpu_crtc->lut_r[i] = i << 2; + amdgpu_crtc->lut_g[i] = i << 2; + amdgpu_crtc->lut_b[i] = i << 2; + } + + amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; + amdgpu_crtc->encoder = NULL; + amdgpu_crtc->connector = NULL; + drm_crtc_helper_add(&amdgpu_crtc->base, &dce_virtual_crtc_helper_funcs); + + return 0; +} + +static int dce_virtual_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + dce_virtual_set_display_funcs(adev); + dce_virtual_set_irq_funcs(adev); + + adev->mode_info.num_crtc = 1; + adev->mode_info.num_hpd = 1; + adev->mode_info.num_dig = 1; + return 0; +} + +static bool dce_virtual_get_connector_info(struct amdgpu_device *adev) +{ + struct amdgpu_i2c_bus_rec ddc_bus; + struct amdgpu_router router; + struct amdgpu_hpd hpd; + + /* look up gpio for ddc, hpd */ + ddc_bus.valid = false; + hpd.hpd = AMDGPU_HPD_NONE; + /* needed for aux chan transactions */ + ddc_bus.hpd = hpd.hpd; + + memset(&router, 0, sizeof(router)); + router.ddc_valid = false; + router.cd_valid = false; + amdgpu_display_add_connector(adev, + 0, + ATOM_DEVICE_CRT1_SUPPORT, + DRM_MODE_CONNECTOR_VIRTUAL, &ddc_bus, + CONNECTOR_OBJECT_ID_VIRTUAL, + &hpd, + &router); + + amdgpu_display_add_encoder(adev, ENCODER_VIRTUAL_ENUM_VIRTUAL, + ATOM_DEVICE_CRT1_SUPPORT, + 0); + + amdgpu_link_encoder_connector(adev->ddev); + + return true; +} + +static int dce_virtual_sw_init(void *handle) +{ + int r, i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_irq_add_id(adev, 229, &adev->crtc_irq); + if (r) + return r; + + adev->ddev->mode_config.funcs = &amdgpu_mode_funcs; + + adev->ddev->mode_config.max_width = 16384; + adev->ddev->mode_config.max_height = 16384; + + adev->ddev->mode_config.preferred_depth = 24; + adev->ddev->mode_config.prefer_shadow = 1; + + adev->ddev->mode_config.fb_base = adev->mc.aper_base; + + r = amdgpu_modeset_create_props(adev); + if (r) + return r; + + adev->ddev->mode_config.max_width = 16384; + adev->ddev->mode_config.max_height = 16384; + + /* allocate crtcs */ + for (i = 0; i < adev->mode_info.num_crtc; i++) { + r = dce_virtual_crtc_init(adev, i); + 
if (r) + return r; + } + + dce_virtual_get_connector_info(adev); + amdgpu_print_display_setup(adev->ddev); + + drm_kms_helper_poll_init(adev->ddev); + + adev->mode_info.mode_config_initialized = true; + return 0; +} + +static int dce_virtual_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + kfree(adev->mode_info.bios_hardcoded_edid); + + drm_kms_helper_poll_fini(adev->ddev); + + drm_mode_config_cleanup(adev->ddev); + adev->mode_info.mode_config_initialized = false; + return 0; +} + +static int dce_virtual_hw_init(void *handle) +{ + return 0; +} + +static int dce_virtual_hw_fini(void *handle) +{ + return 0; +} + +static int dce_virtual_suspend(void *handle) +{ + return dce_virtual_hw_fini(handle); +} + +static int dce_virtual_resume(void *handle) +{ + int ret; + + ret = dce_virtual_hw_init(handle); + + return ret; +} + +static bool dce_virtual_is_idle(void *handle) +{ + return true; +} + +static int dce_virtual_wait_for_idle(void *handle) +{ + return 0; +} + +static int dce_virtual_soft_reset(void *handle) +{ + return 0; +} + +static int dce_virtual_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int dce_virtual_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs dce_virtual_ip_funcs = { + .name = "dce_virtual", + .early_init = dce_virtual_early_init, + .late_init = NULL, + .sw_init = dce_virtual_sw_init, + .sw_fini = dce_virtual_sw_fini, + .hw_init = dce_virtual_hw_init, + .hw_fini = dce_virtual_hw_fini, + .suspend = dce_virtual_suspend, + .resume = dce_virtual_resume, + .is_idle = dce_virtual_is_idle, + .wait_for_idle = dce_virtual_wait_for_idle, + .soft_reset = dce_virtual_soft_reset, + .set_clockgating_state = dce_virtual_set_clockgating_state, + .set_powergating_state = dce_virtual_set_powergating_state, +}; + +static const struct amdgpu_display_funcs dce_virtual_display_funcs = { + .set_vga_render_state = NULL, + .bandwidth_update = NULL, + .vblank_get_counter = NULL, + .vblank_wait = NULL, + .is_display_hung = NULL, + .backlight_set_level = NULL, + .backlight_get_level = NULL, + .hpd_sense = NULL, + .hpd_set_polarity = NULL, + .hpd_get_gpio_reg = NULL, + .page_flip = NULL, + .page_flip_get_scanoutpos = NULL, + .add_encoder = NULL, + .add_connector = &amdgpu_connector_add, + .stop_mc_access = NULL, + .resume_mc_access = NULL, +}; + +static void dce_virtual_set_display_funcs(struct amdgpu_device *adev) +{ + if (adev->mode_info.funcs == NULL) + adev->mode_info.funcs = &dce_virtual_display_funcs; +} + +static const struct amdgpu_irq_src_funcs dce_virtual_crtc_irq_funcs = { + .set = NULL, + .process = NULL, +}; + +static const struct amdgpu_irq_src_funcs dce_virtual_pageflip_irq_funcs = { + .set = NULL, + .process = NULL, +}; + +static const struct amdgpu_irq_src_funcs dce_virtual_hpd_irq_funcs = { + .set = NULL, + .process = NULL, +}; + +static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_LAST; + adev->crtc_irq.funcs = &dce_virtual_crtc_irq_funcs; + + adev->pageflip_irq.num_types = AMDGPU_PAGEFLIP_IRQ_LAST; + adev->pageflip_irq.funcs = &dce_virtual_pageflip_irq_funcs; + + adev->hpd_irq.num_types = AMDGPU_HPD_LAST; + adev->hpd_irq.funcs = &dce_virtual_hpd_irq_funcs; +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.h b/drivers/gpu/drm/amd/amdgpu/dce_virtual.h new file mode 100644 index 0000000..d205d7f --- /dev/null +++ 
b/drivers/gpu/drm/amd/amdgpu/dce_virtual.h @@ -0,0 +1,29 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DCE_VIRTUAL_H__ +#define __DCE_VIRTUAL_H__ + +extern const struct amd_ip_funcs dce_virtual_ip_funcs; +#endif + -- cgit v0.10.2 From 8e6de75bd1d6fa84c4338f6e2276eb2bc339f5c9 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:31:13 +0800 Subject: drm/amdgpu: Initialize dce_virtual_display_funcs. For virtual display feature, initialize dce_virtual_display_funcs, which will be used in function dce_virtual_set_display_funcs. Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 953ee1a..2da8847 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -34,6 +34,97 @@ static void dce_virtual_set_display_funcs(struct amdgpu_device *adev); static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev); +/** + * dce_virtual_vblank_wait - vblank wait asic callback. + * + * @adev: amdgpu_device pointer + * @crtc: crtc to wait for vblank on + * + * Wait for vblank on the requested crtc (evergreen+). 
+ */ +static void dce_virtual_vblank_wait(struct amdgpu_device *adev, int crtc) +{ + return; +} + +static u32 dce_virtual_vblank_get_counter(struct amdgpu_device *adev, int crtc) +{ + if (crtc >= adev->mode_info.num_crtc) + return 0; + else + return adev->ddev->vblank[crtc].count; +} + +static void dce_virtual_page_flip(struct amdgpu_device *adev, + int crtc_id, u64 crtc_base, bool async) +{ + return; +} + +static int dce_virtual_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, + u32 *vbl, u32 *position) +{ + if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc)) + return -EINVAL; + + *vbl = 0; + *position = 0; + + return 0; +} + +static bool dce_virtual_hpd_sense(struct amdgpu_device *adev, + enum amdgpu_hpd_id hpd) +{ + return true; +} + +static void dce_virtual_hpd_set_polarity(struct amdgpu_device *adev, + enum amdgpu_hpd_id hpd) +{ + return; +} + +static u32 dce_virtual_hpd_get_gpio_reg(struct amdgpu_device *adev) +{ + return 0; +} + +static bool dce_virtual_is_display_hung(struct amdgpu_device *adev) +{ + return false; +} + +void dce_virtual_stop_mc_access(struct amdgpu_device *adev, + struct amdgpu_mode_mc_save *save) +{ + return; +} +void dce_virtual_resume_mc_access(struct amdgpu_device *adev, + struct amdgpu_mode_mc_save *save) +{ + return; +} + +void dce_virtual_set_vga_render_state(struct amdgpu_device *adev, + bool render) +{ + return; +} + +/** + * dce_virtual_bandwidth_update - program display watermarks + * + * @adev: amdgpu_device pointer + * + * Calculate and program the display watermarks and line + * buffer allocation (CIK). + */ +static void dce_virtual_bandwidth_update(struct amdgpu_device *adev) +{ + return; +} + static const struct drm_crtc_funcs dce_virtual_crtc_funcs = { .cursor_set2 = NULL, .cursor_move = NULL, @@ -253,23 +344,127 @@ const struct amd_ip_funcs dce_virtual_ip_funcs = { .set_powergating_state = dce_virtual_set_powergating_state, }; +/* these are handled by the primary encoders */ +static void dce_virtual_encoder_prepare(struct drm_encoder *encoder) +{ + return; +} + +static void dce_virtual_encoder_commit(struct drm_encoder *encoder) +{ + return; +} + +static void +dce_virtual_encoder_mode_set(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + return; +} + +static void dce_virtual_encoder_disable(struct drm_encoder *encoder) +{ + return; +} + +static void +dce_virtual_encoder_dpms(struct drm_encoder *encoder, int mode) +{ + return; +} + +static bool dce_virtual_encoder_mode_fixup(struct drm_encoder *encoder, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + + /* set the active encoder to connector routing */ + amdgpu_encoder_set_active_device(encoder); + + return true; +} + +static const struct drm_encoder_helper_funcs dce_virtual_encoder_helper_funcs = { + .dpms = dce_virtual_encoder_dpms, + .mode_fixup = dce_virtual_encoder_mode_fixup, + .prepare = dce_virtual_encoder_prepare, + .mode_set = dce_virtual_encoder_mode_set, + .commit = dce_virtual_encoder_commit, + .disable = dce_virtual_encoder_disable, +}; + +static void dce_virtual_encoder_destroy(struct drm_encoder *encoder) +{ + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + + kfree(amdgpu_encoder->enc_priv); + drm_encoder_cleanup(encoder); + kfree(amdgpu_encoder); +} + +static const struct drm_encoder_funcs dce_virtual_encoder_funcs = { + .destroy = dce_virtual_encoder_destroy, +}; + +static void dce_virtual_encoder_add(struct amdgpu_device *adev, + uint32_t encoder_enum, 
+ uint32_t supported_device, + u16 caps) +{ + struct drm_device *dev = adev->ddev; + struct drm_encoder *encoder; + struct amdgpu_encoder *amdgpu_encoder; + + /* see if we already added it */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + amdgpu_encoder = to_amdgpu_encoder(encoder); + if (amdgpu_encoder->encoder_enum == encoder_enum) { + amdgpu_encoder->devices |= supported_device; + return; + } + + } + + /* add a new one */ + amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL); + if (!amdgpu_encoder) + return; + + encoder = &amdgpu_encoder->base; + encoder->possible_crtcs = 0x1; + amdgpu_encoder->enc_priv = NULL; + amdgpu_encoder->encoder_enum = encoder_enum; + amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; + amdgpu_encoder->devices = supported_device; + amdgpu_encoder->rmx_type = RMX_OFF; + amdgpu_encoder->underscan_type = UNDERSCAN_OFF; + amdgpu_encoder->is_ext_encoder = false; + amdgpu_encoder->caps = caps; + + drm_encoder_init(dev, encoder, &dce_virtual_encoder_funcs, + DRM_MODE_ENCODER_VIRTUAL, NULL); + drm_encoder_helper_add(encoder, &dce_virtual_encoder_helper_funcs); + DRM_INFO("[FM]encoder: %d is VIRTUAL\n", amdgpu_encoder->encoder_id); +} + static const struct amdgpu_display_funcs dce_virtual_display_funcs = { - .set_vga_render_state = NULL, - .bandwidth_update = NULL, - .vblank_get_counter = NULL, - .vblank_wait = NULL, - .is_display_hung = NULL, + .set_vga_render_state = &dce_virtual_set_vga_render_state, + .bandwidth_update = &dce_virtual_bandwidth_update, + .vblank_get_counter = &dce_virtual_vblank_get_counter, + .vblank_wait = &dce_virtual_vblank_wait, + .is_display_hung = &dce_virtual_is_display_hung, .backlight_set_level = NULL, .backlight_get_level = NULL, - .hpd_sense = NULL, - .hpd_set_polarity = NULL, - .hpd_get_gpio_reg = NULL, - .page_flip = NULL, - .page_flip_get_scanoutpos = NULL, - .add_encoder = NULL, + .hpd_sense = &dce_virtual_hpd_sense, + .hpd_set_polarity = &dce_virtual_hpd_set_polarity, + .hpd_get_gpio_reg = &dce_virtual_hpd_get_gpio_reg, + .page_flip = &dce_virtual_page_flip, + .page_flip_get_scanoutpos = &dce_virtual_crtc_get_scanoutpos, + .add_encoder = &dce_virtual_encoder_add, .add_connector = &amdgpu_connector_add, - .stop_mc_access = NULL, - .resume_mc_access = NULL, + .stop_mc_access = &dce_virtual_stop_mc_access, + .resume_mc_access = &dce_virtual_resume_mc_access, }; static void dce_virtual_set_display_funcs(struct amdgpu_device *adev) @@ -305,4 +500,3 @@ static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev) adev->hpd_irq.funcs = &dce_virtual_hpd_irq_funcs; } - -- cgit v0.10.2 From e13273d4a4702f7cb21a5f6e94919a5b52c45c32 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:31:37 +0800 Subject: drm/amdgpu: Initialize crtc, pageflip irq funcs (v2) For virtual display feature, initialize dce_virtual_crtc_irq_funcs, dce_virtual_pageflip_irq_funcs. As it has no dce engine, so the pageflip interrupt won't be generated, and the vsync interrupt will be generated by smu's periodic timer or software timer which will be implemented later. 
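As a rough illustration of the software timer mentioned above (see the v2 note and diff that follow), a periodic timer can stand in for the missing DCE vblank interrupt by signalling DRM on each tick. This is purely a sketch under assumed names; only drm_handle_vblank(), mod_timer() and msecs_to_jiffies() are real kernel APIs here, and the actual implementation arrives in later patches:

    /* Hypothetical software vblank tick for the virtual display:
     * fire roughly every 16 ms (~60 Hz), tell DRM that a vblank
     * occurred on crtc 0, then re-arm the timer.
     */
    static void dce_virtual_sw_vblank_tick(unsigned long data)
    {
            struct amdgpu_device *adev = (struct amdgpu_device *)data;

            drm_handle_vblank(adev->ddev, 0);
            mod_timer(&adev->mode_info.vblank_timer,   /* hypothetical field */
                      jiffies + msecs_to_jiffies(16));
    }
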
v2: agd: rebase on upstream Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 2da8847..985b276 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -473,19 +473,128 @@ static void dce_virtual_set_display_funcs(struct amdgpu_device *adev) adev->mode_info.funcs = &dce_virtual_display_funcs; } +static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, + int crtc, + enum amdgpu_interrupt_state state) +{ + if (crtc >= adev->mode_info.num_crtc) { + DRM_DEBUG("invalid crtc %d\n", crtc); + return; + } +} + +static int dce_virtual_set_crtc_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (type) { + case AMDGPU_CRTC_IRQ_VBLANK1: + dce_virtual_set_crtc_vblank_interrupt_state(adev, 0, state); + break; + default: + break; + } + return 0; +} + +static void dce_virtual_crtc_vblank_int_ack(struct amdgpu_device *adev, + int crtc) +{ + if (crtc >= adev->mode_info.num_crtc) { + DRM_DEBUG("invalid crtc %d\n", crtc); + return; + } +} + +static int dce_virtual_crtc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + unsigned crtc = 0; + unsigned irq_type = AMDGPU_CRTC_IRQ_VBLANK1; + + adev->ddev->vblank[crtc].count++; + dce_virtual_crtc_vblank_int_ack(adev, crtc); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); + } + + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + return 0; +} + +static int dce_virtual_set_pageflip_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + if (type >= adev->mode_info.num_crtc) { + DRM_ERROR("invalid pageflip crtc %d\n", type); + return -EINVAL; + } + DRM_DEBUG("[FM]set pageflip irq type %d state %d\n", type, state); + + return 0; +} + +static int dce_virtual_pageflip_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + unsigned long flags; + unsigned crtc_id = 0; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_flip_work *works; + + crtc_id = 0; + amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + + if (crtc_id >= adev->mode_info.num_crtc) { + DRM_ERROR("invalid pageflip crtc %d\n", crtc_id); + return -EINVAL; + } + + /* IRQ could occur when in initial stage */ + if (amdgpu_crtc == NULL) + return 0; + + spin_lock_irqsave(&adev->ddev->event_lock, flags); + works = amdgpu_crtc->pflip_works; + if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { + DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != " + "AMDGPU_FLIP_SUBMITTED(%d)\n", + amdgpu_crtc->pflip_status, + AMDGPU_FLIP_SUBMITTED); + spin_unlock_irqrestore(&adev->ddev->event_lock, flags); + return 0; + } + + /* page flip completed. 
clean up */ + amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE; + amdgpu_crtc->pflip_works = NULL; + + /* wakeup usersapce */ + if (works->event) + drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event); + + spin_unlock_irqrestore(&adev->ddev->event_lock, flags); + + drm_crtc_vblank_put(&amdgpu_crtc->base); + schedule_work(&works->unpin_work); + + return 0; +} + static const struct amdgpu_irq_src_funcs dce_virtual_crtc_irq_funcs = { - .set = NULL, - .process = NULL, + .set = dce_virtual_set_crtc_irq_state, + .process = dce_virtual_crtc_irq, }; static const struct amdgpu_irq_src_funcs dce_virtual_pageflip_irq_funcs = { - .set = NULL, - .process = NULL, -}; - -static const struct amdgpu_irq_src_funcs dce_virtual_hpd_irq_funcs = { - .set = NULL, - .process = NULL, + .set = dce_virtual_set_pageflip_irq_state, + .process = dce_virtual_pageflip_irq, }; static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev) @@ -495,8 +604,5 @@ static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev) adev->pageflip_irq.num_types = AMDGPU_PAGEFLIP_IRQ_LAST; adev->pageflip_irq.funcs = &dce_virtual_pageflip_irq_funcs; - - adev->hpd_irq.num_types = AMDGPU_HPD_LAST; - adev->hpd_irq.funcs = &dce_virtual_hpd_irq_funcs; } -- cgit v0.10.2 From f1f5ef92679fd9871b9642916c32b78fc333642a Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:32:00 +0800 Subject: drm/amdgpu: Initialize dce_virtual_crtc_helper_funcs For virtual display feature, initialize dce_virtual_crtc_helper_funcs. Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 985b276..11f04ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -134,16 +134,135 @@ static const struct drm_crtc_funcs dce_virtual_crtc_funcs = { .page_flip = NULL, }; +static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode) +{ + struct drm_device *dev = crtc->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + unsigned type; + + switch (mode) { + case DRM_MODE_DPMS_ON: + amdgpu_crtc->enabled = true; + /* Make sure VBLANK and PFLIP interrupts are still enabled */ + type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); + amdgpu_irq_update(adev, &adev->crtc_irq, type); + amdgpu_irq_update(adev, &adev->pageflip_irq, type); + drm_vblank_on(dev, amdgpu_crtc->crtc_id); + break; + case DRM_MODE_DPMS_STANDBY: + case DRM_MODE_DPMS_SUSPEND: + case DRM_MODE_DPMS_OFF: + drm_vblank_off(dev, amdgpu_crtc->crtc_id); + amdgpu_crtc->enabled = false; + break; + } +} + + +static void dce_virtual_crtc_prepare(struct drm_crtc *crtc) +{ + dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); +} + +static void dce_virtual_crtc_commit(struct drm_crtc *crtc) +{ + dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_ON); +} + +static void dce_virtual_crtc_disable(struct drm_crtc *crtc) +{ + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + + dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); + if (crtc->primary->fb) { + int r; + struct amdgpu_framebuffer *amdgpu_fb; + struct amdgpu_bo *rbo; + + amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + rbo = gem_to_amdgpu_bo(amdgpu_fb->obj); + r = amdgpu_bo_reserve(rbo, false); + if (unlikely(r)) + DRM_ERROR("failed to reserve rbo before unpin\n"); + else { + amdgpu_bo_unpin(rbo); + amdgpu_bo_unreserve(rbo); + } + } + + amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; + amdgpu_crtc->encoder = 
NULL; + amdgpu_crtc->connector = NULL; +} + +static int dce_virtual_crtc_mode_set(struct drm_crtc *crtc, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode, + int x, int y, struct drm_framebuffer *old_fb) +{ + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + + /* update the hw version fpr dpm */ + amdgpu_crtc->hw_mode = *adjusted_mode; + + return 0; +} + +static bool dce_virtual_crtc_mode_fixup(struct drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct drm_encoder *encoder; + + /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + if (encoder->crtc == crtc) { + amdgpu_crtc->encoder = encoder; + amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder); + break; + } + } + if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) { + amdgpu_crtc->encoder = NULL; + amdgpu_crtc->connector = NULL; + return false; + } + + return true; +} + + +static int dce_virtual_crtc_set_base(struct drm_crtc *crtc, int x, int y, + struct drm_framebuffer *old_fb) +{ + return 0; +} + +static void dce_virtual_crtc_load_lut(struct drm_crtc *crtc) +{ + return; +} + +static int dce_virtual_crtc_set_base_atomic(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int x, int y, enum mode_set_atomic state) +{ + return 0; +} + static const struct drm_crtc_helper_funcs dce_virtual_crtc_helper_funcs = { - .dpms = NULL, - .mode_fixup = NULL, - .mode_set = NULL, - .mode_set_base = NULL, - .mode_set_base_atomic = NULL, - .prepare = NULL, - .commit = NULL, - .load_lut = NULL, - .disable = NULL, + .dpms = dce_virtual_crtc_dpms, + .mode_fixup = dce_virtual_crtc_mode_fixup, + .mode_set = dce_virtual_crtc_mode_set, + .mode_set_base = dce_virtual_crtc_set_base, + .mode_set_base_atomic = dce_virtual_crtc_set_base_atomic, + .prepare = dce_virtual_crtc_prepare, + .commit = dce_virtual_crtc_commit, + .load_lut = dce_virtual_crtc_load_lut, + .disable = dce_virtual_crtc_disable, }; static int dce_virtual_crtc_init(struct amdgpu_device *adev, int index) -- cgit v0.10.2 From 0d43f3bca1ea1f85c1b344abcc2f17f4731e5eda Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:32:22 +0800 Subject: drm/amdgpu: Initialize dce_virtual_crtc_funcs. (v2) For virtual display feature, initialize dce_virtual_crtc_funcs. 
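In the gamma_set hook added below, each 16-bit userspace gamma entry is narrowed to the CRTC's LUT by dropping the low six bits. A minimal sketch of that conversion (standalone and illustrative, assuming the amdgpu LUT entries are 10-bit as the shift implies):

#include <stdint.h>

/* Sketch: userspace gamma ramps are 16-bit per channel; shifting right
 * by six keeps the top 10 bits, so full scale maps to full scale. */
static inline uint16_t lut_entry_from_u16(uint16_t v)
{
	return v >> 6;		/* 0xffff -> 0x3ff */
}
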
v2: agd: rebase on upstream Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 11f04ec..d6802be 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -125,13 +125,37 @@ static void dce_virtual_bandwidth_update(struct amdgpu_device *adev) return; } +static int dce_virtual_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, + u16 *green, u16 *blue, uint32_t size) +{ + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + int i; + + /* userspace palettes are always correct as is */ + for (i = 0; i < size; i++) { + amdgpu_crtc->lut_r[i] = red[i] >> 6; + amdgpu_crtc->lut_g[i] = green[i] >> 6; + amdgpu_crtc->lut_b[i] = blue[i] >> 6; + } + + return 0; +} + +static void dce_virtual_crtc_destroy(struct drm_crtc *crtc) +{ + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + + drm_crtc_cleanup(crtc); + kfree(amdgpu_crtc); +} + static const struct drm_crtc_funcs dce_virtual_crtc_funcs = { .cursor_set2 = NULL, .cursor_move = NULL, - .gamma_set = NULL, - .set_config = NULL, - .destroy = NULL, - .page_flip = NULL, + .gamma_set = dce_virtual_crtc_gamma_set, + .set_config = amdgpu_crtc_set_config, + .destroy = dce_virtual_crtc_destroy, + .page_flip = amdgpu_crtc_page_flip, }; static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode) -- cgit v0.10.2 From 0bad1619ff5cce53964785a9faedcfb1d4810223 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:32:35 +0800 Subject: drm/amdgpu: To define whether the GPU has DCE engine. For virtual display feature, when the GPU has DCE engine, need to disable the VGA render and CRTC, or it will hang when initialize GMC. So first detect whether the GPU has DCE engine. 
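The detection helper added below only reports whether the VBIOS object table describes any display paths; callers use it to decide whether there is real DCE hardware to touch. A hedged sketch of that gating (the wrapper function is hypothetical; in the later patches of this series the check sits inside the dce_v*_0_disable_dce() helpers themselves):

/* Hypothetical caller sketch: skip register teardown on parts whose
 * VBIOS exposes no display paths (i.e. no DCE engine). */
static void example_teardown_display_hw(struct amdgpu_device *adev)
{
	if (!amdgpu_atombios_has_dce_engine_info(adev))
		return;		/* headless/virtual part: nothing to disable */

	/* otherwise call the asic-specific disable path, e.g.
	 * dce_v10_0_disable_dce(adev) on Tonga/Fiji */
}
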
Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 9831753..1b62116 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -259,6 +259,33 @@ static const int object_connector_convert[] = { DRM_MODE_CONNECTOR_Unknown }; +bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + struct atom_context *ctx = mode_info->atom_context; + int index = GetIndexIntoMasterTable(DATA, Object_Header); + u16 size, data_offset; + u8 frev, crev; + ATOM_DISPLAY_OBJECT_PATH_TABLE *path_obj; + ATOM_OBJECT_HEADER *obj_header; + + if (!amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset)) + return false; + + if (crev < 2) + return false; + + obj_header = (ATOM_OBJECT_HEADER *) (ctx->bios + data_offset); + path_obj = (ATOM_DISPLAY_OBJECT_PATH_TABLE *) + (ctx->bios + data_offset + + le16_to_cpu(obj_header->usDisplayPathTableOffset)); + + if (path_obj->ucNumOfDispPath) + return true; + else + return false; +} + bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *adev) { struct amdgpu_mode_info *mode_info = &adev->mode_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 8c2e696..15dd43e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -140,6 +140,8 @@ struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device * uint8_t id); void amdgpu_atombios_i2c_init(struct amdgpu_device *adev); +bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev); + bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *adev); int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev); -- cgit v0.10.2 From 83c9b0253b1136b1312fd2a0bfd173f625c65091 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:33:11 +0800 Subject: drm/amdgpu: Disable VGA render and crtc when init GMC. For virtual display feature, when the GPU has DCE engine, need to disable the VGA render and CRTC, or it will hang when initialize GMC. 
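The per-ASIC disable helpers added below all follow the same sequence: latch the CRTC update lock, clear CRTC_MASTER_EN with a read-modify-write, then release the lock. An annotated restatement of that sequence (register names as in the hunks; the standalone function exists only for illustration):

/* Annotated restatement of the disable sequence used by
 * dce_v8_0/dce_v10_0/dce_v11_0_disable_dce() below. */
static void example_disable_one_crtc(struct amdgpu_device *adev, int i)
{
	u32 tmp;

	WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);	/* hold double-buffered update */
	tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
	tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); /* stop scanout */
	WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
	WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);	/* release the lock */
}
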
Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 7f642b5..b888d72 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -712,6 +712,45 @@ static void dce_v10_0_set_vga_render_state(struct amdgpu_device *adev, WREG32(mmVGA_RENDER_CONTROL, tmp); } +static int dce_v10_0_get_num_crtc(struct amdgpu_device *adev) +{ + int num_crtc = 0; + + switch (adev->asic_type) { + case CHIP_FIJI: + case CHIP_TONGA: + num_crtc = 6; + break; + default: + num_crtc = 0; + } + return num_crtc; +} + +void dce_v10_0_disable_dce(struct amdgpu_device *adev) +{ + /*Disable VGA render and enabled crtc, if has DCE engine*/ + if (amdgpu_atombios_has_dce_engine_info(adev)) { + u32 tmp; + int crtc_enabled, i; + + dce_v10_0_set_vga_render_state(adev, false); + + /*Disable crtc*/ + for (i = 0; i < dce_v10_0_get_num_crtc(adev); i++) { + crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), + CRTC_CONTROL, CRTC_MASTER_EN); + if (crtc_enabled) { + WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); + tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); + tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); + WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); + WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); + } + } + } +} + static void dce_v10_0_program_fmt(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; @@ -2962,10 +3001,11 @@ static int dce_v10_0_early_init(void *handle) dce_v10_0_set_display_funcs(adev); dce_v10_0_set_irq_funcs(adev); + adev->mode_info.num_crtc = dce_v10_0_get_num_crtc(adev); + switch (adev->asic_type) { case CHIP_FIJI: case CHIP_TONGA: - adev->mode_info.num_crtc = 6; /* XXX 7??? 
*/ adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 7; break; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h index 1bfa48d..e3dc04d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h @@ -26,4 +26,6 @@ extern const struct amd_ip_funcs dce_v10_0_ip_funcs; +void dce_v10_0_disable_dce(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index d4bf133..cfadd79 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -673,6 +673,53 @@ static void dce_v11_0_set_vga_render_state(struct amdgpu_device *adev, WREG32(mmVGA_RENDER_CONTROL, tmp); } +static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev) +{ + int num_crtc = 0; + + switch (adev->asic_type) { + case CHIP_CARRIZO: + num_crtc = 3; + break; + case CHIP_STONEY: + num_crtc = 2; + break; + case CHIP_POLARIS10: + num_crtc = 6; + break; + case CHIP_POLARIS11: + num_crtc = 5; + break; + default: + num_crtc = 0; + } + return num_crtc; +} + +void dce_v11_0_disable_dce(struct amdgpu_device *adev) +{ + /*Disable VGA render and enabled crtc, if has DCE engine*/ + if (amdgpu_atombios_has_dce_engine_info(adev)) { + u32 tmp; + int crtc_enabled, i; + + dce_v11_0_set_vga_render_state(adev, false); + + /*Disable crtc*/ + for (i = 0; i < dce_v11_0_get_num_crtc(adev); i++) { + crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), + CRTC_CONTROL, CRTC_MASTER_EN); + if (crtc_enabled) { + WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); + tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); + tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); + WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); + WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); + } + } + } +} + static void dce_v11_0_program_fmt(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; @@ -2999,24 +3046,22 @@ static int dce_v11_0_early_init(void *handle) dce_v11_0_set_display_funcs(adev); dce_v11_0_set_irq_funcs(adev); + adev->mode_info.num_crtc = dce_v11_0_get_num_crtc(adev); + switch (adev->asic_type) { case CHIP_CARRIZO: - adev->mode_info.num_crtc = 3; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 9; break; case CHIP_STONEY: - adev->mode_info.num_crtc = 2; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 9; break; case CHIP_POLARIS10: - adev->mode_info.num_crtc = 6; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; break; case CHIP_POLARIS11: - adev->mode_info.num_crtc = 5; adev->mode_info.num_hpd = 5; adev->mode_info.num_dig = 5; break; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h index 84e4618..1f58a65 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h @@ -26,4 +26,6 @@ extern const struct amd_ip_funcs dce_v11_0_ip_funcs; +void dce_v11_0_disable_dce(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 4fdfab1..e4467b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -604,6 +604,52 @@ static void dce_v8_0_set_vga_render_state(struct amdgpu_device *adev, WREG32(mmVGA_RENDER_CONTROL, tmp); } +static int dce_v8_0_get_num_crtc(struct amdgpu_device *adev) +{ + int num_crtc = 0; + + switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: + num_crtc = 6; + break; + case CHIP_KAVERI: + num_crtc = 
4; + break; + case CHIP_KABINI: + case CHIP_MULLINS: + num_crtc = 2; + break; + default: + num_crtc = 0; + } + return num_crtc; +} + +void dce_v8_0_disable_dce(struct amdgpu_device *adev) +{ + /*Disable VGA render and enabled crtc, if has DCE engine*/ + if (amdgpu_atombios_has_dce_engine_info(adev)) { + u32 tmp; + int crtc_enabled, i; + + dce_v8_0_set_vga_render_state(adev, false); + + /*Disable crtc*/ + for (i = 0; i < dce_v8_0_get_num_crtc(adev); i++) { + crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), + CRTC_CONTROL, CRTC_MASTER_EN); + if (crtc_enabled) { + WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); + tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); + tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); + WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); + WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); + } + } + } +} + static void dce_v8_0_program_fmt(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; @@ -2803,21 +2849,20 @@ static int dce_v8_0_early_init(void *handle) dce_v8_0_set_display_funcs(adev); dce_v8_0_set_irq_funcs(adev); + adev->mode_info.num_crtc = dce_v8_0_get_num_crtc(adev); + switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: - adev->mode_info.num_crtc = 6; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; break; case CHIP_KAVERI: - adev->mode_info.num_crtc = 4; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 7; break; case CHIP_KABINI: case CHIP_MULLINS: - adev->mode_info.num_crtc = 2; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; /* ? */ break; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h index 7701685..7d0770c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h @@ -26,4 +26,6 @@ extern const struct amd_ip_funcs dce_v8_0_ip_funcs; +void dce_v8_0_disable_dce(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index d6802be..c7da45c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -25,11 +25,13 @@ #include "amdgpu_pm.h" #include "amdgpu_i2c.h" #include "atom.h" -#include "amdgpu_atombios.h" -#include "atombios_crtc.h" -#include "atombios_encoders.h" #include "amdgpu_pll.h" #include "amdgpu_connectors.h" +#ifdef CONFIG_DRM_AMDGPU_CIK +#include "dce_v8_0.h" +#endif +#include "dce_v10_0.h" +#include "dce_v11_0.h" static void dce_virtual_set_display_funcs(struct amdgpu_device *adev); static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev); @@ -98,6 +100,30 @@ static bool dce_virtual_is_display_hung(struct amdgpu_device *adev) void dce_virtual_stop_mc_access(struct amdgpu_device *adev, struct amdgpu_mode_mc_save *save) { + switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: +#ifdef CONFIG_DRM_AMDGPU_CIK + dce_v8_0_disable_dce(adev); +#endif + break; + case CHIP_FIJI: + case CHIP_TONGA: + dce_v10_0_disable_dce(adev); + break; + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_POLARIS11: + case CHIP_POLARIS10: + dce_v11_0_disable_dce(adev); + break; + default: + DRM_ERROR("Usupported ASIC type: 0x%X\n", adev->asic_type); + } + return; } void dce_virtual_resume_mc_access(struct amdgpu_device *adev, -- cgit v0.10.2 From 46ac3622437692c371f3e647dc29f99e14b4f596 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:35:39 +0800 Subject: drm/amdgpu: Use software timer to 
generate vsync interrupt. For virtual display feature, use the software timer to simulate the vsync interrupt. Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 6b1d7d3..b1ae33b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -39,6 +39,8 @@ #include #include #include +#include +#include "amdgpu_irq.h" struct amdgpu_bo; struct amdgpu_device; @@ -339,6 +341,8 @@ struct amdgpu_mode_info { int num_dig; /* number of dig blocks */ int disp_priority; const struct amdgpu_display_funcs *funcs; + struct hrtimer vblank_timer; + enum amdgpu_interrupt_state vsync_timer_enabled; }; #define AMDGPU_MAX_BL_LEVEL 0xFF diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index c7da45c..ace52a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -32,6 +32,7 @@ #endif #include "dce_v10_0.h" #include "dce_v11_0.h" +#include "dce_virtual.h" static void dce_virtual_set_display_funcs(struct amdgpu_device *adev); static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev); @@ -642,16 +643,44 @@ static void dce_virtual_set_display_funcs(struct amdgpu_device *adev) adev->mode_info.funcs = &dce_virtual_display_funcs; } +static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vblank_timer) +{ + struct amdgpu_mode_info *mode_info = container_of(vblank_timer, struct amdgpu_mode_info ,vblank_timer); + struct amdgpu_device *adev = container_of(mode_info, struct amdgpu_device ,mode_info); + unsigned crtc = 0; + adev->ddev->vblank[0].count++; + drm_handle_vblank(adev->ddev, crtc); + hrtimer_start(vblank_timer, ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD), HRTIMER_MODE_REL); + return HRTIMER_NORESTART; +} + static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, - int crtc, - enum amdgpu_interrupt_state state) + int crtc, + enum amdgpu_interrupt_state state) { if (crtc >= adev->mode_info.num_crtc) { DRM_DEBUG("invalid crtc %d\n", crtc); return; } + + if (state && !adev->mode_info.vsync_timer_enabled) { + DRM_DEBUG("Enable software vsync timer\n"); + hrtimer_init(&adev->mode_info.vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_set_expires(&adev->mode_info.vblank_timer, ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD)); + adev->mode_info.vblank_timer.function = dce_virtual_vblank_timer_handle; + hrtimer_start(&adev->mode_info.vblank_timer, ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD), HRTIMER_MODE_REL); + } else if (!state && adev->mode_info.vsync_timer_enabled) { + DRM_DEBUG("Disable software vsync timer\n"); + hrtimer_cancel(&adev->mode_info.vblank_timer); + } + + if (!state || (state && !adev->mode_info.vsync_timer_enabled)) + adev->ddev->vblank[0].count = 0; + adev->mode_info.vsync_timer_enabled = state; + DRM_DEBUG("[FM]set crtc %d vblank interrupt state %d\n", crtc, state); } + static int dce_virtual_set_crtc_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.h b/drivers/gpu/drm/amd/amdgpu/dce_virtual.h index d205d7f..e239243 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.h +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.h @@ -25,5 +25,7 @@ #define __DCE_VIRTUAL_H__ extern const struct amd_ip_funcs dce_virtual_ip_funcs; +#define DCE_VIRTUAL_VBLANK_PERIOD 16666666 + #endif -- cgit v0.10.2 From 
6b5084ccfcd4d43d31c6124968dcc537ea265994 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:36:07 +0800 Subject: drm/amdgpu: Call pageflip irq funtion when receiced vsync interrupt. For virtual display feature, as there is no dce engine, so no pageflip irq generated. So directly call pageflip irq funtion when received vysn interrupt. Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index ace52a3..1c16983 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -36,6 +36,9 @@ static void dce_virtual_set_display_funcs(struct amdgpu_device *adev); static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev); +static int dce_virtual_pageflip_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); /** * dce_virtual_vblank_wait - vblank wait asic callback. @@ -650,6 +653,7 @@ static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vbla unsigned crtc = 0; adev->ddev->vblank[0].count++; drm_handle_vblank(adev->ddev, crtc); + dce_virtual_pageflip_irq(adev, NULL, NULL); hrtimer_start(vblank_timer, ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD), HRTIMER_MODE_REL); return HRTIMER_NORESTART; } @@ -706,8 +710,8 @@ static void dce_virtual_crtc_vblank_int_ack(struct amdgpu_device *adev, } static int dce_virtual_crtc_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) { unsigned crtc = 0; unsigned irq_type = AMDGPU_CRTC_IRQ_VBLANK1; @@ -718,7 +722,7 @@ static int dce_virtual_crtc_irq(struct amdgpu_device *adev, if (amdgpu_irq_enabled(adev, source, irq_type)) { drm_handle_vblank(adev->ddev, crtc); } - + dce_virtual_pageflip_irq(adev, NULL, NULL); DRM_DEBUG("IH: D%d vblank\n", crtc + 1); return 0; } -- cgit v0.10.2 From 048a5b76d2ba40adedb98a987bb15a9cc1f0a62b Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:36:31 +0800 Subject: drm/amdgpu: Add DRM_MODE_CONNECTOR_VIRTUAL connector in amdgpu_connector_add. For virtual display feature, add one connector type in amdgpu_connector_add. 
Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index ff0b55a..319a5e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1504,6 +1504,86 @@ static const struct drm_connector_funcs amdgpu_connector_edp_funcs = { .force = amdgpu_connector_dvi_force, }; +static struct drm_encoder * +amdgpu_connector_virtual_encoder(struct drm_connector *connector) +{ + int enc_id = connector->encoder_ids[0]; + struct drm_encoder *encoder; + int i; + for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { + if (connector->encoder_ids[i] == 0) + break; + + encoder = drm_encoder_find(connector->dev, connector->encoder_ids[i]); + if (!encoder) + continue; + + if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL) + return encoder; + } + + /* pick the first one */ + if (enc_id) + return drm_encoder_find(connector->dev, enc_id); + return NULL; +} + +static int amdgpu_connector_virtual_get_modes(struct drm_connector *connector) +{ + struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector); + + if (encoder) { + amdgpu_connector_add_common_modes(encoder, connector); + } + + return 0; +} + +static int amdgpu_connector_virtual_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + return MODE_OK; +} + +int amdgpu_connector_virtual_dpms(struct drm_connector *connector, int mode) +{ + return 0; +} + +static enum drm_connector_status + +amdgpu_connector_virtual_detect(struct drm_connector *connector, bool force) +{ + return connector_status_connected; +} + +int amdgpu_connector_virtual_set_property(struct drm_connector *connector, + struct drm_property *property, + uint64_t val) +{ + return 0; +} + +static void amdgpu_connector_virtual_force(struct drm_connector *connector) +{ + return; +} + +static const struct drm_connector_helper_funcs amdgpu_connector_virtual_helper_funcs = { + .get_modes = amdgpu_connector_virtual_get_modes, + .mode_valid = amdgpu_connector_virtual_mode_valid, + .best_encoder = amdgpu_connector_virtual_encoder, +}; + +static const struct drm_connector_funcs amdgpu_connector_virtual_funcs = { + .dpms = amdgpu_connector_virtual_dpms, + .detect = amdgpu_connector_virtual_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .set_property = amdgpu_connector_virtual_set_property, + .destroy = amdgpu_connector_destroy, + .force = amdgpu_connector_virtual_force, +}; + void amdgpu_connector_add(struct amdgpu_device *adev, uint32_t connector_id, @@ -1888,6 +1968,17 @@ amdgpu_connector_add(struct amdgpu_device *adev, connector->interlace_allowed = false; connector->doublescan_allowed = false; break; + case DRM_MODE_CONNECTOR_VIRTUAL: + amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL); + if (!amdgpu_dig_connector) + goto failed; + amdgpu_connector->con_priv = amdgpu_dig_connector; + drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_virtual_funcs, connector_type); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_virtual_helper_funcs); + subpixel_order = SubPixelHorizontalRGB; + connector->interlace_allowed = false; + connector->doublescan_allowed = false; + break; } } -- cgit v0.10.2 From 31ad61e4afa53a7b2e364f7c021546fbc6ce0d85 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 29 Jul 2016 08:50:05 +0300 Subject: drm: BIT(DRM_ROTATE_?) -> DRM_ROTATE_? 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only property creation uses the rotation as an index, so convert the to figure the index when needed. v2: Use the new defines to build the _MASK defines (Sean) Cc: intel-gfx@lists.freedesktop.org Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: malidp@foss.arm.com Cc: David Airlie Cc: Daniel Vetter Cc: Ville Syrjälä Cc: Liviu Dudau Cc: Sean Paul Acked-by: Liviu Dudau Reviewed-by: Ville Syrjälä (v1) Signed-off-by: Joonas Lahtinen Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1469771405-17653-1-git-send-email-joonas.lahtinen@linux.intel.com diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h index 95558fd..271d2fb 100644 --- a/drivers/gpu/drm/arm/malidp_drv.h +++ b/drivers/gpu/drm/arm/malidp_drv.h @@ -49,6 +49,6 @@ void malidp_de_planes_destroy(struct drm_device *drm); int malidp_crtc_init(struct drm_device *drm); /* often used combination of rotational bits */ -#define MALIDP_ROTATED_MASK (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270)) +#define MALIDP_ROTATED_MASK (DRM_ROTATE_90 | DRM_ROTATE_270) #endif /* __MALIDP_DRV_H__ */ diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 725098d..82c193e 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -108,7 +108,7 @@ static int malidp_de_plane_check(struct drm_plane *plane, return -EINVAL; /* packed RGB888 / BGR888 can't be rotated or flipped */ - if (state->rotation != BIT(DRM_ROTATE_0) && + if (state->rotation != DRM_ROTATE_0 && (state->fb->pixel_format == DRM_FORMAT_RGB888 || state->fb->pixel_format == DRM_FORMAT_BGR888)) return -EINVAL; @@ -188,9 +188,9 @@ static void malidp_de_plane_update(struct drm_plane *plane, /* setup the rotation and axis flip bits */ if (plane->state->rotation & DRM_ROTATE_MASK) val = ilog2(plane->state->rotation & DRM_ROTATE_MASK) << LAYER_ROT_OFFSET; - if (plane->state->rotation & BIT(DRM_REFLECT_X)) + if (plane->state->rotation & DRM_REFLECT_X) val |= LAYER_V_FLIP; - if (plane->state->rotation & BIT(DRM_REFLECT_Y)) + if (plane->state->rotation & DRM_REFLECT_Y) val |= LAYER_H_FLIP; /* set the 'enable layer' bit */ @@ -255,12 +255,12 @@ int malidp_de_planes_init(struct drm_device *drm) goto cleanup; if (!drm->mode_config.rotation_property) { - unsigned long flags = BIT(DRM_ROTATE_0) | - BIT(DRM_ROTATE_90) | - BIT(DRM_ROTATE_180) | - BIT(DRM_ROTATE_270) | - BIT(DRM_REFLECT_X) | - BIT(DRM_REFLECT_Y); + unsigned long flags = DRM_ROTATE_0 | + DRM_ROTATE_90 | + DRM_ROTATE_180 | + DRM_ROTATE_270 | + DRM_REFLECT_X | + DRM_REFLECT_Y; drm->mode_config.rotation_property = drm_mode_create_rotation_property(drm, flags); } @@ -268,7 +268,7 @@ int malidp_de_planes_init(struct drm_device *drm) if (drm->mode_config.rotation_property && (id != DE_SMART)) drm_object_attach_property(&plane->base.base, drm->mode_config.rotation_property, - BIT(DRM_ROTATE_0)); + DRM_ROTATE_0); drm_plane_helper_add(&plane->base, &malidp_de_plane_helper_funcs); diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index 1ee707e..152b4e7 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -121,7 +121,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, int ret; ret = drm_plane_helper_check_update(plane, crtc, fb, &src, &dest, &clip, - BIT(DRM_ROTATE_0), + DRM_ROTATE_0, 0, INT_MAX, true, false, 
&visible); if (ret) return ret; diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index 016c191..146809a 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -393,7 +393,7 @@ static void atmel_hlcdc_plane_update_format(struct atmel_hlcdc_plane *plane, if ((state->base.fb->pixel_format == DRM_FORMAT_YUV422 || state->base.fb->pixel_format == DRM_FORMAT_NV61) && - (state->base.rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270)))) + (state->base.rotation & (DRM_ROTATE_90 | DRM_ROTATE_270))) cfg |= ATMEL_HLCDC_YUV422ROT; atmel_hlcdc_layer_update_cfg(&plane->layer, @@ -628,7 +628,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, /* * Swap width and size in case of 90 or 270 degrees rotation */ - if (state->base.rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270))) { + if (state->base.rotation & (DRM_ROTATE_90 | DRM_ROTATE_270)) { tmp = state->crtc_w; state->crtc_w = state->crtc_h; state->crtc_h = tmp; @@ -677,7 +677,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, return -EINVAL; switch (state->base.rotation & DRM_ROTATE_MASK) { - case BIT(DRM_ROTATE_90): + case DRM_ROTATE_90: offset = ((y_offset + state->src_y + patched_src_w - 1) / ydiv) * fb->pitches[i]; offset += ((x_offset + state->src_x) / xdiv) * @@ -686,7 +686,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, fb->pitches[i]; state->pstride[i] = -fb->pitches[i] - state->bpp[i]; break; - case BIT(DRM_ROTATE_180): + case DRM_ROTATE_180: offset = ((y_offset + state->src_y + patched_src_h - 1) / ydiv) * fb->pitches[i]; offset += ((x_offset + state->src_x + patched_src_w - 1) / @@ -695,7 +695,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, state->bpp[i]) - fb->pitches[i]; state->pstride[i] = -2 * state->bpp[i]; break; - case BIT(DRM_ROTATE_270): + case DRM_ROTATE_270: offset = ((y_offset + state->src_y) / ydiv) * fb->pitches[i]; offset += ((x_offset + state->src_x + patched_src_h - 1) / @@ -705,7 +705,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, (2 * state->bpp[i]); state->pstride[i] = fb->pitches[i] - state->bpp[i]; break; - case BIT(DRM_ROTATE_0): + case DRM_ROTATE_0: default: offset = ((y_offset + state->src_y) / ydiv) * fb->pitches[i]; @@ -905,7 +905,7 @@ static void atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane, if (desc->layout.xstride && desc->layout.pstride) drm_object_attach_property(&plane->base.base, plane->base.dev->mode_config.rotation_property, - BIT(DRM_ROTATE_0)); + DRM_ROTATE_0); if (desc->layout.csc) { /* @@ -1056,10 +1056,10 @@ atmel_hlcdc_plane_create_properties(struct drm_device *dev) dev->mode_config.rotation_property = drm_mode_create_rotation_property(dev, - BIT(DRM_ROTATE_0) | - BIT(DRM_ROTATE_90) | - BIT(DRM_ROTATE_180) | - BIT(DRM_ROTATE_270)); + DRM_ROTATE_0 | + DRM_ROTATE_90 | + DRM_ROTATE_180 | + DRM_ROTATE_270); if (!dev->mode_config.rotation_property) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 813821e..f59e8c0 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -2362,7 +2362,7 @@ int __drm_atomic_helper_set_config(struct drm_mode_set *set, primary_state->crtc_h = vdisplay; primary_state->src_x = set->x << 16; primary_state->src_y = set->y << 16; - if (primary_state->rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270))) { + if (primary_state->rotation & 
(DRM_ROTATE_90 | DRM_ROTATE_270)) { primary_state->src_w = vdisplay << 16; primary_state->src_h = hdisplay << 16; } else { @@ -3047,7 +3047,7 @@ void drm_atomic_helper_plane_reset(struct drm_plane *plane) if (plane->state) { plane->state->plane = plane; - plane->state->rotation = BIT(DRM_ROTATE_0); + plane->state->rotation = DRM_ROTATE_0; } } EXPORT_SYMBOL(drm_atomic_helper_plane_reset); diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index f1d9f05..909a025 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -2804,8 +2804,8 @@ int drm_crtc_check_viewport(const struct drm_crtc *crtc, drm_crtc_get_hv_timing(mode, &hdisplay, &vdisplay); if (crtc->state && - crtc->primary->state->rotation & (BIT(DRM_ROTATE_90) | - BIT(DRM_ROTATE_270))) + crtc->primary->state->rotation & (DRM_ROTATE_90 | + DRM_ROTATE_270)) swap(hdisplay, vdisplay); return check_src_coords(x << 16, y << 16, @@ -5646,9 +5646,9 @@ int drm_mode_destroy_dumb_ioctl(struct drm_device *dev, * Eg. if the hardware supports everything except DRM_REFLECT_X * one could call this function like this: * - * drm_rotation_simplify(rotation, BIT(DRM_ROTATE_0) | - * BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_180) | - * BIT(DRM_ROTATE_270) | BIT(DRM_REFLECT_Y)); + * drm_rotation_simplify(rotation, DRM_ROTATE_0 | + * DRM_ROTATE_90 | DRM_ROTATE_180 | + * DRM_ROTATE_270 | DRM_REFLECT_Y); * * to eliminate the DRM_ROTATE_X flag. Depending on what kind of * transforms the hardware supports, this function may not @@ -5659,7 +5659,7 @@ unsigned int drm_rotation_simplify(unsigned int rotation, unsigned int supported_rotations) { if (rotation & ~supported_rotations) { - rotation ^= BIT(DRM_REFLECT_X) | BIT(DRM_REFLECT_Y); + rotation ^= DRM_REFLECT_X | DRM_REFLECT_Y; rotation = (rotation & DRM_REFLECT_MASK) | BIT((ffs(rotation & DRM_ROTATE_MASK) + 1) % 4); } @@ -5788,12 +5788,12 @@ struct drm_property *drm_mode_create_rotation_property(struct drm_device *dev, unsigned int supported_rotations) { static const struct drm_prop_enum_list props[] = { - { DRM_ROTATE_0, "rotate-0" }, - { DRM_ROTATE_90, "rotate-90" }, - { DRM_ROTATE_180, "rotate-180" }, - { DRM_ROTATE_270, "rotate-270" }, - { DRM_REFLECT_X, "reflect-x" }, - { DRM_REFLECT_Y, "reflect-y" }, + { __builtin_ffs(DRM_ROTATE_0) - 1, "rotate-0" }, + { __builtin_ffs(DRM_ROTATE_90) - 1, "rotate-90" }, + { __builtin_ffs(DRM_ROTATE_180) - 1, "rotate-180" }, + { __builtin_ffs(DRM_ROTATE_270) - 1, "rotate-270" }, + { __builtin_ffs(DRM_REFLECT_X) - 1, "reflect-x" }, + { __builtin_ffs(DRM_REFLECT_Y) - 1, "reflect-y" }, }; return drm_property_create_bitmask(dev, 0, "rotation", diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index ce54e98..d4896f9 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -335,7 +335,7 @@ retry: goto fail; } - plane_state->rotation = BIT(DRM_ROTATE_0); + plane_state->rotation = DRM_ROTATE_0; plane->old_fb = plane->fb; plane_mask |= 1 << drm_plane_index(plane); @@ -395,7 +395,7 @@ static int restore_fbdev_mode(struct drm_fb_helper *fb_helper) if (dev->mode_config.rotation_property) { drm_mode_plane_set_obj_prop(plane, dev->mode_config.rotation_property, - BIT(DRM_ROTATE_0)); + DRM_ROTATE_0); } } diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index 16c4a7b..c360e30 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -274,7 +274,7 @@ int drm_primary_helper_update(struct drm_plane *plane, struct drm_crtc *crtc, ret = 
drm_plane_helper_check_update(plane, crtc, fb, &src, &dest, &clip, - BIT(DRM_ROTATE_0), + DRM_ROTATE_0, DRM_PLANE_HELPER_NO_SCALING, DRM_PLANE_HELPER_NO_SCALING, false, false, &visible); diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c index a8e2c86..4063f6e 100644 --- a/drivers/gpu/drm/drm_rect.c +++ b/drivers/gpu/drm/drm_rect.c @@ -317,38 +317,38 @@ void drm_rect_rotate(struct drm_rect *r, { struct drm_rect tmp; - if (rotation & (BIT(DRM_REFLECT_X) | BIT(DRM_REFLECT_Y))) { + if (rotation & (DRM_REFLECT_X | DRM_REFLECT_Y)) { tmp = *r; - if (rotation & BIT(DRM_REFLECT_X)) { + if (rotation & DRM_REFLECT_X) { r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; } - if (rotation & BIT(DRM_REFLECT_Y)) { + if (rotation & DRM_REFLECT_Y) { r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; } } switch (rotation & DRM_ROTATE_MASK) { - case BIT(DRM_ROTATE_0): + case DRM_ROTATE_0: break; - case BIT(DRM_ROTATE_90): + case DRM_ROTATE_90: tmp = *r; r->x1 = tmp.y1; r->x2 = tmp.y2; r->y1 = width - tmp.x2; r->y2 = width - tmp.x1; break; - case BIT(DRM_ROTATE_180): + case DRM_ROTATE_180: tmp = *r; r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; break; - case BIT(DRM_ROTATE_270): + case DRM_ROTATE_270: tmp = *r; r->x1 = height - tmp.y2; r->x2 = height - tmp.y1; @@ -392,23 +392,23 @@ void drm_rect_rotate_inv(struct drm_rect *r, struct drm_rect tmp; switch (rotation & DRM_ROTATE_MASK) { - case BIT(DRM_ROTATE_0): + case DRM_ROTATE_0: break; - case BIT(DRM_ROTATE_90): + case DRM_ROTATE_90: tmp = *r; r->x1 = width - tmp.y2; r->x2 = width - tmp.y1; r->y1 = tmp.x1; r->y2 = tmp.x2; break; - case BIT(DRM_ROTATE_180): + case DRM_ROTATE_180: tmp = *r; r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; break; - case BIT(DRM_ROTATE_270): + case DRM_ROTATE_270: tmp = *r; r->x1 = tmp.y1; r->x2 = tmp.y2; @@ -419,15 +419,15 @@ void drm_rect_rotate_inv(struct drm_rect *r, break; } - if (rotation & (BIT(DRM_REFLECT_X) | BIT(DRM_REFLECT_Y))) { + if (rotation & (DRM_REFLECT_X | DRM_REFLECT_Y)) { tmp = *r; - if (rotation & BIT(DRM_REFLECT_X)) { + if (rotation & DRM_REFLECT_X) { r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; } - if (rotation & BIT(DRM_REFLECT_Y)) { + if (rotation & DRM_REFLECT_Y) { r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 844fea7..9b03cb2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3089,12 +3089,12 @@ static const char *plane_rotation(unsigned int rotation) */ snprintf(buf, sizeof(buf), "%s%s%s%s%s%s(0x%08x)", - (rotation & BIT(DRM_ROTATE_0)) ? "0 " : "", - (rotation & BIT(DRM_ROTATE_90)) ? "90 " : "", - (rotation & BIT(DRM_ROTATE_180)) ? "180 " : "", - (rotation & BIT(DRM_ROTATE_270)) ? "270 " : "", - (rotation & BIT(DRM_REFLECT_X)) ? "FLIPX " : "", - (rotation & BIT(DRM_REFLECT_Y)) ? "FLIPY " : "", + (rotation & DRM_ROTATE_0) ? "0 " : "", + (rotation & DRM_ROTATE_90) ? "90 " : "", + (rotation & DRM_ROTATE_180) ? "180 " : "", + (rotation & DRM_ROTATE_270) ? "270 " : "", + (rotation & DRM_REFLECT_X) ? "FLIPX " : "", + (rotation & DRM_REFLECT_Y) ? 
"FLIPY " : "", rotation); return buf; diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 7de7721..7cc9c76 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -55,7 +55,7 @@ intel_create_plane_state(struct drm_plane *plane) return NULL; state->base.plane = plane; - state->base.rotation = BIT(DRM_ROTATE_0); + state->base.rotation = DRM_ROTATE_0; state->ckey.flags = I915_SET_COLORKEY_NONE; return state; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c457eed..b8a42d1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2688,7 +2688,7 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, intel_crtc->dspaddr_offset = linear_offset; } - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { dspcntr |= DISPPLANE_ROTATE_180; x += (crtc_state->pipe_src_w - 1); @@ -2791,7 +2791,7 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, intel_compute_tile_offset(&x, &y, fb, 0, fb->pitches[0], rotation); linear_offset -= intel_crtc->dspaddr_offset; - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { dspcntr |= DISPPLANE_ROTATE_180; if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) { @@ -2952,17 +2952,17 @@ u32 skl_plane_ctl_tiling(uint64_t fb_modifier) u32 skl_plane_ctl_rotation(unsigned int rotation) { switch (rotation) { - case BIT(DRM_ROTATE_0): + case DRM_ROTATE_0: break; /* * DRM_ROTATE_ is counter clockwise to stay compatible with Xrandr * while i915 HW rotation is clockwise, thats why this swapping. */ - case BIT(DRM_ROTATE_90): + case DRM_ROTATE_90: return PLANE_CTL_ROTATE_270; - case BIT(DRM_ROTATE_180): + case DRM_ROTATE_180: return PLANE_CTL_ROTATE_180; - case BIT(DRM_ROTATE_270): + case DRM_ROTATE_270: return PLANE_CTL_ROTATE_90; default: MISSING_CASE(rotation); @@ -4248,7 +4248,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state) intel_crtc->pipe, SKL_CRTC_INDEX); return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX, - &state->scaler_state.scaler_id, BIT(DRM_ROTATE_0), + &state->scaler_state.scaler_id, DRM_ROTATE_0, state->pipe_src_w, state->pipe_src_h, adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay); } @@ -10263,7 +10263,7 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, if (HAS_DDI(dev)) cntl |= CURSOR_PIPE_CSC_ENABLE; - if (plane_state->base.rotation == BIT(DRM_ROTATE_180)) + if (plane_state->base.rotation == DRM_ROTATE_180) cntl |= CURSOR_ROTATE_180; } @@ -10309,7 +10309,7 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, /* ILK+ do this automagically */ if (HAS_GMCH_DISPLAY(dev) && - plane_state->base.rotation == BIT(DRM_ROTATE_180)) { + plane_state->base.rotation == DRM_ROTATE_180) { base += (plane_state->base.crtc_h * plane_state->base.crtc_w - 1) * 4; } @@ -14306,11 +14306,11 @@ fail: void intel_create_rotation_property(struct drm_device *dev, struct intel_plane *plane) { if (!dev->mode_config.rotation_property) { - unsigned long flags = BIT(DRM_ROTATE_0) | - BIT(DRM_ROTATE_180); + unsigned long flags = DRM_ROTATE_0 | + DRM_ROTATE_180; if (INTEL_INFO(dev)->gen >= 9) - flags |= BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270); + flags |= DRM_ROTATE_90 | DRM_ROTATE_270; dev->mode_config.rotation_property = drm_mode_create_rotation_property(dev, flags); @@ -14453,8 +14453,8 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev, if 
(!dev->mode_config.rotation_property) dev->mode_config.rotation_property = drm_mode_create_rotation_property(dev, - BIT(DRM_ROTATE_0) | - BIT(DRM_ROTATE_180)); + DRM_ROTATE_0 | + DRM_ROTATE_180); if (dev->mode_config.rotation_property) drm_object_attach_property(&cursor->base.base, dev->mode_config.rotation_property, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index cc937a1..d55b5e0 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1258,7 +1258,7 @@ unsigned int intel_tile_height(const struct drm_i915_private *dev_priv, static inline bool intel_rotation_90_or_270(unsigned int rotation) { - return rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270)); + return rotation & (DRM_ROTATE_90 | DRM_ROTATE_270); } void intel_create_rotation_property(struct drm_device *dev, diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 6a7ad3e..fa11277 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -775,7 +775,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return false; } if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_G4X(dev_priv) && - cache->plane.rotation != BIT(DRM_ROTATE_0)) { + cache->plane.rotation != DRM_ROTATE_0) { fbc->no_fbc_reason = "rotation unsupported"; return false; } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 86b00c6..1da9ce4 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -223,7 +223,7 @@ static int intelfb_create(struct drm_fb_helper *helper, * This also validates that any existing fb inherited from the * BIOS is suitable for own access. */ - ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0)); + ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); if (ret) goto out_unlock; @@ -289,7 +289,7 @@ static int intelfb_create(struct drm_fb_helper *helper, out_destroy_fbi: drm_fb_helper_release_fbi(helper); out_unpin: - intel_unpin_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0)); + intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); out_unlock: mutex_unlock(&dev->struct_mutex); return ret; @@ -554,7 +554,7 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) if (ifbdev->fb) { mutex_lock(&ifbdev->helper.dev->struct_mutex); - intel_unpin_fb_obj(&ifbdev->fb->base, BIT(DRM_ROTATE_0)); + intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); mutex_unlock(&ifbdev->helper.dev->struct_mutex); drm_framebuffer_remove(&ifbdev->fb->base); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 7c08e4f..e43d97c 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -444,7 +444,7 @@ vlv_update_plane(struct drm_plane *dplane, fb->pitches[0], rotation); linear_offset -= sprsurf_offset; - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { sprctl |= SP_ROTATE_180; x += src_w; @@ -577,7 +577,7 @@ ivb_update_plane(struct drm_plane *plane, fb->pitches[0], rotation); linear_offset -= sprsurf_offset; - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { sprctl |= SPRITE_ROTATE_180; /* HSW and BDW does this automagically in hardware */ @@ -714,7 +714,7 @@ ilk_update_plane(struct drm_plane *plane, fb->pitches[0], rotation); linear_offset -= dvssurf_offset; - if (rotation == BIT(DRM_ROTATE_180)) { + if (rotation == DRM_ROTATE_180) { dvscntr |= DVS_ROTATE_180; x += src_w; diff --git 
a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 432c098..a02a24e 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -78,7 +78,7 @@ static void mdp5_plane_install_rotation_property(struct drm_device *dev, if (!dev->mode_config.rotation_property) dev->mode_config.rotation_property = drm_mode_create_rotation_property(dev, - BIT(DRM_REFLECT_X) | BIT(DRM_REFLECT_Y)); + DRM_REFLECT_X | DRM_REFLECT_Y); if (dev->mode_config.rotation_property) drm_object_attach_property(&plane->base, @@ -309,8 +309,8 @@ static int mdp5_plane_atomic_check(struct drm_plane *plane, return -EINVAL; } - hflip = !!(state->rotation & BIT(DRM_REFLECT_X)); - vflip = !!(state->rotation & BIT(DRM_REFLECT_Y)); + hflip = !!(state->rotation & DRM_REFLECT_X); + vflip = !!(state->rotation & DRM_REFLECT_Y); if ((vflip && !(mdp5_plane->caps & MDP_PIPE_CAP_VFLIP)) || (hflip && !(mdp5_plane->caps & MDP_PIPE_CAP_HFLIP))) { dev_err(plane->dev->dev, @@ -743,8 +743,8 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, config |= get_scale_config(format, src_h, crtc_h, false); DBG("scale config = %x", config); - hflip = !!(pstate->rotation & BIT(DRM_REFLECT_X)); - vflip = !!(pstate->rotation & BIT(DRM_REFLECT_Y)); + hflip = !!(pstate->rotation & DRM_REFLECT_X); + vflip = !!(pstate->rotation & DRM_REFLECT_Y); spin_lock_irqsave(&mdp5_plane->pipe_lock, flags); diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 26c6134..3dd78f2 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -295,9 +295,9 @@ static int omap_modeset_init_properties(struct drm_device *dev) if (priv->has_dmm) { dev->mode_config.rotation_property = drm_mode_create_rotation_property(dev, - BIT(DRM_ROTATE_0) | BIT(DRM_ROTATE_90) | - BIT(DRM_ROTATE_180) | BIT(DRM_ROTATE_270) | - BIT(DRM_REFLECT_X) | BIT(DRM_REFLECT_Y)); + DRM_ROTATE_0 | DRM_ROTATE_90 | + DRM_ROTATE_180 | DRM_ROTATE_270 | + DRM_REFLECT_X | DRM_REFLECT_Y); if (!dev->mode_config.rotation_property) return -ENOMEM; } diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 31f5178..5f3337f 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -179,24 +179,24 @@ void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, (uint32_t)win->rotation); /* fallthru to default to no rotation */ case 0: - case BIT(DRM_ROTATE_0): + case DRM_ROTATE_0: orient = 0; break; - case BIT(DRM_ROTATE_90): + case DRM_ROTATE_90: orient = MASK_XY_FLIP | MASK_X_INVERT; break; - case BIT(DRM_ROTATE_180): + case DRM_ROTATE_180: orient = MASK_X_INVERT | MASK_Y_INVERT; break; - case BIT(DRM_ROTATE_270): + case DRM_ROTATE_270: orient = MASK_XY_FLIP | MASK_Y_INVERT; break; } - if (win->rotation & BIT(DRM_REFLECT_X)) + if (win->rotation & DRM_REFLECT_X) orient ^= MASK_X_INVERT; - if (win->rotation & BIT(DRM_REFLECT_Y)) + if (win->rotation & DRM_REFLECT_Y) orient ^= MASK_Y_INVERT; /* adjust x,y offset for flip/invert: */ @@ -213,7 +213,7 @@ void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, } else { switch (win->rotation & DRM_ROTATE_MASK) { case 0: - case BIT(DRM_ROTATE_0): + case DRM_ROTATE_0: /* OK */ break; diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index 5252ab7..4c7727e 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -109,8 +109,8 @@ static void omap_plane_atomic_update(struct 
drm_plane *plane, win.src_y = state->src_y >> 16; switch (state->rotation & DRM_ROTATE_MASK) { - case BIT(DRM_ROTATE_90): - case BIT(DRM_ROTATE_270): + case DRM_ROTATE_90: + case DRM_ROTATE_270: win.src_w = state->src_h >> 16; win.src_h = state->src_w >> 16; break; @@ -149,7 +149,7 @@ static void omap_plane_atomic_disable(struct drm_plane *plane, struct omap_plane_state *omap_state = to_omap_plane_state(plane->state); struct omap_plane *omap_plane = to_omap_plane(plane); - plane->state->rotation = BIT(DRM_ROTATE_0); + plane->state->rotation = DRM_ROTATE_0; omap_state->zorder = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : omap_plane->id; @@ -178,7 +178,7 @@ static int omap_plane_atomic_check(struct drm_plane *plane, return -EINVAL; if (state->fb) { - if (state->rotation != BIT(DRM_ROTATE_0) && + if (state->rotation != DRM_ROTATE_0 && !omap_framebuffer_supports_rotation(state->fb)) return -EINVAL; } @@ -269,7 +269,7 @@ static void omap_plane_reset(struct drm_plane *plane) */ omap_state->zorder = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : omap_plane->id; - omap_state->base.rotation = BIT(DRM_ROTATE_0); + omap_state->base.rotation = DRM_ROTATE_0; plane->state = &omap_state->base; plane->state->plane = plane; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 44e0708..6c12fec 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -83,14 +83,15 @@ static inline uint64_t I642U64(int64_t val) * specified amount in degrees in counter clockwise direction. DRM_REFLECT_X and * DRM_REFLECT_Y reflects the image along the specified axis prior to rotation */ -#define DRM_ROTATE_MASK 0x0f -#define DRM_ROTATE_0 0 -#define DRM_ROTATE_90 1 -#define DRM_ROTATE_180 2 -#define DRM_ROTATE_270 3 -#define DRM_REFLECT_MASK (~DRM_ROTATE_MASK) -#define DRM_REFLECT_X 4 -#define DRM_REFLECT_Y 5 +#define DRM_ROTATE_0 BIT(0) +#define DRM_ROTATE_90 BIT(1) +#define DRM_ROTATE_180 BIT(2) +#define DRM_ROTATE_270 BIT(3) +#define DRM_ROTATE_MASK (DRM_ROTATE_0 | DRM_ROTATE_90 | \ + DRM_ROTATE_180 | DRM_ROTATE_270) +#define DRM_REFLECT_X BIT(4) +#define DRM_REFLECT_Y BIT(5) +#define DRM_REFLECT_MASK (DRM_REFLECT_X | DRM_REFLECT_Y) enum drm_connector_force { DRM_FORCE_UNSPECIFIED, -- cgit v0.10.2 From 1e1a5f8f8c3a4b24b3fc0340880cf1e3949f4997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Jul 2016 19:06:56 +0300 Subject: drm: Warn about negative sizes when calculating scale factor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passing negative width/hight to scale factor calculations is not legal. Let's WARN if that happens. 
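The plane helpers express scale factors as src:dst ratios in 16.16 fixed point (see the min_scale/max_scale kerneldoc further down), which is why a negative width or height is nonsensical input. A standalone worked example of that arithmetic (illustrative; not the kernel's exact implementation):

#include <stdio.h>

/* 16.16 fixed-point scale factor: src is in 16.16, dst in whole pixels. */
static int calc_scale_16_16(long src, long dst)
{
	if (src < 0 || dst <= 0)
		return -1;	/* the kernel helper rejects (and now WARNs on) this */
	return (int)(src / dst);
}

int main(void)
{
	int scale = calc_scale_16_16(1920L << 16, 960);	/* 1920 src pixels onto 960 */

	printf("scale = 0x%x = %.2f\n", scale, scale / 65536.0);	/* 0x20000 = 2.00 */
	return 0;
}
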
Signed-off-by: Ville Syrjälä Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1469549224-1860-2-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c index 4063f6e..73e53a8 100644 --- a/drivers/gpu/drm/drm_rect.c +++ b/drivers/gpu/drm/drm_rect.c @@ -100,7 +100,7 @@ static int drm_calc_scale(int src, int dst) { int scale = 0; - if (src < 0 || dst < 0) + if (WARN_ON(src < 0 || dst < 0)) return -EINVAL; if (dst == 0) -- cgit v0.10.2 From d7da824d9edeb7d83676c11d800b8243d87eafbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Jul 2016 19:06:57 +0300 Subject: drm: Store clipped src/dst coordinatee in drm_plane_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pretty much all driver will have need for the clipped plane coordinates, so let's stuff then into drm_plane_state. Signed-off-by: Ville Syrjälä Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1469549224-1860-3-git-send-email-ville.syrjala@linux.intel.com diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 6c12fec..b618b50 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -35,6 +35,7 @@ #include #include #include +#include struct drm_device; struct drm_mode_set; @@ -1415,6 +1416,9 @@ struct drm_connector { * @zpos: priority of the given plane on crtc (optional) * @normalized_zpos: normalized value of zpos: unique, range from 0 to N-1 * where N is the number of active planes for given crtc + * @src: clipped source coordinates of the plane (in 16.16) + * @dst: clipped destination coordinates of the plane + * @visible: visibility of the plane * @state: backpointer to global drm_atomic_state */ struct drm_plane_state { @@ -1439,6 +1443,15 @@ struct drm_plane_state { unsigned int zpos; unsigned int normalized_zpos; + /* Clipped coordinates */ + struct drm_rect src, dst; + + /* + * Is the plane actually visible? Can be false even + * if fb!=NULL and crtc!=NULL, due to clipping. + */ + bool visible; + struct drm_atomic_state *state; }; -- cgit v0.10.2 From df86af9133b4958a04c44828d29617eb1a6ff31c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 8 Aug 2016 10:55:10 +0300 Subject: drm/plane-helper: Add drm_plane_helper_check_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a version of drm_plane_helper_check_update() which takes a plane state instead of having the caller pass in everything. And to reduce code duplication, let's reimplement drm_plane_helper_check_update() in terms of the new function, by having a tempororary plane state on the stack. 
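The new helper takes the plane state directly and fills in state->src, state->dst and state->visible as a side effect. A hedged sketch of a driver-side caller (the driver name, clip-rect derivation and scale limits are placeholders; the helper signature matches the hunk that follows):

/* Hypothetical atomic_check using the new helper; a real driver would
 * look up the CRTC state from the drm_atomic_state rather than poking
 * at crtc->state directly. */
static int example_plane_atomic_check(struct drm_plane *plane,
				      struct drm_plane_state *state)
{
	struct drm_rect clip = {};

	if (state->crtc) {
		clip.x2 = state->crtc->state->mode.hdisplay;
		clip.y2 = state->crtc->state->mode.vdisplay;
	}

	return drm_plane_helper_check_state(state, &clip,
					    DRM_PLANE_HELPER_NO_SCALING,
					    DRM_PLANE_HELPER_NO_SCALING,
					    false, true);
}
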
v2: Add a note that the function modifies the state (Chris) v3: Fix drm_plane_helper_check_update() y coordinates (Daniel Kurtz) Cc: Daniel Kurtz Cc: Chris Wilson Signed-off-by: Ville Syrjälä Reviewed-by: Sean Paul (v2) Signed-off-by: Ville Syrjälä Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1470642910-14073-1-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index c360e30..b522aab 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -108,14 +108,9 @@ static int get_connectors_for_crtc(struct drm_crtc *crtc, } /** - * drm_plane_helper_check_update() - Check plane update for validity - * @plane: plane object to update - * @crtc: owning CRTC of owning plane - * @fb: framebuffer to flip onto plane - * @src: source coordinates in 16.16 fixed point - * @dest: integer destination coordinates + * drm_plane_helper_check_state() - Check plane state for validity + * @state: plane state to check * @clip: integer clipping coordinates - * @rotation: plane rotation * @min_scale: minimum @src:@dest scaling factor in 16.16 fixed point * @max_scale: maximum @src:@dest scaling factor in 16.16 fixed point * @can_position: is it legal to position the plane such that it @@ -123,10 +118,9 @@ static int get_connectors_for_crtc(struct drm_crtc *crtc, * only be false for primary planes. * @can_update_disabled: can the plane be updated while the crtc * is disabled? - * @visible: output parameter indicating whether plane is still visible after - * clipping * - * Checks that a desired plane update is valid. Drivers that provide + * Checks that a desired plane update is valid, and updates various + * bits of derived state (clipped coordinates etc.). Drivers that provide * their own plane handling rather than helper-provided implementations may * still wish to call this function to avoid duplication of error checking * code.
@@ -134,29 +128,38 @@ static int get_connectors_for_crtc(struct drm_crtc *crtc, * RETURNS: * Zero if update appears valid, error code on failure */ -int drm_plane_helper_check_update(struct drm_plane *plane, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_rect *src, - struct drm_rect *dest, - const struct drm_rect *clip, - unsigned int rotation, - int min_scale, - int max_scale, - bool can_position, - bool can_update_disabled, - bool *visible) +int drm_plane_helper_check_state(struct drm_plane_state *state, + const struct drm_rect *clip, + int min_scale, + int max_scale, + bool can_position, + bool can_update_disabled) { + struct drm_crtc *crtc = state->crtc; + struct drm_framebuffer *fb = state->fb; + struct drm_rect *src = &state->src; + struct drm_rect *dst = &state->dst; + unsigned int rotation = state->rotation; int hscale, vscale; + src->x1 = state->src_x; + src->y1 = state->src_y; + src->x2 = state->src_x + state->src_w; + src->y2 = state->src_y + state->src_h; + + dst->x1 = state->crtc_x; + dst->y1 = state->crtc_y; + dst->x2 = state->crtc_x + state->crtc_w; + dst->y2 = state->crtc_y + state->crtc_h; + if (!fb) { - *visible = false; + state->visible = false; return 0; } /* crtc should only be NULL when disabling (i.e., !fb) */ if (WARN_ON(!crtc)) { - *visible = false; + state->visible = false; return 0; } @@ -168,20 +171,20 @@ int drm_plane_helper_check_update(struct drm_plane *plane, drm_rect_rotate(src, fb->width << 16, fb->height << 16, rotation); /* Check scaling */ - hscale = drm_rect_calc_hscale(src, dest, min_scale, max_scale); - vscale = drm_rect_calc_vscale(src, dest, min_scale, max_scale); + hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); + vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale); if (hscale < 0 || vscale < 0) { DRM_DEBUG_KMS("Invalid scaling of plane\n"); - drm_rect_debug_print("src: ", src, true); - drm_rect_debug_print("dst: ", dest, false); + drm_rect_debug_print("src: ", &state->src, true); + drm_rect_debug_print("dst: ", &state->dst, false); return -ERANGE; } - *visible = drm_rect_clip_scaled(src, dest, clip, hscale, vscale); + state->visible = drm_rect_clip_scaled(src, dst, clip, hscale, vscale); drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation); - if (!*visible) + if (!state->visible) /* * Plane isn't visible; some drivers can handle this * so we just return success here. Drivers that can't @@ -191,15 +194,87 @@ int drm_plane_helper_check_update(struct drm_plane *plane, */ return 0; - if (!can_position && !drm_rect_equals(dest, clip)) { + if (!can_position && !drm_rect_equals(dst, clip)) { DRM_DEBUG_KMS("Plane must cover entire CRTC\n"); - drm_rect_debug_print("dst: ", dest, false); + drm_rect_debug_print("dst: ", dst, false); drm_rect_debug_print("clip: ", clip, false); return -EINVAL; } return 0; } +EXPORT_SYMBOL(drm_plane_helper_check_state); + +/** + * drm_plane_helper_check_update() - Check plane update for validity + * @plane: plane object to update + * @crtc: owning CRTC of owning plane + * @fb: framebuffer to flip onto plane + * @src: source coordinates in 16.16 fixed point + * @dest: integer destination coordinates + * @clip: integer clipping coordinates + * @rotation: plane rotation + * @min_scale: minimum @src:@dest scaling factor in 16.16 fixed point + * @max_scale: maximum @src:@dest scaling factor in 16.16 fixed point + * @can_position: is it legal to position the plane such that it + * doesn't cover the entire crtc? This will generally + * only be false for primary planes. 
+ * @can_update_disabled: can the plane be updated while the crtc + * is disabled? + * @visible: output parameter indicating whether plane is still visible after + * clipping + * + * Checks that a desired plane update is valid. Drivers that provide + * their own plane handling rather than helper-provided implementations may + * still wish to call this function to avoid duplication of error checking + * code. + * + * RETURNS: + * Zero if update appears valid, error code on failure + */ +int drm_plane_helper_check_update(struct drm_plane *plane, + struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_rect *src, + struct drm_rect *dst, + const struct drm_rect *clip, + unsigned int rotation, + int min_scale, + int max_scale, + bool can_position, + bool can_update_disabled, + bool *visible) +{ + struct drm_plane_state state = { + .plane = plane, + .crtc = crtc, + .fb = fb, + .src_x = src->x1, + .src_y = src->y1, + .src_w = drm_rect_width(src), + .src_h = drm_rect_height(src), + .crtc_x = dst->x1, + .crtc_y = dst->y1, + .crtc_w = drm_rect_width(dst), + .crtc_h = drm_rect_height(dst), + .rotation = rotation, + .visible = *visible, + }; + int ret; + + ret = drm_plane_helper_check_state(&state, clip, + min_scale, max_scale, + can_position, + can_update_disabled); + if (ret) + return ret; + + *src = state.src; + *dst = state.dst; + *visible = state.visible; + + return 0; +} EXPORT_SYMBOL(drm_plane_helper_check_update); /** diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h index 0e0c357..fbc8ecb 100644 --- a/include/drm/drm_plane_helper.h +++ b/include/drm/drm_plane_helper.h @@ -40,6 +40,11 @@ int drm_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, const struct drm_crtc_funcs *funcs); +int drm_plane_helper_check_state(struct drm_plane_state *state, + const struct drm_rect *clip, + int min_scale, int max_scale, + bool can_position, + bool can_update_disabled); int drm_plane_helper_check_update(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, -- cgit v0.10.2 From 936e71e314d393cd74c42c81b00b2092330c802d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Jul 2016 19:06:59 +0300 Subject: drm/i915: Use drm_plane_state.{src,dst,visible} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the private drm_rects/flags in intel_plane_state with the ones now living in drm_plane_state. Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1469549224-1860-5-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 7cc9c76..ffc5730 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -139,14 +139,14 @@ static int intel_plane_atomic_check(struct drm_plane *plane, * we want to keep another copy internal to our driver that we can * clip/modify ourselves. 
*/ - intel_state->src.x1 = state->src_x; - intel_state->src.y1 = state->src_y; - intel_state->src.x2 = state->src_x + state->src_w; - intel_state->src.y2 = state->src_y + state->src_h; - intel_state->dst.x1 = state->crtc_x; - intel_state->dst.y1 = state->crtc_y; - intel_state->dst.x2 = state->crtc_x + state->crtc_w; - intel_state->dst.y2 = state->crtc_y + state->crtc_h; + intel_state->base.src.x1 = state->src_x; + intel_state->base.src.y1 = state->src_y; + intel_state->base.src.x2 = state->src_x + state->src_w; + intel_state->base.src.y2 = state->src_y + state->src_h; + intel_state->base.dst.x1 = state->crtc_x; + intel_state->base.dst.y1 = state->crtc_y; + intel_state->base.dst.x2 = state->crtc_x + state->crtc_w; + intel_state->base.dst.y2 = state->crtc_y + state->crtc_h; /* Clip all planes to CRTC size, or 0x0 if CRTC is disabled */ intel_state->clip.x1 = 0; @@ -180,7 +180,7 @@ static int intel_plane_atomic_check(struct drm_plane *plane, } } - intel_state->visible = false; + intel_state->base.visible = false; ret = intel_plane->check_plane(plane, crtc_state, intel_state); if (ret) return ret; @@ -196,7 +196,7 @@ static void intel_plane_atomic_update(struct drm_plane *plane, to_intel_plane_state(plane->state); struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc; - if (intel_state->visible) + if (intel_state->base.visible) intel_plane->update_plane(plane, to_intel_crtc_state(crtc->state), intel_state); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b8a42d1..8ecfa95 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2565,7 +2565,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. 
*/ - to_intel_plane_state(plane_state)->visible = false; + to_intel_plane_state(plane_state)->base.visible = false; crtc_state->plane_mask &= ~(1 << drm_plane_index(primary)); intel_pre_disable_primary_noatomic(&intel_crtc->base); intel_plane->disable_plane(primary, &intel_crtc->base); @@ -2583,14 +2583,14 @@ valid_fb: plane_state->crtc_w = fb->width; plane_state->crtc_h = fb->height; - intel_state->src.x1 = plane_state->src_x; - intel_state->src.y1 = plane_state->src_y; - intel_state->src.x2 = plane_state->src_x + plane_state->src_w; - intel_state->src.y2 = plane_state->src_y + plane_state->src_h; - intel_state->dst.x1 = plane_state->crtc_x; - intel_state->dst.y1 = plane_state->crtc_y; - intel_state->dst.x2 = plane_state->crtc_x + plane_state->crtc_w; - intel_state->dst.y2 = plane_state->crtc_y + plane_state->crtc_h; + intel_state->base.src.x1 = plane_state->src_x; + intel_state->base.src.y1 = plane_state->src_y; + intel_state->base.src.x2 = plane_state->src_x + plane_state->src_w; + intel_state->base.src.y2 = plane_state->src_y + plane_state->src_h; + intel_state->base.dst.x1 = plane_state->crtc_x; + intel_state->base.dst.y1 = plane_state->crtc_y; + intel_state->base.dst.x2 = plane_state->crtc_x + plane_state->crtc_w; + intel_state->base.dst.y2 = plane_state->crtc_y + plane_state->crtc_h; obj = intel_fb_obj(fb); if (obj->tiling_mode != I915_TILING_NONE) @@ -2618,8 +2618,8 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, i915_reg_t reg = DSPCNTR(plane); unsigned int rotation = plane_state->base.rotation; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); - int x = plane_state->src.x1 >> 16; - int y = plane_state->src.y1 >> 16; + int x = plane_state->base.src.x1 >> 16; + int y = plane_state->base.src.y1 >> 16; dspcntr = DISPPLANE_GAMMA_ENABLE; @@ -2748,8 +2748,8 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, i915_reg_t reg = DSPCNTR(plane); unsigned int rotation = plane_state->base.rotation; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); - int x = plane_state->src.x1 >> 16; - int y = plane_state->src.y1 >> 16; + int x = plane_state->base.src.x1 >> 16; + int y = plane_state->base.src.y1 >> 16; dspcntr = DISPPLANE_GAMMA_ENABLE; dspcntr |= DISPLAY_PLANE_ENABLE; @@ -2987,14 +2987,14 @@ static void skylake_update_primary_plane(struct drm_plane *plane, int x_offset, y_offset; u32 surf_addr; int scaler_id = plane_state->scaler_id; - int src_x = plane_state->src.x1 >> 16; - int src_y = plane_state->src.y1 >> 16; - int src_w = drm_rect_width(&plane_state->src) >> 16; - int src_h = drm_rect_height(&plane_state->src) >> 16; - int dst_x = plane_state->dst.x1; - int dst_y = plane_state->dst.y1; - int dst_w = drm_rect_width(&plane_state->dst); - int dst_h = drm_rect_height(&plane_state->dst); + int src_x = plane_state->base.src.x1 >> 16; + int src_y = plane_state->base.src.y1 >> 16; + int src_w = drm_rect_width(&plane_state->base.src) >> 16; + int src_h = drm_rect_height(&plane_state->base.src) >> 16; + int dst_x = plane_state->base.dst.x1; + int dst_y = plane_state->base.dst.y1; + int dst_w = drm_rect_width(&plane_state->base.dst); + int dst_h = drm_rect_height(&plane_state->base.dst); plane_ctl = PLANE_CTL_ENABLE | PLANE_CTL_PIPE_GAMMA_ENABLE | @@ -3009,7 +3009,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane, fb->pixel_format); surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj, 0); - WARN_ON(drm_rect_width(&plane_state->src) == 0); + WARN_ON(drm_rect_width(&plane_state->base.src) == 0); if 
(intel_rotation_90_or_270(rotation)) { int cpp = drm_format_plane_cpp(fb->pixel_format, 0); @@ -3098,7 +3098,7 @@ static void intel_update_primary_planes(struct drm_device *dev) drm_modeset_lock_crtc(crtc, &plane->base); plane_state = to_intel_plane_state(plane->base.state); - if (plane_state->visible) + if (plane_state->base.visible) plane->update_plane(&plane->base, to_intel_crtc_state(crtc->state), plane_state); @@ -4273,7 +4273,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, struct drm_framebuffer *fb = plane_state->base.fb; int ret; - bool force_detach = !fb || !plane_state->visible; + bool force_detach = !fb || !plane_state->base.visible; DRM_DEBUG_KMS("Updating scaler for [PLANE:%d:%s] scaler_user index %u.%u\n", intel_plane->base.base.id, intel_plane->base.name, @@ -4283,10 +4283,10 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, drm_plane_index(&intel_plane->base), &plane_state->scaler_id, plane_state->base.rotation, - drm_rect_width(&plane_state->src) >> 16, - drm_rect_height(&plane_state->src) >> 16, - drm_rect_width(&plane_state->dst), - drm_rect_height(&plane_state->dst)); + drm_rect_width(&plane_state->base.src) >> 16, + drm_rect_height(&plane_state->base.src) >> 16, + drm_rect_width(&plane_state->base.dst), + drm_rect_height(&plane_state->base.dst)); if (ret || plane_state->scaler_id < 0) return ret; @@ -4584,9 +4584,9 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) intel_fbc_post_update(crtc); - if (primary_state->visible && + if (primary_state->base.visible && (needs_modeset(&pipe_config->base) || - !old_primary_state->visible)) + !old_primary_state->base.visible)) intel_post_enable_primary(&crtc->base); } } @@ -4612,8 +4612,8 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) intel_fbc_pre_update(crtc, pipe_config, primary_state); - if (old_primary_state->visible && - (modeset || !primary_state->visible)) + if (old_primary_state->base.visible && + (modeset || !primary_state->base.visible)) intel_pre_disable_primary(&crtc->base); } @@ -6298,13 +6298,13 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) if (!intel_crtc->active) return; - if (to_intel_plane_state(crtc->primary->state)->visible) { + if (to_intel_plane_state(crtc->primary->state)->base.visible) { WARN_ON(intel_crtc->flip_work); intel_pre_disable_primary_noatomic(crtc); intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); - to_intel_plane_state(crtc->primary->state)->visible = false; + to_intel_plane_state(crtc->primary->state)->base.visible = false; } dev_priv->display.crtc_disable(crtc); @@ -10178,7 +10178,7 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t cntl = 0, size = 0; - if (plane_state && plane_state->visible) { + if (plane_state && plane_state->base.visible) { unsigned int width = plane_state->base.crtc_w; unsigned int height = plane_state->base.crtc_h; unsigned int stride = roundup_pow_of_two(width) * 4; @@ -10242,7 +10242,7 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, int pipe = intel_crtc->pipe; uint32_t cntl = 0; - if (plane_state && plane_state->visible) { + if (plane_state && plane_state->base.visible) { cntl = MCURSOR_GAMMA_ENABLE; switch (plane_state->base.crtc_w) { case 64: @@ -11826,7 +11826,7 @@ static bool intel_wm_need_update(struct drm_plane *plane, struct intel_plane_state *cur = to_intel_plane_state(plane->state); /* Update watermarks on 
tiling or size changes. */ - if (new->visible != cur->visible) + if (new->base.visible != cur->base.visible) return true; if (!cur->base.fb || !new->base.fb) @@ -11834,10 +11834,10 @@ static bool intel_wm_need_update(struct drm_plane *plane, if (cur->base.fb->modifier[0] != new->base.fb->modifier[0] || cur->base.rotation != new->base.rotation || - drm_rect_width(&new->src) != drm_rect_width(&cur->src) || - drm_rect_height(&new->src) != drm_rect_height(&cur->src) || - drm_rect_width(&new->dst) != drm_rect_width(&cur->dst) || - drm_rect_height(&new->dst) != drm_rect_height(&cur->dst)) + drm_rect_width(&new->base.src) != drm_rect_width(&cur->base.src) || + drm_rect_height(&new->base.src) != drm_rect_height(&cur->base.src) || + drm_rect_width(&new->base.dst) != drm_rect_width(&cur->base.dst) || + drm_rect_height(&new->base.dst) != drm_rect_height(&cur->base.dst)) return true; return false; @@ -11845,10 +11845,10 @@ static bool intel_wm_need_update(struct drm_plane *plane, static bool needs_scaling(struct intel_plane_state *state) { - int src_w = drm_rect_width(&state->src) >> 16; - int src_h = drm_rect_height(&state->src) >> 16; - int dst_w = drm_rect_width(&state->dst); - int dst_h = drm_rect_height(&state->dst); + int src_w = drm_rect_width(&state->base.src) >> 16; + int src_h = drm_rect_height(&state->base.src) >> 16; + int dst_w = drm_rect_width(&state->base.dst); + int dst_h = drm_rect_height(&state->base.dst); return (src_w != dst_w || src_h != dst_h); } @@ -11879,8 +11879,8 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, return ret; } - was_visible = old_plane_state->visible; - visible = to_intel_plane_state(plane_state)->visible; + was_visible = old_plane_state->base.visible; + visible = to_intel_plane_state(plane_state)->base.visible; if (!was_crtc_enabled && WARN_ON(was_visible)) was_visible = false; @@ -11896,7 +11896,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, * only combine the results from all planes in the current place? 
*/ if (!is_crtc_enabled) - to_intel_plane_state(plane_state)->visible = visible = false; + to_intel_plane_state(plane_state)->base.visible = visible = false; if (!was_visible && !visible) return 0; @@ -12301,12 +12301,13 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, drm_get_format_name(fb->pixel_format)); DRM_DEBUG_KMS("\tscaler:%d src %dx%d+%d+%d dst %dx%d+%d+%d\n", state->scaler_id, - state->src.x1 >> 16, state->src.y1 >> 16, - drm_rect_width(&state->src) >> 16, - drm_rect_height(&state->src) >> 16, - state->dst.x1, state->dst.y1, - drm_rect_width(&state->dst), - drm_rect_height(&state->dst)); + state->base.src.x1 >> 16, + state->base.src.y1 >> 16, + drm_rect_width(&state->base.src) >> 16, + drm_rect_height(&state->base.src) >> 16, + state->base.dst.x1, state->base.dst.y1, + drm_rect_width(&state->base.dst), + drm_rect_height(&state->base.dst)); } } @@ -14141,12 +14142,14 @@ intel_check_primary_plane(struct drm_plane *plane, can_position = true; } - return drm_plane_helper_check_update(plane, crtc, fb, &state->src, - &state->dst, &state->clip, + return drm_plane_helper_check_update(plane, crtc, fb, + &state->base.src, + &state->base.dst, + &state->base.clip, state->base.rotation, min_scale, max_scale, can_position, true, - &state->visible); + &state->base.visible); } static void intel_begin_crtc_commit(struct drm_crtc *crtc, @@ -14333,12 +14336,13 @@ intel_check_cursor_plane(struct drm_plane *plane, unsigned stride; int ret; - ret = drm_plane_helper_check_update(plane, crtc, fb, &state->src, - &state->dst, &state->clip, + ret = drm_plane_helper_check_update(plane, crtc, fb, &state->base.src, + &state->base.dst, + &state->base.clip, state->base.rotation, DRM_PLANE_HELPER_NO_SCALING, DRM_PLANE_HELPER_NO_SCALING, - true, true, &state->visible); + true, true, &state->base.visible); if (ret) return ret; @@ -14375,7 +14379,7 @@ intel_check_cursor_plane(struct drm_plane *plane, * Refuse the put the cursor into that compromised position. */ if (IS_CHERRYVIEW(plane->dev) && pipe == PIPE_C && - state->visible && state->base.crtc_x < 0) { + state->base.visible && state->base.crtc_x < 0) { DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); return -EINVAL; } @@ -15825,7 +15829,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) * Temporarily change the plane mapping and disable everything * ... 
*/ plane = crtc->plane; - to_intel_plane_state(crtc->base.primary->state)->visible = true; + to_intel_plane_state(crtc->base.primary->state)->base.visible = true; crtc->plane = !plane; intel_crtc_disable_noatomic(&crtc->base); crtc->plane = plane; @@ -15952,10 +15956,10 @@ static void readout_plane_state(struct intel_crtc *crtc) struct intel_plane_state *plane_state = to_intel_plane_state(primary->state); - plane_state->visible = crtc->active && + plane_state->base.visible = crtc->active && primary_get_hw_state(to_intel_plane(primary)); - if (plane_state->visible) + if (plane_state->base.visible) crtc->base.state->plane_mask |= 1 << drm_plane_index(primary); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index d55b5e0..9c59521 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -338,10 +338,7 @@ struct intel_atomic_state { struct intel_plane_state { struct drm_plane_state base; - struct drm_rect src; - struct drm_rect dst; struct drm_rect clip; - bool visible; /* * scaler_id diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index fa11277..3f4e32f 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -494,7 +494,7 @@ static bool multiple_pipes_ok(struct intel_crtc *crtc, if (!no_fbc_on_multiple_pipes(dev_priv)) return true; - if (plane_state->visible) + if (plane_state->base.visible) fbc->visible_pipes_mask |= (1 << pipe); else fbc->visible_pipes_mask &= ~(1 << pipe); @@ -725,9 +725,9 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, ilk_pipe_pixel_rate(crtc_state); cache->plane.rotation = plane_state->base.rotation; - cache->plane.src_w = drm_rect_width(&plane_state->src) >> 16; - cache->plane.src_h = drm_rect_height(&plane_state->src) >> 16; - cache->plane.visible = plane_state->visible; + cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; + cache->plane.src_h = drm_rect_height(&plane_state->base.src) >> 16; + cache->plane.visible = plane_state->base.visible; if (!cache->plane.visible) return; @@ -1050,7 +1050,7 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, struct intel_plane_state *intel_plane_state = to_intel_plane_state(plane_state); - if (!intel_plane_state->visible) + if (!intel_plane_state->base.visible) continue; for_each_crtc_in_state(state, crtc, crtc_state, j) { @@ -1212,7 +1212,7 @@ void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv) for_each_intel_crtc(&dev_priv->drm, crtc) if (intel_crtc_active(&crtc->base) && - to_intel_plane_state(crtc->base.primary->state)->visible) + to_intel_plane_state(crtc->base.primary->state)->base.visible) dev_priv->fbc.visible_pipes_mask |= (1 << crtc->pipe); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f4f3fcc..b33cf82 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -960,7 +960,7 @@ static uint16_t vlv_compute_wm_level(struct intel_plane *plane, if (dev_priv->wm.pri_latency[level] == 0) return USHRT_MAX; - if (!state->visible) + if (!state->base.visible) return 0; cpp = drm_format_plane_cpp(state->base.fb->pixel_format, 0); @@ -1002,7 +1002,7 @@ static void vlv_compute_fifo(struct intel_crtc *crtc) if (plane->base.type == DRM_PLANE_TYPE_CURSOR) continue; - if (state->visible) { + if (state->base.visible) { wm_state->num_active_planes++; total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0); } @@ -1018,7 +1018,7 @@ static void vlv_compute_fifo(struct 
intel_crtc *crtc) continue; } - if (!state->visible) { + if (!state->base.visible) { plane->wm.fifo_size = 0; continue; } @@ -1118,7 +1118,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) struct intel_plane_state *state = to_intel_plane_state(plane->base.state); - if (!state->visible) + if (!state->base.visible) continue; /* normal watermarks */ @@ -1767,7 +1767,7 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; uint32_t method1, method2; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); @@ -1777,7 +1777,7 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), cstate->base.adjusted_mode.crtc_htotal, - drm_rect_width(&pstate->dst), + drm_rect_width(&pstate->base.dst), cpp, mem_value); return min(method1, method2); @@ -1795,13 +1795,13 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; uint32_t method1, method2; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), cstate->base.adjusted_mode.crtc_htotal, - drm_rect_width(&pstate->dst), + drm_rect_width(&pstate->base.dst), cpp, mem_value); return min(method1, method2); } @@ -1820,7 +1820,7 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, * this is necessary to avoid flickering. */ int cpp = 4; - int width = pstate->visible ? pstate->base.crtc_w : 64; + int width = pstate->base.visible ? pstate->base.crtc_w : 64; if (!cstate->base.active) return 0; @@ -1838,10 +1838,10 @@ static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, int cpp = pstate->base.fb ? 
drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; - return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), cpp); + return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp); } static unsigned int ilk_display_fifo_size(const struct drm_device *dev) @@ -2358,10 +2358,10 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) pipe_wm->pipe_enabled = cstate->base.active; if (sprstate) { - pipe_wm->sprites_enabled = sprstate->visible; - pipe_wm->sprites_scaled = sprstate->visible && - (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 || - drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16); + pipe_wm->sprites_enabled = sprstate->base.visible; + pipe_wm->sprites_scaled = sprstate->base.visible && + (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 || + drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16); } usable_level = max_level; @@ -2996,14 +2996,14 @@ skl_plane_downscale_amount(const struct intel_plane_state *pstate) uint32_t downscale_h, downscale_w; uint32_t src_w, src_h, dst_w, dst_h; - if (WARN_ON(!pstate->visible)) + if (WARN_ON(!pstate->base.visible)) return DRM_PLANE_HELPER_NO_SCALING; /* n.b., src is 16.16 fixed point, dst is whole integer */ - src_w = drm_rect_width(&pstate->src); - src_h = drm_rect_height(&pstate->src); - dst_w = drm_rect_width(&pstate->dst); - dst_h = drm_rect_height(&pstate->dst); + src_w = drm_rect_width(&pstate->base.src); + src_h = drm_rect_height(&pstate->base.src); + dst_w = drm_rect_width(&pstate->base.dst); + dst_h = drm_rect_height(&pstate->base.dst); if (intel_rotation_90_or_270(pstate->base.rotation)) swap(dst_w, dst_h); @@ -3025,15 +3025,15 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, uint32_t width = 0, height = 0; unsigned format = fb ? fb->pixel_format : DRM_FORMAT_XRGB8888; - if (!intel_pstate->visible) + if (!intel_pstate->base.visible) return 0; if (pstate->plane->type == DRM_PLANE_TYPE_CURSOR) return 0; if (y && format != DRM_FORMAT_NV12) return 0; - width = drm_rect_width(&intel_pstate->src) >> 16; - height = drm_rect_height(&intel_pstate->src) >> 16; + width = drm_rect_width(&intel_pstate->base.src) >> 16; + height = drm_rect_height(&intel_pstate->base.src) >> 16; if (intel_rotation_90_or_270(pstate->rotation)) swap(width, height); @@ -3134,8 +3134,8 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, fb->modifier[0] != I915_FORMAT_MOD_Yf_TILED) return 8; - src_w = drm_rect_width(&intel_pstate->src) >> 16; - src_h = drm_rect_height(&intel_pstate->src) >> 16; + src_w = drm_rect_width(&intel_pstate->base.src) >> 16; + src_h = drm_rect_height(&intel_pstate->base.src) >> 16; if (intel_rotation_90_or_270(pstate->rotation)) swap(src_w, src_h); @@ -3226,7 +3226,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, if (intel_plane->pipe != pipe) continue; - if (!to_intel_plane_state(pstate)->visible) { + if (!to_intel_plane_state(pstate)->base.visible) { minimum[id] = 0; y_minimum[id] = 0; continue; @@ -3363,7 +3363,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst uint64_t pixel_rate; /* Shouldn't reach here on disabled planes... 
*/ - if (WARN_ON(!pstate->visible)) + if (WARN_ON(!pstate->base.visible)) return 0; /* @@ -3399,13 +3399,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint32_t width = 0, height = 0; uint32_t plane_pixel_rate; - if (latency == 0 || !cstate->base.active || !intel_pstate->visible) { + if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { *enabled = false; return 0; } - width = drm_rect_width(&intel_pstate->src) >> 16; - height = drm_rect_height(&intel_pstate->src) >> 16; + width = drm_rect_width(&intel_pstate->base.src) >> 16; + height = drm_rect_height(&intel_pstate->base.src) >> 16; if (intel_rotation_90_or_270(pstate->rotation)) swap(width, height); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index e43d97c..efb8324 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -211,14 +211,14 @@ skl_update_plane(struct drm_plane *drm_plane, u32 tile_height, plane_offset, plane_size; unsigned int rotation = plane_state->base.rotation; int x_offset, y_offset; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; plane_ctl = PLANE_CTL_ENABLE | PLANE_CTL_PIPE_GAMMA_ENABLE | @@ -370,14 +370,14 @@ vlv_update_plane(struct drm_plane *dplane, unsigned int rotation = dplane->state->rotation; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; sprctl = SP_ENABLE; @@ -512,14 +512,14 @@ ivb_update_plane(struct drm_plane *plane, unsigned int rotation = plane_state->base.rotation; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - 
uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; sprctl = SPRITE_ENABLE; @@ -653,14 +653,14 @@ ilk_update_plane(struct drm_plane *plane, unsigned int rotation = plane_state->base.rotation; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->dst.x1; - int crtc_y = plane_state->dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->dst); - uint32_t crtc_h = drm_rect_height(&plane_state->dst); - uint32_t x = plane_state->src.x1 >> 16; - uint32_t y = plane_state->src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->src) >> 16; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; dvscntr = DVS_ENABLE; @@ -778,15 +778,15 @@ intel_check_sprite_plane(struct drm_plane *plane, int crtc_x, crtc_y; unsigned int crtc_w, crtc_h; uint32_t src_x, src_y, src_w, src_h; - struct drm_rect *src = &state->src; - struct drm_rect *dst = &state->dst; + struct drm_rect *src = &state->base.src; + struct drm_rect *dst = &state->base.dst; const struct drm_rect *clip = &state->clip; int hscale, vscale; int max_scale, min_scale; bool can_scale; if (!fb) { - state->visible = false; + state->base.visible = false; return 0; } @@ -834,14 +834,14 @@ intel_check_sprite_plane(struct drm_plane *plane, vscale = drm_rect_calc_vscale_relaxed(src, dst, min_scale, max_scale); BUG_ON(vscale < 0); - state->visible = drm_rect_clip_scaled(src, dst, clip, hscale, vscale); + state->base.visible = drm_rect_clip_scaled(src, dst, clip, hscale, vscale); crtc_x = dst->x1; crtc_y = dst->y1; crtc_w = drm_rect_width(dst); crtc_h = drm_rect_height(dst); - if (state->visible) { + if (state->base.visible) { /* check again in case clipping clamped the results */ hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); if (hscale < 0) { @@ -898,12 +898,12 @@ intel_check_sprite_plane(struct drm_plane *plane, crtc_w &= ~1; if (crtc_w == 0) - state->visible = false; + state->base.visible = false; } } /* Check size restrictions when scaling */ - if (state->visible && (src_w != crtc_w || src_h != crtc_h)) { + if (state->base.visible && (src_w != crtc_w || src_h != crtc_h)) { unsigned int width_bytes; int cpp = drm_format_plane_cpp(fb->pixel_format, 0); @@ -912,10 +912,10 @@ intel_check_sprite_plane(struct drm_plane *plane, /* FIXME interlacing min height is 6 */ if (crtc_w < 3 || crtc_h < 3) - state->visible = false; + state->base.visible = false; if (src_w < 3 || src_h < 3) - state->visible = false; + state->base.visible = false; width_bytes = ((src_x * cpp) & 63) + src_w * cpp; @@ -926,7 +926,7 @@ intel_check_sprite_plane(struct 
drm_plane *plane, } } - if (state->visible) { + if (state->base.visible) { src->x1 = src_x << 16; src->x2 = (src_x + src_w) << 16; src->y1 = src_y << 16; -- cgit v0.10.2 From f8856a44ad5c0f0f8c641e826984f92fb46f7a57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Jul 2016 19:07:00 +0300 Subject: drm/i915: Use drm_plane_helper_check_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the use of drm_plane_helper_check_update() with drm_plane_helper_check_state() since we have a plane state. Signed-off-by: Ville Syrjälä Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1469549224-1860-6-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index ffc5730..e06d1f5 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -134,20 +134,6 @@ static int intel_plane_atomic_check(struct drm_plane *plane, crtc_state = to_intel_crtc_state(drm_crtc_state); - /* - * The original src/dest coordinates are stored in state->base, but - * we want to keep another copy internal to our driver that we can - * clip/modify ourselves. - */ - intel_state->base.src.x1 = state->src_x; - intel_state->base.src.y1 = state->src_y; - intel_state->base.src.x2 = state->src_x + state->src_w; - intel_state->base.src.y2 = state->src_y + state->src_h; - intel_state->base.dst.x1 = state->crtc_x; - intel_state->base.dst.y1 = state->crtc_y; - intel_state->base.dst.x2 = state->crtc_x + state->crtc_w; - intel_state->base.dst.y2 = state->crtc_y + state->crtc_h; - /* Clip all planes to CRTC size, or 0x0 if CRTC is disabled */ intel_state->clip.x1 = 0; intel_state->clip.y1 = 0; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8ecfa95..15bb92b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14128,7 +14128,6 @@ intel_check_primary_plane(struct drm_plane *plane, struct intel_plane_state *state) { struct drm_crtc *crtc = state->base.crtc; - struct drm_framebuffer *fb = state->base.fb; int min_scale = DRM_PLANE_HELPER_NO_SCALING; int max_scale = DRM_PLANE_HELPER_NO_SCALING; bool can_position = false; @@ -14142,14 +14141,10 @@ intel_check_primary_plane(struct drm_plane *plane, can_position = true; } - return drm_plane_helper_check_update(plane, crtc, fb, - &state->base.src, - &state->base.dst, - &state->base.clip, - state->base.rotation, - min_scale, max_scale, - can_position, true, - &state->base.visible); + return drm_plane_helper_check_state(&state->base, + &state->clip, + min_scale, max_scale, + can_position, true); } static void intel_begin_crtc_commit(struct drm_crtc *crtc, @@ -14329,20 +14324,17 @@ intel_check_cursor_plane(struct drm_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state) { - struct drm_crtc *crtc = crtc_state->base.crtc; struct drm_framebuffer *fb = state->base.fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); enum pipe pipe = to_intel_plane(plane)->pipe; unsigned stride; int ret; - ret = drm_plane_helper_check_update(plane, crtc, fb, &state->base.src, - &state->base.dst, - &state->base.clip, - state->base.rotation, - DRM_PLANE_HELPER_NO_SCALING, - DRM_PLANE_HELPER_NO_SCALING, - true, true, &state->base.visible); + ret = drm_plane_helper_check_state(&state->base, + &state->clip, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + 
true, true); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index efb8324..1d9736b 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -785,6 +785,16 @@ intel_check_sprite_plane(struct drm_plane *plane, int max_scale, min_scale; bool can_scale; + src->x1 = state->base.src_x; + src->y1 = state->base.src_y; + src->x2 = state->base.src_x + state->base.src_w; + src->y2 = state->base.src_y + state->base.src_h; + + dst->x1 = state->base.crtc_x; + dst->y1 = state->base.crtc_y; + dst->x2 = state->base.crtc_x + state->base.crtc_w; + dst->y2 = state->base.crtc_y + state->base.crtc_h; + if (!fb) { state->base.visible = false; return 0; } -- cgit v0.10.2 From ac92028e99f55cb821f97e2e866a7f32ffe66aa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Jul 2016 19:07:01 +0300 Subject: drm/rockchip: Use drm_plane_state.{src, dst} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the private drm_rects in vop_plane_state with the ones now living in drm_plane_state. Cc: Yao Cc: linux-rockchip@lists.infradead.org Signed-off-by: Ville Syrjälä Acked-by: Mark Yao Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1469549224-1860-7-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 91305eb..c566c740 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -87,8 +87,6 @@ struct vop_plane_state { struct drm_plane_state base; int format; - struct drm_rect src; - struct drm_rect dest; dma_addr_t yrgb_mst; bool enable; }; @@ -595,8 +593,8 @@ static int vop_plane_atomic_check(struct drm_plane *plane, const struct vop_win_data *win = vop_win->data; bool visible; int ret; - struct drm_rect *dest = &vop_plane_state->dest; - struct drm_rect *src = &vop_plane_state->src; + struct drm_rect *dest = &state->dst; + struct drm_rect *src = &state->src; struct drm_rect clip; int min_scale = win->phy->scl ? FRAC_16_16(1, 8) : DRM_PLANE_HELPER_NO_SCALING; @@ -694,8 +692,8 @@ static void vop_plane_atomic_update(struct drm_plane *plane, unsigned int actual_w, actual_h; unsigned int dsp_stx, dsp_sty; uint32_t act_info, dsp_info, dsp_st; - struct drm_rect *src = &vop_plane_state->src; - struct drm_rect *dest = &vop_plane_state->dest; + struct drm_rect *src = &state->src; + struct drm_rect *dest = &state->dst; struct drm_gem_object *obj, *uv_obj; struct rockchip_gem_object *rk_obj, *rk_uv_obj; unsigned long offset; -- cgit v0.10.2 From f9b96be0ecb349c65875b55c14345d0604405e88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Jul 2016 19:07:02 +0300 Subject: drm/rockchip: Use drm_plane_helper_check_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the use of drm_plane_helper_check_update() with drm_plane_helper_check_state() since we have a plane state. Rockchip looks to handle plane clipping rather well already (unlike most ARM DRM drivers), so there are no functional changes here.
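Generalized a little (a sketch of the pattern these conversions follow, with illustrative names rather than the exact Rockchip code), an atomic_check now boils down to building the CRTC-sized clip rectangle and letting the helper fill in the clipped src/dst rectangles and the visible flag:

	/* Sketch of the drm_plane_helper_check_state() usage pattern. */
	static int example_plane_atomic_check(struct drm_plane *plane,
					      struct drm_plane_state *state)
	{
		struct drm_crtc_state *crtc_state;
		struct drm_rect clip = { 0 };
		int ret;

		if (!state->crtc)
			return 0;

		crtc_state = drm_atomic_get_existing_crtc_state(state->state,
								state->crtc);
		if (WARN_ON(!crtc_state))
			return -EINVAL;

		clip.x2 = crtc_state->adjusted_mode.hdisplay;
		clip.y2 = crtc_state->adjusted_mode.vdisplay;

		ret = drm_plane_helper_check_state(state, &clip,
						   DRM_PLANE_HELPER_NO_SCALING,
						   DRM_PLANE_HELPER_NO_SCALING,
						   true, true);
		if (ret)
			return ret;

		/* state->src, state->dst and state->visible now hold the clipped result. */
		return 0;
	}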
Cc: Yao Cc: linux-rockchip@lists.infradead.org Signed-off-by: Ville Syrjälä Acked-by: Mark Yao Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1469549224-1860-8-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index c566c740..31744fe 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -591,10 +591,7 @@ static int vop_plane_atomic_check(struct drm_plane *plane, struct vop_win *vop_win = to_vop_win(plane); struct vop_plane_state *vop_plane_state = to_vop_plane_state(state); const struct vop_win_data *win = vop_win->data; - bool visible; int ret; - struct drm_rect *dest = &state->dst; - struct drm_rect *src = &state->src; struct drm_rect clip; int min_scale = win->phy->scl ? FRAC_16_16(1, 8) : DRM_PLANE_HELPER_NO_SCALING; @@ -608,30 +605,18 @@ static int vop_plane_atomic_check(struct drm_plane *plane, if (WARN_ON(!crtc_state)) return -EINVAL; - src->x1 = state->src_x; - src->y1 = state->src_y; - src->x2 = state->src_x + state->src_w; - src->y2 = state->src_y + state->src_h; - dest->x1 = state->crtc_x; - dest->y1 = state->crtc_y; - dest->x2 = state->crtc_x + state->crtc_w; - dest->y2 = state->crtc_y + state->crtc_h; - clip.x1 = 0; clip.y1 = 0; clip.x2 = crtc_state->adjusted_mode.hdisplay; clip.y2 = crtc_state->adjusted_mode.vdisplay; - ret = drm_plane_helper_check_update(plane, crtc, state->fb, - src, dest, &clip, - state->rotation, - min_scale, - max_scale, - true, true, &visible); + ret = drm_plane_helper_check_state(state, &clip, + min_scale, max_scale, + true, true); if (ret) return ret; - if (!visible) + if (!state->visible) goto out_disable; vop_plane_state->format = vop_convert_format(fb->pixel_format); @@ -642,7 +627,7 @@ static int vop_plane_atomic_check(struct drm_plane *plane, * Src.x1 can be odd when do clip, but yuv plane start point * need align with 2 pixel. */ - if (is_yuv_support(fb->pixel_format) && ((src->x1 >> 16) % 2)) + if (is_yuv_support(fb->pixel_format) && ((state->src.x1 >> 16) % 2)) return -EINVAL; vop_plane_state->enable = true; -- cgit v0.10.2 From 0e4faf679e67b5ab55752d18fc82f3c7351b2f03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Jul 2016 19:07:03 +0300 Subject: drm/mediatek: Use drm_plane_helper_check_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the use of drm_plane_helper_check_update() with drm_plane_helper_check_state() since we have a plane state. This also eliminates the double clipping the driver was doing in both the check and commit phases. And it should fix the src coordinate addr adjustment. Previously the driver was expecting negative dst coordinates after clipping, which is not going to happen, so any clipping-induced addr adjustment simply didn't happen. Neither did the driver respect any user-configured src coordinates, so panning and such would have been totally broken. It should be all good now.
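To make the address fix concrete (illustrative numbers, not taken from the patch): with an XRGB8888 framebuffer (4 bytes per pixel) and a plane placed at crtc_x = -16, clipping moves dst.x1 to 0 and the clipped source origin src.x1 to 16 << 16, so the scan-out address must advance by 16 * 4 = 64 bytes, which is what the new code below does with the clipped coordinates:

	/* Worked example of the clip-induced address adjustment (assumed cpp = 4). */
	addr += (state->base.src.x1 >> 16) * 4;     /* 16 pixels * 4 bytes = 64 bytes  */
	addr += (state->base.src.y1 >> 16) * pitch; /* plus any rows clipped at the top */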
Cc: CK Hu Cc: linux-mediatek@lists.infradead.org Signed-off-by: Ville Syrjälä Reviewed-by: Bibby Hsieh Tested-by: Bibby Hsieh Acked-by: CK Hu Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1469549224-1860-9-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index 3995765..5f2516f 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -30,15 +30,20 @@ static const u32 formats[] = { DRM_FORMAT_RGB565, }; -static void mtk_plane_enable(struct mtk_drm_plane *mtk_plane, bool enable, - dma_addr_t addr, struct drm_rect *dest) +static void mtk_plane_enable(struct mtk_drm_plane *mtk_plane, + dma_addr_t addr) { struct drm_plane *plane = &mtk_plane->base; struct mtk_plane_state *state = to_mtk_plane_state(plane->state); unsigned int pitch, format; - int x, y; + bool enable; - if (WARN_ON(!plane->state || (enable && !plane->state->fb))) + if (WARN_ON(!plane->state)) + return; + + enable = state->base.visible; + + if (WARN_ON(enable && !plane->state->fb)) return; if (plane->state->fb) { @@ -49,27 +54,17 @@ static void mtk_plane_enable(struct mtk_drm_plane *mtk_plane, bool enable, format = DRM_FORMAT_RGBA8888; } - x = plane->state->crtc_x; - y = plane->state->crtc_y; - - if (x < 0) { - addr -= x * 4; - x = 0; - } - - if (y < 0) { - addr -= y * pitch; - y = 0; - } + addr += (state->base.src.x1 >> 16) * 4; + addr += (state->base.src.y1 >> 16) * pitch; state->pending.enable = enable; state->pending.pitch = pitch; state->pending.format = format; state->pending.addr = addr; - state->pending.x = x; - state->pending.y = y; - state->pending.width = dest->x2 - dest->x1; - state->pending.height = dest->y2 - dest->y1; + state->pending.x = state->base.dst.x1; + state->pending.y = state->base.dst.y1; + state->pending.width = drm_rect_width(&state->base.dst); + state->pending.height = drm_rect_height(&state->base.dst); wmb(); /* Make sure the above parameters are set before update */ state->pending.dirty = true; } @@ -134,20 +129,6 @@ static int mtk_plane_atomic_check(struct drm_plane *plane, { struct drm_framebuffer *fb = state->fb; struct drm_crtc_state *crtc_state; - bool visible; - struct drm_rect dest = { - .x1 = state->crtc_x, - .y1 = state->crtc_y, - .x2 = state->crtc_x + state->crtc_w, - .y2 = state->crtc_y + state->crtc_h, - }; - struct drm_rect src = { - /* 16.16 fixed point */ - .x1 = state->src_x, - .y1 = state->src_y, - .x2 = state->src_x + state->src_w, - .y2 = state->src_y + state->src_h, - }; struct drm_rect clip = { 0, }; if (!fb) @@ -168,12 +149,10 @@ static int mtk_plane_atomic_check(struct drm_plane *plane, clip.x2 = crtc_state->mode.hdisplay; clip.y2 = crtc_state->mode.vdisplay; - return drm_plane_helper_check_update(plane, state->crtc, fb, - &src, &dest, &clip, - state->rotation, - DRM_PLANE_HELPER_NO_SCALING, - DRM_PLANE_HELPER_NO_SCALING, - true, true, &visible); + return drm_plane_helper_check_state(state, &clip, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, true); } static void mtk_plane_atomic_update(struct drm_plane *plane, @@ -184,24 +163,13 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, struct drm_gem_object *gem; struct mtk_drm_gem_obj *mtk_gem; struct mtk_drm_plane *mtk_plane = to_mtk_plane(plane); - struct drm_rect dest = { - .x1 = state->base.crtc_x, - .y1 = state->base.crtc_y, - .x2 = state->base.crtc_x + state->base.crtc_w, - .y2 = state->base.crtc_y + state->base.crtc_h, - }; 
- struct drm_rect clip = { 0, }; if (!crtc) return; - clip.x2 = state->base.crtc->state->mode.hdisplay; - clip.y2 = state->base.crtc->state->mode.vdisplay; - drm_rect_intersect(&dest, &clip); - gem = mtk_fb_get_gem_obj(state->base.fb); mtk_gem = to_mtk_gem_obj(gem); - mtk_plane_enable(mtk_plane, true, mtk_gem->dma_addr, &dest); + mtk_plane_enable(mtk_plane, mtk_gem->dma_addr); } static void mtk_plane_atomic_disable(struct drm_plane *plane, -- cgit v0.10.2 From 4be12cc23d5030dcf7dfe055e26c0ab6e79dbb38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Jul 2016 19:07:04 +0300 Subject: drm/simple_kms_helper: Use drm_plane_helper_check_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the use of drm_plane_helper_check_update() with drm_plane_helper_check_state() since we have a plane state. I don't see any actual users of drm_simple_kms_helper yet, so no actual plane clipping bugs to fix. Cc: Noralf Trønnes Signed-off-by: Ville Syrjälä Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1469549224-1860-10-git-send-email-ville.syrjala@linux.intel.com diff --git a/drivers/gpu/drm/drm_simple_kms_helper.c b/drivers/gpu/drm/drm_simple_kms_helper.c index 0db36d2..0a02efe 100644 --- a/drivers/gpu/drm/drm_simple_kms_helper.c +++ b/drivers/gpu/drm/drm_simple_kms_helper.c @@ -73,22 +73,9 @@ static const struct drm_crtc_funcs drm_simple_kms_crtc_funcs = { static int drm_simple_kms_plane_atomic_check(struct drm_plane *plane, struct drm_plane_state *plane_state) { - struct drm_rect src = { - .x1 = plane_state->src_x, - .y1 = plane_state->src_y, - .x2 = plane_state->src_x + plane_state->src_w, - .y2 = plane_state->src_y + plane_state->src_h, - }; - struct drm_rect dest = { - .x1 = plane_state->crtc_x, - .y1 = plane_state->crtc_y, - .x2 = plane_state->crtc_x + plane_state->crtc_w, - .y2 = plane_state->crtc_y + plane_state->crtc_h, - }; struct drm_rect clip = { 0 }; struct drm_simple_display_pipe *pipe; struct drm_crtc_state *crtc_state; - bool visible; int ret; pipe = container_of(plane, struct drm_simple_display_pipe, plane); @@ -102,17 +89,15 @@ static int drm_simple_kms_plane_atomic_check(struct drm_plane *plane, clip.x2 = crtc_state->adjusted_mode.hdisplay; clip.y2 = crtc_state->adjusted_mode.vdisplay; - ret = drm_plane_helper_check_update(plane, &pipe->crtc, - plane_state->fb, - &src, &dest, &clip, - plane_state->rotation, - DRM_PLANE_HELPER_NO_SCALING, - DRM_PLANE_HELPER_NO_SCALING, - false, true, &visible); + + ret = drm_plane_helper_check_state(plane_state, &clip, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, true); if (ret) return ret; - if (!visible) + if (!plane_state->visible) return -EINVAL; if (!pipe->funcs || !pipe->funcs->check) -- cgit v0.10.2 From 73785a972901fe5de941950cd1f699b9ea998626 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Thu, 4 Aug 2016 10:59:51 +0800 Subject: drm/mediatek: Remove mtk_drm_crtc_check_flush This function no longer exists. 
Signed-off-by: Daniel Kurtz Signed-off-by: Bibby Hsieh Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1470279597-60453-2-git-send-email-bibby.hsieh@mediatek.com diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h index 81e5566..4d32cf1 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.h @@ -22,7 +22,6 @@ int mtk_drm_crtc_enable_vblank(struct drm_device *drm, unsigned int pipe); void mtk_drm_crtc_disable_vblank(struct drm_device *drm, unsigned int pipe); -void mtk_drm_crtc_check_flush(struct drm_crtc *crtc); void mtk_drm_crtc_commit(struct drm_crtc *crtc); void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *ovl); int mtk_drm_crtc_create(struct drm_device *drm_dev, -- cgit v0.10.2 From 0d5a32b7257234a25cc6b7ae936d236f61030f3f Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Thu, 4 Aug 2016 10:59:52 +0800 Subject: drm/mediatek: plane: Remove plane zpos/index It is not actually useful to a mtk plane to know its zpos/index, so just remove this field. This lets us completely remove struct mtk_drm_plane in a follow-up patch. Signed-off-by: Daniel Kurtz Signed-off-by: Bibby Hsieh Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1470279597-60453-3-git-send-email-bibby.hsieh@mediatek.com diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 24aa3ba..18211ab 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -559,7 +559,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, (zpos == 1) ? DRM_PLANE_TYPE_CURSOR : DRM_PLANE_TYPE_OVERLAY; ret = mtk_plane_init(drm_dev, &mtk_crtc->planes[zpos], - BIT(pipe), type, zpos); + BIT(pipe), type); if (ret) goto unprepare; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index 5f2516f..32a8e55 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -189,8 +189,7 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = { }; int mtk_plane_init(struct drm_device *dev, struct mtk_drm_plane *mtk_plane, - unsigned long possible_crtcs, enum drm_plane_type type, - unsigned int zpos) + unsigned long possible_crtcs, enum drm_plane_type type) { int err; @@ -203,7 +202,6 @@ int mtk_plane_init(struct drm_device *dev, struct mtk_drm_plane *mtk_plane, } drm_plane_helper_add(&mtk_plane->base, &mtk_plane_helper_funcs); - mtk_plane->idx = zpos; return 0; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.h b/drivers/gpu/drm/mediatek/mtk_drm_plane.h index 72a7b3e..74dbeda 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.h @@ -20,7 +20,6 @@ struct mtk_drm_plane { struct drm_plane base; - unsigned int idx; }; struct mtk_plane_pending_state { @@ -53,7 +52,6 @@ to_mtk_plane_state(struct drm_plane_state *state) } int mtk_plane_init(struct drm_device *dev, struct mtk_drm_plane *mtk_plane, - unsigned long possible_crtcs, enum drm_plane_type type, - unsigned int zpos); + unsigned long possible_crtcs, enum drm_plane_type type); #endif -- cgit v0.10.2 From 5bfafad8059b40be3ab60be26d3270b74303639a Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Thu, 4 Aug 2016 10:59:53 +0800 Subject: drm/mediatek: Remove mtk_drm_plane Now that mtk_drm_plane just contains its base struct drm_plane, we can just remove it and use struct drm_plane everywhere.
Signed-off-by: Daniel Kurtz Signed-off-by: Bibby Hsieh Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1470279597-60453-4-git-send-email-bibby.hsieh@mediatek.com diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 18211ab..d6fbefa 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -31,7 +31,7 @@ * struct mtk_drm_crtc - MediaTek specific crtc structure. * @base: crtc object. * @enabled: records whether crtc_enable succeeded - * @planes: array of 4 mtk_drm_plane structures, one for each overlay plane + * @planes: array of 4 drm_plane structures, one for each overlay plane * @pending_planes: whether any plane has pending changes to be applied * @config_regs: memory mapped mmsys configuration register space * @mutex: handle to one of the ten disp_mutex streams @@ -45,7 +45,7 @@ struct mtk_drm_crtc { bool pending_needs_vblank; struct drm_pending_vblank_event *event; - struct mtk_drm_plane planes[OVL_LAYER_NR]; + struct drm_plane planes[OVL_LAYER_NR]; bool pending_planes; void __iomem *config_regs; @@ -272,7 +272,7 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) /* Initially configure all planes */ for (i = 0; i < OVL_LAYER_NR; i++) { - struct drm_plane *plane = &mtk_crtc->planes[i].base; + struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; plane_state = to_mtk_plane_state(plane->state); @@ -351,7 +351,7 @@ static void mtk_drm_crtc_disable(struct drm_crtc *crtc) /* Set all pending plane state to disabled */ for (i = 0; i < OVL_LAYER_NR; i++) { - struct drm_plane *plane = &mtk_crtc->planes[i].base; + struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; plane_state = to_mtk_plane_state(plane->state); @@ -397,7 +397,7 @@ static void mtk_drm_crtc_atomic_flush(struct drm_crtc *crtc, if (mtk_crtc->event) mtk_crtc->pending_needs_vblank = true; for (i = 0; i < OVL_LAYER_NR; i++) { - struct drm_plane *plane = &mtk_crtc->planes[i].base; + struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; plane_state = to_mtk_plane_state(plane->state); @@ -471,7 +471,7 @@ void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *ovl) if (mtk_crtc->pending_planes) { for (i = 0; i < OVL_LAYER_NR; i++) { - struct drm_plane *plane = &mtk_crtc->planes[i].base; + struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; plane_state = to_mtk_plane_state(plane->state); @@ -564,8 +564,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, goto unprepare; } - ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, &mtk_crtc->planes[0].base, - &mtk_crtc->planes[1].base, pipe); + ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, &mtk_crtc->planes[0], + &mtk_crtc->planes[1], pipe); if (ret < 0) goto unprepare; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index 32a8e55..86b7aed 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -30,10 +30,9 @@ static const u32 formats[] = { DRM_FORMAT_RGB565, }; -static void mtk_plane_enable(struct mtk_drm_plane *mtk_plane, +static void mtk_plane_enable(struct drm_plane *plane, dma_addr_t addr) { - struct drm_plane *plane = &mtk_plane->base; struct mtk_plane_state *state = to_mtk_plane_state(plane->state); unsigned int pitch, format; bool enable; @@ -162,14 +161,13 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, struct 
drm_crtc *crtc = state->base.crtc; struct drm_gem_object *gem; struct mtk_drm_gem_obj *mtk_gem; - struct mtk_drm_plane *mtk_plane = to_mtk_plane(plane); if (!crtc) return; gem = mtk_fb_get_gem_obj(state->base.fb); mtk_gem = to_mtk_gem_obj(gem); - mtk_plane_enable(mtk_plane, mtk_gem->dma_addr); + mtk_plane_enable(plane, mtk_gem->dma_addr); } static void mtk_plane_atomic_disable(struct drm_plane *plane, @@ -188,12 +186,12 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = { .atomic_disable = mtk_plane_atomic_disable, }; -int mtk_plane_init(struct drm_device *dev, struct mtk_drm_plane *mtk_plane, +int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, unsigned long possible_crtcs, enum drm_plane_type type) { int err; - err = drm_universal_plane_init(dev, &mtk_plane->base, possible_crtcs, + err = drm_universal_plane_init(dev, plane, possible_crtcs, &mtk_plane_funcs, formats, ARRAY_SIZE(formats), type, NULL); if (err) { @@ -201,7 +199,7 @@ int mtk_plane_init(struct drm_device *dev, struct mtk_drm_plane *mtk_plane, return err; } - drm_plane_helper_add(&mtk_plane->base, &mtk_plane_helper_funcs); + drm_plane_helper_add(plane, &mtk_plane_helper_funcs); return 0; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.h b/drivers/gpu/drm/mediatek/mtk_drm_plane.h index 74dbeda..6a20b49 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.h @@ -18,10 +18,6 @@ #include #include -struct mtk_drm_plane { - struct drm_plane base; -}; - struct mtk_plane_pending_state { bool config; bool enable; @@ -40,18 +36,13 @@ struct mtk_plane_state { struct mtk_plane_pending_state pending; }; -static inline struct mtk_drm_plane *to_mtk_plane(struct drm_plane *plane) -{ - return container_of(plane, struct mtk_drm_plane, base); -} - static inline struct mtk_plane_state * to_mtk_plane_state(struct drm_plane_state *state) { return container_of(state, struct mtk_plane_state, base); } -int mtk_plane_init(struct drm_device *dev, struct mtk_drm_plane *mtk_plane, +int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, unsigned long possible_crtcs, enum drm_plane_type type); #endif -- cgit v0.10.2 From 903daff60fc5629d39ceb5f870afecdc477bcd6c Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Thu, 4 Aug 2016 10:59:54 +0800 Subject: drm/mediatek: Use drm_atomic destroy_state helpers Use the core destroy_state helpers to destroy core state to ensure we don't leak if/when more fields get added later. 
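The resulting ->reset() pattern looks roughly like this on the plane side (sketch based on the diff below; the crtc side is analogous with __drm_atomic_helper_crtc_destroy_state()):

	static void mtk_plane_reset(struct drm_plane *plane)
	{
		if (plane->state) {
			/* releases the fb reference and any other core state the
			 * helper knows about, instead of open-coding
			 * drm_framebuffer_unreference() */
			__drm_atomic_helper_plane_destroy_state(plane->state);

			memset(to_mtk_plane_state(plane->state), 0,
			       sizeof(struct mtk_plane_state));
		}
		/* allocation of the fresh default state continues unchanged */
	}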
Signed-off-by: Daniel Kurtz Signed-off-by: Bibby Hsieh Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1470279597-60453-5-git-send-email-bibby.hsieh@mediatek.com diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index d6fbefa..733b2a3 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -112,8 +112,7 @@ static void mtk_drm_crtc_reset(struct drm_crtc *crtc) struct mtk_crtc_state *state; if (crtc->state) { - if (crtc->state->mode_blob) - drm_property_unreference_blob(crtc->state->mode_blob); + __drm_atomic_helper_crtc_destroy_state(crtc->state); state = to_mtk_crtc_state(crtc->state); memset(state, 0, sizeof(*state)); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index 86b7aed..17172ba 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -73,8 +73,7 @@ static void mtk_plane_reset(struct drm_plane *plane) struct mtk_plane_state *state; if (plane->state) { - if (plane->state->fb) - drm_framebuffer_unreference(plane->state->fb); + __drm_atomic_helper_plane_destroy_state(plane->state); state = to_mtk_plane_state(plane->state); memset(state, 0, sizeof(*state)); -- cgit v0.10.2 From f176cbf6f9407d363e72ba9bc2f7cd40853b4388 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Thu, 4 Aug 2016 10:59:55 +0800 Subject: drm/mediatek: plane: Merge mtk_plane_enable into mtk_plane_atomic_update The mtk_plane_enable is just called once by mtk_plane_atomic_update. So, merge mtk_plane_enable into mtk_plane_atomic_update. While we are here, also clean up the function a bit by using an fb local variables. Signed-off-by: Bibby Hsieh Signed-off-by: Daniel Kurtz Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1470279597-60453-6-git-send-email-bibby.hsieh@mediatek.com diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index 17172ba..b3ddb20 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -30,44 +30,6 @@ static const u32 formats[] = { DRM_FORMAT_RGB565, }; -static void mtk_plane_enable(struct drm_plane *plane, - dma_addr_t addr) -{ - struct mtk_plane_state *state = to_mtk_plane_state(plane->state); - unsigned int pitch, format; - bool enable; - - if (WARN_ON(!plane->state)) - return; - - enable = state->base.visible; - - if (WARN_ON(enable && !plane->state->fb)) - return; - - if (plane->state->fb) { - pitch = plane->state->fb->pitches[0]; - format = plane->state->fb->pixel_format; - } else { - pitch = 0; - format = DRM_FORMAT_RGBA8888; - } - - addr += (state->base.src.x1 >> 16) * 4; - addr += (state->base.src.y1 >> 16) * pitch; - - state->pending.enable = enable; - state->pending.pitch = pitch; - state->pending.format = format; - state->pending.addr = addr; - state->pending.x = state->base.dst.x1; - state->pending.y = state->base.dst.y1; - state->pending.width = drm_rect_width(&state->base.dst); - state->pending.height = drm_rect_height(&state->base.dst); - wmb(); /* Make sure the above parameters are set before update */ - state->pending.dirty = true; -} - static void mtk_plane_reset(struct drm_plane *plane) { struct mtk_plane_state *state; @@ -157,16 +119,35 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { struct mtk_plane_state *state = to_mtk_plane_state(plane->state); - struct drm_crtc *crtc = state->base.crtc; + 
struct drm_crtc *crtc = plane->state->crtc; + struct drm_framebuffer *fb = plane->state->fb; struct drm_gem_object *gem; struct mtk_drm_gem_obj *mtk_gem; + unsigned int pitch, format; + dma_addr_t addr; - if (!crtc) + if (!crtc || WARN_ON(!fb)) return; - gem = mtk_fb_get_gem_obj(state->base.fb); + gem = mtk_fb_get_gem_obj(fb); mtk_gem = to_mtk_gem_obj(gem); - mtk_plane_enable(plane, mtk_gem->dma_addr); + addr = mtk_gem->dma_addr; + pitch = fb->pitches[0]; + format = fb->pixel_format; + + addr += (plane->state->src.x1 >> 16) * 4; + addr += (plane->state->src.y1 >> 16) * pitch; + + state->pending.enable = true; + state->pending.pitch = pitch; + state->pending.format = format; + state->pending.addr = addr; + state->pending.x = plane->state->dst.x1; + state->pending.y = plane->state->dst.y1; + state->pending.width = drm_rect_width(&plane->state->dst); + state->pending.height = drm_rect_height(&plane->state->dst); + wmb(); /* Make sure the above parameters are set before update */ + state->pending.dirty = true; } static void mtk_plane_atomic_disable(struct drm_plane *plane, -- cgit v0.10.2 From 9c350d834f08407d8470b3c09bbc34e42d261362 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Thu, 4 Aug 2016 10:59:56 +0800 Subject: drm/mediatek: plane: Use FB's format's cpp to compute x offset Use the framebuffer's format to compute its cpp, and use it when calculating the address shift value. Signed-off-by: Bibby Hsieh Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1470279597-60453-7-git-send-email-bibby.hsieh@mediatek.com diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index b3ddb20..c461a23 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -135,7 +135,7 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, pitch = fb->pitches[0]; format = fb->pixel_format; - addr += (plane->state->src.x1 >> 16) * 4; + addr += (plane->state->src.x1 >> 16) * drm_format_plane_cpp(format, 0); addr += (plane->state->src.y1 >> 16) * pitch; state->pending.enable = true; -- cgit v0.10.2 From ac08500c1bcb620810838c735143a00e5acf5b5f Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Thu, 4 Aug 2016 10:59:57 +0800 Subject: drm/mediatek: Fix mtk_atomic_complete for runtime_pm To properly implement atomic w/ runtime pm, we move drm_atomic_helper_commit_modeset_enables() above drm_atomic_helper_commit_planes() to ensure CRTCs are enabled before modifying plane registers, and set active_only to true to filter out plane update notifications when the CRTC is disabled. According to the document from linux kernel: Set the active_only parameters to true in order not to receive plane update notifications related to a disabled CRTC. This avoids the need to manually ignore plane updates in driver code when the driver and/or hardware can't or just don't need to deal with updates on disabled CRTCs, for example when supporting runtime PM. 
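For comparison, the old and new tails of mtk_atomic_complete() are shown below; the active_only=true argument in the planes call is what filters out plane updates for disabled CRTCs:

	/* old ordering: plane registers could be written while their
	 * crtc was still powered down
	 *
	 *	drm_atomic_helper_commit_modeset_disables(drm, state);
	 *	drm_atomic_helper_commit_planes(drm, state, false);
	 *	drm_atomic_helper_commit_modeset_enables(drm, state);
	 */

	/* new, runtime-PM friendly ordering */
	drm_atomic_helper_commit_modeset_disables(drm, state);
	drm_atomic_helper_commit_modeset_enables(drm, state);
	drm_atomic_helper_commit_planes(drm, state, true);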
Signed-off-by: Bibby Hsieh Signed-off-by: Daniel Kurtz Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1470279597-60453-8-git-send-email-bibby.hsieh@mediatek.com diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index eebb7d8..0e769ab 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -61,10 +61,25 @@ static void mtk_atomic_complete(struct mtk_drm_private *private, mtk_atomic_wait_for_fences(state); + /* + * Mediatek drm supports runtime PM, so plane registers cannot be + * written when their crtc is disabled. + * + * The comment for drm_atomic_helper_commit states: + * For drivers supporting runtime PM the recommended sequence is + * + * drm_atomic_helper_commit_modeset_disables(dev, state); + * drm_atomic_helper_commit_modeset_enables(dev, state); + * drm_atomic_helper_commit_planes(dev, state, true); + * + * See the kerneldoc entries for these three functions for more details. + */ drm_atomic_helper_commit_modeset_disables(drm, state); - drm_atomic_helper_commit_planes(drm, state, false); drm_atomic_helper_commit_modeset_enables(drm, state); + drm_atomic_helper_commit_planes(drm, state, true); + drm_atomic_helper_wait_for_vblanks(drm, state); + drm_atomic_helper_cleanup_planes(drm, state); drm_atomic_state_free(state); } -- cgit v0.10.2 From e9ed3a67cd1bfd8d0d0dc4968a36f6ea4db2d45a Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:36:45 +0800 Subject: drm/amdgpu: Define virtual display ip blocks. For virtual display feature, define virtual display ip blocks, and set dce_virtual_ip_funcs to DCE block. Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 4efc901..edcc142 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -67,6 +67,7 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_powerplay.h" +#include "dce_virtual.h" /* * Indirect registers accessor @@ -1708,6 +1709,74 @@ static const struct amdgpu_ip_block_version bonaire_ip_blocks[] = }, }; +static const struct amdgpu_ip_block_version bonaire_ip_blocks_vd[] = +{ + /* ORDER MATTERS! */ + { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &cik_common_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &gmc_v7_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &cik_ih_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &amdgpu_pp_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 8, + .minor = 2, + .rev = 0, + .funcs = &dce_virtual_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 7, + .minor = 2, + .rev = 0, + .funcs = &gfx_v7_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &cik_sdma_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 4, + .minor = 2, + .rev = 0, + .funcs = &uvd_v4_2_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vce_v2_0_ip_funcs, + }, +}; + static const struct amdgpu_ip_block_version hawaii_ip_blocks[] = { /* ORDER MATTERS! 
*/ @@ -1776,6 +1845,74 @@ static const struct amdgpu_ip_block_version hawaii_ip_blocks[] = }, }; +static const struct amdgpu_ip_block_version hawaii_ip_blocks_vd[] = +{ + /* ORDER MATTERS! */ + { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &cik_common_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &gmc_v7_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &cik_ih_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &amdgpu_pp_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 8, + .minor = 5, + .rev = 0, + .funcs = &dce_virtual_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 7, + .minor = 3, + .rev = 0, + .funcs = &gfx_v7_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &cik_sdma_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 4, + .minor = 2, + .rev = 0, + .funcs = &uvd_v4_2_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vce_v2_0_ip_funcs, + }, +}; + static const struct amdgpu_ip_block_version kabini_ip_blocks[] = { /* ORDER MATTERS! */ @@ -1844,6 +1981,74 @@ static const struct amdgpu_ip_block_version kabini_ip_blocks[] = }, }; +static const struct amdgpu_ip_block_version kabini_ip_blocks_vd[] = +{ + /* ORDER MATTERS! */ + { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &cik_common_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &gmc_v7_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &cik_ih_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &amdgpu_pp_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 8, + .minor = 3, + .rev = 0, + .funcs = &dce_virtual_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 7, + .minor = 2, + .rev = 0, + .funcs = &gfx_v7_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &cik_sdma_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 4, + .minor = 2, + .rev = 0, + .funcs = &uvd_v4_2_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vce_v2_0_ip_funcs, + }, +}; + static const struct amdgpu_ip_block_version mullins_ip_blocks[] = { /* ORDER MATTERS! */ @@ -1912,6 +2117,74 @@ static const struct amdgpu_ip_block_version mullins_ip_blocks[] = }, }; +static const struct amdgpu_ip_block_version mullins_ip_blocks_vd[] = +{ + /* ORDER MATTERS! 
*/ + { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &cik_common_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &gmc_v7_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &cik_ih_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &amdgpu_pp_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 8, + .minor = 3, + .rev = 0, + .funcs = &dce_virtual_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 7, + .minor = 2, + .rev = 0, + .funcs = &gfx_v7_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &cik_sdma_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 4, + .minor = 2, + .rev = 0, + .funcs = &uvd_v4_2_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vce_v2_0_ip_funcs, + }, +}; + static const struct amdgpu_ip_block_version kaveri_ip_blocks[] = { /* ORDER MATTERS! */ @@ -1980,6 +2253,74 @@ static const struct amdgpu_ip_block_version kaveri_ip_blocks[] = }, }; +static const struct amdgpu_ip_block_version kaveri_ip_blocks_vd[] = +{ + /* ORDER MATTERS! */ + { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &cik_common_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &gmc_v7_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &cik_ih_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &amdgpu_pp_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 8, + .minor = 1, + .rev = 0, + .funcs = &dce_virtual_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 7, + .minor = 1, + .rev = 0, + .funcs = &gfx_v7_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &cik_sdma_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 4, + .minor = 2, + .rev = 0, + .funcs = &uvd_v4_2_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vce_v2_0_ip_funcs, + }, +}; + int cik_set_ip_blocks(struct amdgpu_device *adev) { switch (adev->asic_type) { diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 8f37066..ff78b5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -77,6 +77,7 @@ #if defined(CONFIG_DRM_AMD_ACP) #include "amdgpu_acp.h" #endif +#include "dce_virtual.h" MODULE_FIRMWARE("amdgpu/polaris10_smc.bin"); MODULE_FIRMWARE("amdgpu/polaris10_smc_sk.bin"); @@ -890,6 +891,74 @@ static const struct amdgpu_ip_block_version tonga_ip_blocks[] = }, }; +static const struct amdgpu_ip_block_version tonga_ip_blocks_vd[] = +{ + /* ORDER MATTERS! 
*/ + { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vi_common_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 8, + .minor = 0, + .rev = 0, + .funcs = &gmc_v8_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 3, + .minor = 0, + .rev = 0, + .funcs = &tonga_ih_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 1, + .rev = 0, + .funcs = &amdgpu_pp_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 10, + .minor = 0, + .rev = 0, + .funcs = &dce_virtual_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 8, + .minor = 0, + .rev = 0, + .funcs = &gfx_v8_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 3, + .minor = 0, + .rev = 0, + .funcs = &sdma_v3_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 5, + .minor = 0, + .rev = 0, + .funcs = &uvd_v5_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 3, + .minor = 0, + .rev = 0, + .funcs = &vce_v3_0_ip_funcs, + }, +}; + static const struct amdgpu_ip_block_version fiji_ip_blocks[] = { /* ORDER MATTERS! */ @@ -958,6 +1027,74 @@ static const struct amdgpu_ip_block_version fiji_ip_blocks[] = }, }; +static const struct amdgpu_ip_block_version fiji_ip_blocks_vd[] = +{ + /* ORDER MATTERS! */ + { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vi_common_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 8, + .minor = 5, + .rev = 0, + .funcs = &gmc_v8_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 3, + .minor = 0, + .rev = 0, + .funcs = &tonga_ih_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 1, + .rev = 0, + .funcs = &amdgpu_pp_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 10, + .minor = 1, + .rev = 0, + .funcs = &dce_virtual_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 8, + .minor = 0, + .rev = 0, + .funcs = &gfx_v8_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 3, + .minor = 0, + .rev = 0, + .funcs = &sdma_v3_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 6, + .minor = 0, + .rev = 0, + .funcs = &uvd_v6_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 3, + .minor = 0, + .rev = 0, + .funcs = &vce_v3_0_ip_funcs, + }, +}; + static const struct amdgpu_ip_block_version polaris11_ip_blocks[] = { /* ORDER MATTERS! */ @@ -1026,6 +1163,74 @@ static const struct amdgpu_ip_block_version polaris11_ip_blocks[] = }, }; +static const struct amdgpu_ip_block_version polaris11_ip_blocks_vd[] = +{ + /* ORDER MATTERS! 
*/ + { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vi_common_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 8, + .minor = 1, + .rev = 0, + .funcs = &gmc_v8_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 3, + .minor = 1, + .rev = 0, + .funcs = &tonga_ih_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 2, + .rev = 0, + .funcs = &amdgpu_pp_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 11, + .minor = 2, + .rev = 0, + .funcs = &dce_virtual_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 8, + .minor = 0, + .rev = 0, + .funcs = &gfx_v8_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 3, + .minor = 1, + .rev = 0, + .funcs = &sdma_v3_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 6, + .minor = 3, + .rev = 0, + .funcs = &uvd_v6_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 3, + .minor = 4, + .rev = 0, + .funcs = &vce_v3_0_ip_funcs, + }, +}; + static const struct amdgpu_ip_block_version cz_ip_blocks[] = { /* ORDER MATTERS! */ @@ -1103,6 +1308,83 @@ static const struct amdgpu_ip_block_version cz_ip_blocks[] = #endif }; +static const struct amdgpu_ip_block_version cz_ip_blocks_vd[] = +{ + /* ORDER MATTERS! */ + { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vi_common_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 8, + .minor = 0, + .rev = 0, + .funcs = &gmc_v8_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 3, + .minor = 0, + .rev = 0, + .funcs = &cz_ih_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 8, + .minor = 0, + .rev = 0, + .funcs = &amdgpu_pp_ip_funcs + }, + { + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 11, + .minor = 0, + .rev = 0, + .funcs = &dce_virtual_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 8, + .minor = 0, + .rev = 0, + .funcs = &gfx_v8_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 3, + .minor = 0, + .rev = 0, + .funcs = &sdma_v3_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 6, + .minor = 0, + .rev = 0, + .funcs = &uvd_v6_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 3, + .minor = 0, + .rev = 0, + .funcs = &vce_v3_0_ip_funcs, + }, +#if defined(CONFIG_DRM_AMD_ACP) + { + .type = AMD_IP_BLOCK_TYPE_ACP, + .major = 2, + .minor = 2, + .rev = 0, + .funcs = &acp_ip_funcs, + }, +#endif +}; + int vi_set_ip_blocks(struct amdgpu_device *adev) { switch (adev->asic_type) { -- cgit v0.10.2 From e443059d0f41fcc07f0fb6b3b8ae96dc3d2364c7 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:37:29 +0800 Subject: drm/amdgpu: Define one variable for virtual display. For virtual display feature, define on variable in amdgpu.ko. When want to enable virtual display feature, need set the option "amdgpu.virtual_display=1". And then disable vga render and crtc if have DCE engine. 
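Usage note: with the module-parameter plumbing added below, the feature stays off by default and is enabled by loading the driver with amdgpu.virtual_display=1 on the kernel command line (or modprobe amdgpu virtual_display=1). The user-visible knob reduces to:

	MODULE_PARM_DESC(virtual_display,
			 "enable virtual display (0 = disable virtual display)");
	module_param_named(virtual_display, amdgpu_virtual_display, int, 0444);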
Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 12112cc..54f7156 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -92,6 +92,7 @@ extern unsigned amdgpu_cg_mask; extern unsigned amdgpu_pg_mask; extern char *amdgpu_disable_cu; extern int amdgpu_sclk_deep_sleep_en; +extern int amdgpu_virtual_display; #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 44fda31..421dbbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -90,6 +90,7 @@ unsigned amdgpu_pcie_lane_cap = 0; unsigned amdgpu_cg_mask = 0xffffffff; unsigned amdgpu_pg_mask = 0xffffffff; char *amdgpu_disable_cu = NULL; +int amdgpu_virtual_display = 0; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -189,6 +190,9 @@ module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444); MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)"); module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444); +MODULE_PARM_DESC(virtual_display, "enable virtual display (0 = disable virtual display)"); +module_param_named(virtual_display, amdgpu_virtual_display, int, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_CIK /* Kaveri */ -- cgit v0.10.2 From a6be7570518f85ce94ca9d6540543e00725828d3 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 8 Aug 2016 11:37:50 +0800 Subject: drm/amdgpu: Set ip_blocks according variable amdgpu_virtual_display. For virtual display feature, if user set the option "amdgpu.virtual_display=1" when load amdgpu.ko. Then need to set the ip_blocks with virtual display ip blocks. And when enable virtual display, the amdgpu_dal need to be set to zero. 
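Condensed to a single ASIC, the selection logic added below boils down to the following (sketch, Tonga case only; the real code switches over adev->asic_type for every supported chip):

	if (amdgpu_virtual_display) {
		/* same table, but the DCE entry uses dce_virtual_ip_funcs */
		adev->ip_blocks = tonga_ip_blocks_vd;
		adev->num_ip_blocks = ARRAY_SIZE(tonga_ip_blocks_vd);
	} else {
		adev->ip_blocks = tonga_ip_blocks;
		adev->num_ip_blocks = ARRAY_SIZE(tonga_ip_blocks);
	}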
Signed-off-by: Emily Deng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 72d5d09..4bf9bd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1185,6 +1185,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev) { int i, r; + DRM_INFO("virtual display enabled:%d\n", amdgpu_virtual_display); + switch (adev->asic_type) { case CHIP_TOPAZ: case CHIP_TONGA: diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index edcc142..15200b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2323,30 +2323,59 @@ static const struct amdgpu_ip_block_version kaveri_ip_blocks_vd[] = int cik_set_ip_blocks(struct amdgpu_device *adev) { - switch (adev->asic_type) { - case CHIP_BONAIRE: - adev->ip_blocks = bonaire_ip_blocks; - adev->num_ip_blocks = ARRAY_SIZE(bonaire_ip_blocks); - break; - case CHIP_HAWAII: - adev->ip_blocks = hawaii_ip_blocks; - adev->num_ip_blocks = ARRAY_SIZE(hawaii_ip_blocks); - break; - case CHIP_KAVERI: - adev->ip_blocks = kaveri_ip_blocks; - adev->num_ip_blocks = ARRAY_SIZE(kaveri_ip_blocks); - break; - case CHIP_KABINI: - adev->ip_blocks = kabini_ip_blocks; - adev->num_ip_blocks = ARRAY_SIZE(kabini_ip_blocks); - break; - case CHIP_MULLINS: - adev->ip_blocks = mullins_ip_blocks; - adev->num_ip_blocks = ARRAY_SIZE(mullins_ip_blocks); - break; - default: - /* FIXME: not supported yet */ - return -EINVAL; + if (amdgpu_virtual_display) { + adev->mode_info.vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE; + switch (adev->asic_type) { + case CHIP_BONAIRE: + adev->ip_blocks = bonaire_ip_blocks_vd; + adev->num_ip_blocks = ARRAY_SIZE(bonaire_ip_blocks_vd); + break; + case CHIP_HAWAII: + adev->ip_blocks = hawaii_ip_blocks_vd; + adev->num_ip_blocks = ARRAY_SIZE(hawaii_ip_blocks_vd); + break; + case CHIP_KAVERI: + adev->ip_blocks = kaveri_ip_blocks_vd; + adev->num_ip_blocks = ARRAY_SIZE(kaveri_ip_blocks_vd); + break; + case CHIP_KABINI: + adev->ip_blocks = kabini_ip_blocks_vd; + adev->num_ip_blocks = ARRAY_SIZE(kabini_ip_blocks_vd); + break; + case CHIP_MULLINS: + adev->ip_blocks = mullins_ip_blocks_vd; + adev->num_ip_blocks = ARRAY_SIZE(mullins_ip_blocks_vd); + break; + default: + /* FIXME: not supported yet */ + return -EINVAL; + } + } else { + switch (adev->asic_type) { + case CHIP_BONAIRE: + adev->ip_blocks = bonaire_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(bonaire_ip_blocks); + break; + case CHIP_HAWAII: + adev->ip_blocks = hawaii_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(hawaii_ip_blocks); + break; + case CHIP_KAVERI: + adev->ip_blocks = kaveri_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(kaveri_ip_blocks); + break; + case CHIP_KABINI: + adev->ip_blocks = kabini_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(kabini_ip_blocks); + break; + case CHIP_MULLINS: + adev->ip_blocks = mullins_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(mullins_ip_blocks); + break; + default: + /* FIXME: not supported yet */ + return -EINVAL; + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index ff78b5a..0642917 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1387,32 +1387,64 @@ static const struct amdgpu_ip_block_version cz_ip_blocks_vd[] = int vi_set_ip_blocks(struct amdgpu_device *adev) { - switch (adev->asic_type) { - case CHIP_TOPAZ: - adev->ip_blocks = topaz_ip_blocks; - 
adev->num_ip_blocks = ARRAY_SIZE(topaz_ip_blocks); - break; - case CHIP_FIJI: - adev->ip_blocks = fiji_ip_blocks; - adev->num_ip_blocks = ARRAY_SIZE(fiji_ip_blocks); - break; - case CHIP_TONGA: - adev->ip_blocks = tonga_ip_blocks; - adev->num_ip_blocks = ARRAY_SIZE(tonga_ip_blocks); - break; - case CHIP_POLARIS11: - case CHIP_POLARIS10: - adev->ip_blocks = polaris11_ip_blocks; - adev->num_ip_blocks = ARRAY_SIZE(polaris11_ip_blocks); - break; - case CHIP_CARRIZO: - case CHIP_STONEY: - adev->ip_blocks = cz_ip_blocks; - adev->num_ip_blocks = ARRAY_SIZE(cz_ip_blocks); - break; - default: - /* FIXME: not supported yet */ - return -EINVAL; + if (amdgpu_virtual_display) { + adev->mode_info.vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE; + switch (adev->asic_type) { + case CHIP_TOPAZ: + adev->ip_blocks = topaz_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(topaz_ip_blocks); + break; + case CHIP_FIJI: + adev->ip_blocks = fiji_ip_blocks_vd; + adev->num_ip_blocks = ARRAY_SIZE(fiji_ip_blocks_vd); + break; + case CHIP_TONGA: + adev->ip_blocks = tonga_ip_blocks_vd; + adev->num_ip_blocks = ARRAY_SIZE(tonga_ip_blocks_vd); + break; + case CHIP_POLARIS11: + case CHIP_POLARIS10: + adev->ip_blocks = polaris11_ip_blocks_vd; + adev->num_ip_blocks = ARRAY_SIZE(polaris11_ip_blocks_vd); + break; + + case CHIP_CARRIZO: + case CHIP_STONEY: + adev->ip_blocks = cz_ip_blocks_vd; + adev->num_ip_blocks = ARRAY_SIZE(cz_ip_blocks_vd); + break; + default: + /* FIXME: not supported yet */ + return -EINVAL; + } + } else { + switch (adev->asic_type) { + case CHIP_TOPAZ: + adev->ip_blocks = topaz_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(topaz_ip_blocks); + break; + case CHIP_FIJI: + adev->ip_blocks = fiji_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(fiji_ip_blocks); + break; + case CHIP_TONGA: + adev->ip_blocks = tonga_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(tonga_ip_blocks); + break; + case CHIP_POLARIS11: + case CHIP_POLARIS10: + adev->ip_blocks = polaris11_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(polaris11_ip_blocks); + break; + case CHIP_CARRIZO: + case CHIP_STONEY: + adev->ip_blocks = cz_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(cz_ip_blocks); + break; + default: + /* FIXME: not supported yet */ + return -EINVAL; + } } return 0; -- cgit v0.10.2 From cfc5adea1955ee8ddb62cc0d20ee454472033b6a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 8 Aug 2016 18:24:53 +0200 Subject: drm: Make sure drm_vblank_no_hw_counter isn't abused Shouldn't be possible since everyone kzallocs this, but better safe than sorry. Random drive-by-idea really. Cc: Rodrigo Vivi Signed-off-by: Daniel Vetter Reviewed-by: Rodrigo Vivi Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1470673493-14304-1-git-send-email-daniel.vetter@ffwll.ch diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 97c7064..c0c3f20 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1826,6 +1826,7 @@ EXPORT_SYMBOL(drm_crtc_handle_vblank); */ u32 drm_vblank_no_hw_counter(struct drm_device *dev, unsigned int pipe) { + WARN_ON_ONCE(dev->max_vblank_count != 0); return 0; } EXPORT_SYMBOL(drm_vblank_no_hw_counter); -- cgit v0.10.2 From d06b7e1cf0fa4552fd725111b46df1ed0b649b15 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 8 Aug 2016 14:35:55 -0400 Subject: drm/amdgpu: move vsync_timer_enabled setup to dce virtual early_init Put it in one place. 
Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 15200b1..e539b28 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2324,7 +2324,6 @@ static const struct amdgpu_ip_block_version kaveri_ip_blocks_vd[] = int cik_set_ip_blocks(struct amdgpu_device *adev) { if (amdgpu_virtual_display) { - adev->mode_info.vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE; switch (adev->asic_type) { case CHIP_BONAIRE: adev->ip_blocks = bonaire_ip_blocks_vd; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 1c16983..4c8ca58 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -353,6 +353,7 @@ static int dce_virtual_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->mode_info.vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE; dce_virtual_set_display_funcs(adev); dce_virtual_set_irq_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 0642917..2d3e3ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1388,7 +1388,6 @@ static const struct amdgpu_ip_block_version cz_ip_blocks_vd[] = int vi_set_ip_blocks(struct amdgpu_device *adev) { if (amdgpu_virtual_display) { - adev->mode_info.vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE; switch (adev->asic_type) { case CHIP_TOPAZ: adev->ip_blocks = topaz_ip_blocks; -- cgit v0.10.2 From 2579de4394309da473d89cd5df08dc0bfe5e87f5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 8 Aug 2016 14:40:04 -0400 Subject: drm/amdgpu/virtual_dce: add case for topaz for disable_dce This asic has no DCE block. Also clarify the error message for unmatched chips. Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 4c8ca58..5499693 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -124,8 +124,11 @@ void dce_virtual_stop_mc_access(struct amdgpu_device *adev, case CHIP_POLARIS10: dce_v11_0_disable_dce(adev); break; + case CHIP_TOPAZ: + /* no DCE */ + return; default: - DRM_ERROR("Usupported ASIC type: 0x%X\n", adev->asic_type); + DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); } return; -- cgit v0.10.2 From 4f4b78341b658cfa0ca7b1587f5540c21f9db997 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 8 Aug 2016 14:45:29 -0400 Subject: drm/amdgpu: add virtual dce support for iceland Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 2d3e3ed..69c2f81 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -823,6 +823,60 @@ static const struct amdgpu_ip_block_version topaz_ip_blocks[] = }, }; +static const struct amdgpu_ip_block_version topaz_ip_blocks_vd[] = +{ + /* ORDER MATTERS! 
*/ + { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vi_common_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 7, + .minor = 4, + .rev = 0, + .funcs = &gmc_v7_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 2, + .minor = 4, + .rev = 0, + .funcs = &iceland_ih_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 1, + .rev = 0, + .funcs = &amdgpu_pp_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &dce_virtual_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 8, + .minor = 0, + .rev = 0, + .funcs = &gfx_v8_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 2, + .minor = 4, + .rev = 0, + .funcs = &sdma_v2_4_ip_funcs, + }, +}; + static const struct amdgpu_ip_block_version tonga_ip_blocks[] = { /* ORDER MATTERS! */ @@ -1390,8 +1444,8 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) if (amdgpu_virtual_display) { switch (adev->asic_type) { case CHIP_TOPAZ: - adev->ip_blocks = topaz_ip_blocks; - adev->num_ip_blocks = ARRAY_SIZE(topaz_ip_blocks); + adev->ip_blocks = topaz_ip_blocks_vd; + adev->num_ip_blocks = ARRAY_SIZE(topaz_ip_blocks_vd); break; case CHIP_FIJI: adev->ip_blocks = fiji_ip_blocks_vd; -- cgit v0.10.2 From 8fe5616b20e5742bb5fee0e77dffe2fc76ac92a0 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Tue, 14 Jun 2016 11:43:30 +0300 Subject: drm/tilcdc: Restore old dpms state in pm_resume() Restore old dpms state in pm_resume(). The dpms is turned off in pm_suspend() and it should be restored to its original state in pm_resume(). Without this patch the display is left blanked after a suspend/resume cycle. Fixes commit 614b3cfeb8d2 ("drm/tilcdc: disable the lcd controller/dma engine when suspend invoked") Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 107c8bd..1601428 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -246,6 +246,13 @@ void tilcdc_crtc_dpms(struct drm_crtc *crtc, int mode) } } +int tilcdc_crtc_current_dpms_state(struct drm_crtc *crtc) +{ + struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); + + return tilcdc_crtc->dpms; +} + static bool tilcdc_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index d278093..ed68324 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -597,6 +597,7 @@ static int tilcdc_pm_suspend(struct device *dev) } /* Disable the LCDC controller, to avoid locking up the PRCM */ + priv->saved_dpms_state = tilcdc_crtc_current_dpms_state(priv->crtc); tilcdc_crtc_dpms(priv->crtc, DRM_MODE_DPMS_OFF); /* Save register state: */ @@ -627,6 +628,8 @@ static int tilcdc_pm_resume(struct device *dev) priv->saved_register[n++]); } + tilcdc_crtc_dpms(priv->crtc, priv->saved_dpms_state); + drm_kms_helper_poll_enable(ddev); return 0; diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.h b/drivers/gpu/drm/tilcdc/tilcdc_drv.h index c1de18b..3b52ce8 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.h +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.h @@ -67,6 +67,7 @@ struct tilcdc_drm_private { /* register contents saved across suspend/resume: */ u32 *saved_register; + int saved_dpms_state; bool ctx_valid; #ifdef CONFIG_CPU_FREQ @@ -172,5 +173,6 @@ void tilcdc_crtc_set_simulate_vesa_sync(struct drm_crtc 
*crtc, int tilcdc_crtc_mode_valid(struct drm_crtc *crtc, struct drm_display_mode *mode); int tilcdc_crtc_max_width(struct drm_crtc *crtc); void tilcdc_crtc_dpms(struct drm_crtc *crtc, int mode); +int tilcdc_crtc_current_dpms_state(struct drm_crtc *crtc); #endif /* __TILCDC_DRV_H__ */ -- cgit v0.10.2 From 1abcdac8ed4cf2335f050d88b2fe8f343726ed41 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Fri, 17 Jun 2016 11:54:06 +0300 Subject: drm/tilcdc: Move LCDC_SYNC_LOST handling inside if (ver == 2) statement Move LCDC_SYNC_LOST handling inside if (ver == 2) statement. LCDC_SYNC_LOST interrupt status bit is only defined for version 2 silicon. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 1601428..45ce0ba 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -731,18 +731,17 @@ irqreturn_t tilcdc_crtc_irq(struct drm_crtc *crtc) wake_up(&tilcdc_crtc->frame_done_wq); } tilcdc_write(dev, LCDC_END_OF_INT_IND_REG, 0); - } - if (stat & LCDC_SYNC_LOST) { - dev_err_ratelimited(dev->dev, "%s(0x%08x): Sync lost", - __func__, stat); - tilcdc_crtc->frame_intact = false; - if (tilcdc_crtc->sync_lost_count++ > SYNC_LOST_COUNT_LIMIT) { - dev_err(dev->dev, - "%s(0x%08x): Sync lost flood detected, disabling the interrupt", - __func__, stat); - tilcdc_write(dev, LCDC_INT_ENABLE_CLR_REG, - LCDC_SYNC_LOST); + if (stat & LCDC_SYNC_LOST) { + dev_err_ratelimited(dev->dev, "%s(0x%08x): Sync lost", + __func__, stat); + tilcdc_crtc->frame_intact = false; + if (tilcdc_crtc->sync_lost_count++ > + SYNC_LOST_COUNT_LIMIT) { + dev_err(dev->dev, "%s(0x%08x): Sync lost flood detected, disabling the interrupt", __func__, stat); + tilcdc_write(dev, LCDC_INT_ENABLE_CLR_REG, + LCDC_SYNC_LOST); + } } } -- cgit v0.10.2 From 149441134c4ae4ebff393ca113f0941842385a6b Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Thu, 7 Apr 2016 20:36:48 +0300 Subject: drm/tilcdc: Write to LCDC_END_OF_INT_IND_REG at the end of IRQ function Reorder the IRQ function so that the write to LCDC_END_OF_INT_IND_REG is done last. The write to LCDC_END_OF_INT_IND_REG indicates to LCDC that the interrupt service routine has completed (see section 13.3.6.1.6 in AM335x TRM). This is needed if LCDC's ipgvmodirq module is configured for pulse interrupts. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 45ce0ba..55b8472 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -725,12 +725,16 @@ irqreturn_t tilcdc_crtc_irq(struct drm_crtc *crtc) tilcdc_crtc->frame_intact = true; } + if (stat & LCDC_FIFO_UNDERFLOW) + dev_err_ratelimited(dev->dev, "%s(0x%08x): FIFO underfow", + __func__, stat); + + /* For revision 2 only */ if (priv->rev == 2) { if (stat & LCDC_FRAME_DONE) { tilcdc_crtc->frame_done = true; wake_up(&tilcdc_crtc->frame_done_wq); } - tilcdc_write(dev, LCDC_END_OF_INT_IND_REG, 0); if (stat & LCDC_SYNC_LOST) { dev_err_ratelimited(dev->dev, "%s(0x%08x): Sync lost", @@ -743,11 +747,12 @@ irqreturn_t tilcdc_crtc_irq(struct drm_crtc *crtc) LCDC_SYNC_LOST); } } - } - if (stat & LCDC_FIFO_UNDERFLOW) - dev_err_ratelimited(dev->dev, "%s(0x%08x): FIFO underfow", - __func__, stat); + /* Indicate to LCDC that the interrupt service routine has + * completed, see 13.3.6.1.6 in AM335x TRM. 
+ */ + tilcdc_write(dev, LCDC_END_OF_INT_IND_REG, 0); + } return IRQ_HANDLED; } -- cgit v0.10.2 From 2d5be88235c554d83c4bff8f12c96a1d7d27f697 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Thu, 7 Apr 2016 20:20:23 +0300 Subject: drm/tilcdc: Move waiting of LCDC_FRAME_DONE IRQ into stop() Move wait queue waiting of LCDC_FRAME_DONE IRQ from tilcdc_crtc_dpms() into stop() function. This is just a cleanup and enables independent use of stop() function. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 55b8472..bcbf733 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -113,9 +113,25 @@ static void start(struct drm_crtc *crtc) static void stop(struct drm_crtc *crtc) { + struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); struct drm_device *dev = crtc->dev; + struct tilcdc_drm_private *priv = dev->dev_private; + tilcdc_crtc->frame_done = false; tilcdc_clear(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE); + + /* + * if necessary wait for framedone irq which will still come + * before putting things to sleep.. + */ + if (priv->rev == 2) { + int ret = wait_event_timeout(tilcdc_crtc->frame_done_wq, + tilcdc_crtc->frame_done, + msecs_to_jiffies(50)); + if (ret == 0) + dev_err(dev->dev, "%s: timeout waiting for framedone\n", + __func__); + } } static void tilcdc_crtc_destroy(struct drm_crtc *crtc) @@ -212,22 +228,7 @@ void tilcdc_crtc_dpms(struct drm_crtc *crtc, int mode) pm_runtime_get_sync(dev->dev); start(crtc); } else { - tilcdc_crtc->frame_done = false; stop(crtc); - - /* - * if necessary wait for framedone irq which will still come - * before putting things to sleep.. - */ - if (priv->rev == 2) { - int ret = wait_event_timeout( - tilcdc_crtc->frame_done_wq, - tilcdc_crtc->frame_done, - msecs_to_jiffies(50)); - if (ret == 0) - dev_err(dev->dev, "timeout waiting for framedone\n"); - } - pm_runtime_put_sync(dev->dev); if (tilcdc_crtc->next_fb) { -- cgit v0.10.2 From 437c7d948d75c83441afea48773e50b992632483 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Thu, 16 Jun 2016 16:19:17 +0300 Subject: drm/tilcdc: Increase time out for waiting frame done interrupt Increase time out for waiting frame done interrupt. 50ms is long enough for the usual display modes (50 Hz or higher refresh rate), but it may be a bit tight for some unusual mode. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index bcbf733..8013a74 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -127,7 +127,7 @@ static void stop(struct drm_crtc *crtc) if (priv->rev == 2) { int ret = wait_event_timeout(tilcdc_crtc->frame_done_wq, tilcdc_crtc->frame_done, - msecs_to_jiffies(50)); + msecs_to_jiffies(500)); if (ret == 0) dev_err(dev->dev, "%s: timeout waiting for framedone\n", __func__); -- cgit v0.10.2 From d85f850ed6b9c47207b46643d1489e0a83900758 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Wed, 15 Jun 2016 11:16:23 +0300 Subject: drm/tilcdc: Call drm_crtc_vblank_on() and *_off() in start() and stop() Add drm_crtc_vblank_on() and *_off() calls to start() and stop() functions, to make sure any vblank waits etc. gets properly cleaned up. 
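The pairing follows the usual rule for drivers that switch the CRTC on and off themselves (sketch; the enable/disable sequences are abbreviated, full code is in the diff below):

	static void start(struct drm_crtc *crtc)
	{
		/* ... enable raster/DMA ... */
		drm_crtc_vblank_on(crtc);	/* vblank waits become valid again */
	}

	static void stop(struct drm_crtc *crtc)
	{
		/* ... disable raster and wait for frame done ... */
		drm_crtc_vblank_off(crtc);	/* completes/cancels pending vblank waits */
	}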
Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 8013a74..dfbeba5 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -109,6 +109,8 @@ static void start(struct drm_crtc *crtc) tilcdc_clear(dev, LCDC_DMA_CTRL_REG, LCDC_DUAL_FRAME_BUFFER_ENABLE); tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_PALETTE_LOAD_MODE(DATA_ONLY)); tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE); + + drm_crtc_vblank_on(crtc); } static void stop(struct drm_crtc *crtc) @@ -132,6 +134,8 @@ static void stop(struct drm_crtc *crtc) dev_err(dev->dev, "%s: timeout waiting for framedone\n", __func__); } + + drm_crtc_vblank_off(crtc); } static void tilcdc_crtc_destroy(struct drm_crtc *crtc) -- cgit v0.10.2 From 24b31ba099c6e32685211847fa58d876f562eb11 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Mon, 6 Jun 2016 11:16:41 +0300 Subject: drm/tilcdc: Refer to panel.txt and tfp410.txt bindings in tilcdc.txt The legacy panel.txt and tfp410.txt bindings are still the only supported way to connect lcd panel and tfp410 DVI encoder to tilcdc. Signed-off-by: Jyri Sarha diff --git a/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt b/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt index 2136ee8..6efa4c5 100644 --- a/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt +++ b/Documentation/devicetree/bindings/display/tilcdc/tilcdc.txt @@ -24,6 +24,10 @@ Optional nodes: binding follows Documentation/devicetree/bindings/graph.txt and suppors a single port with a single endpoint. + - See also Documentation/devicetree/bindings/display/tilcdc/panel.txt and + Documentation/devicetree/bindings/display/tilcdc/tfp410.txt for connecting + tfp410 DVI encoder or lcd panel to lcdc + Example: fb: fb@4830e000 { -- cgit v0.10.2 From 10a55a18f50136fe7229ad9a3dea4f7c2d387f6a Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Mon, 6 Jun 2016 11:11:35 +0300 Subject: drm/tilcdc: Avoid error print by of_graph_get_next_endpoint() Avoid error print by of_graph_get_next_endpoint() if there is no ports present. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_external.c b/drivers/gpu/drm/tilcdc/tilcdc_external.c index 03acb4f..ad3db4d 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_external.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_external.c @@ -138,12 +138,21 @@ static int dev_match_of(struct device *dev, void *data) int tilcdc_get_external_components(struct device *dev, struct component_match **match) { + struct device_node *node; struct device_node *ep = NULL; int count = 0; - while ((ep = of_graph_get_next_endpoint(dev->of_node, ep))) { - struct device_node *node; + /* Avoid error print by of_graph_get_next_endpoint() if there + * is no ports present. + */ + node = of_get_child_by_name(dev->of_node, "ports"); + if (!node) + node = of_get_child_by_name(dev->of_node, "port"); + if (!node) + return 0; + of_node_put(node); + while ((ep = of_graph_get_next_endpoint(dev->of_node, ep))) { node = of_graph_get_remote_port_parent(ep); if (!node && !of_device_is_available(node)) { of_node_put(node); -- cgit v0.10.2 From 20a98acba5baf925d0d6fb334f1c55aa2ca7a708 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Thu, 23 Jun 2016 11:07:16 +0300 Subject: drm/tilcdc: Fix tilcdc component master unloading Fix tilcdc component master unloading. 
If a subcomponent module (tda998x in this case) is unloaded before its master (tilcdc in this case), it calls drm_put_dev() and it should not be called again by the master when its module is unloaded. However component_master_del() must still be called and the check if the drm_put_dev() has been called must be in component_master_ops unbind() callback, not in platform_driver remove() callback. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index ed68324..16163a7 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -651,6 +651,12 @@ static int tilcdc_bind(struct device *dev) static void tilcdc_unbind(struct device *dev) { + struct drm_device *ddev = dev_get_drvdata(dev); + + /* Check if a subcomponent has already triggered the unloading. */ + if (!ddev->dev_private) + return; + drm_put_dev(dev_get_drvdata(dev)); } @@ -683,17 +689,15 @@ static int tilcdc_pdev_probe(struct platform_device *pdev) static int tilcdc_pdev_remove(struct platform_device *pdev) { - struct drm_device *ddev = dev_get_drvdata(&pdev->dev); - struct tilcdc_drm_private *priv = ddev->dev_private; - - /* Check if a subcomponent has already triggered the unloading. */ - if (!priv) - return 0; + int ret; - if (priv->is_componentized) - component_master_del(&pdev->dev, &tilcdc_comp_ops); - else + ret = tilcdc_get_external_components(&pdev->dev, NULL); + if (ret < 0) + return ret; + else if (ret == 0) drm_put_dev(platform_get_drvdata(pdev)); + else + component_master_del(&pdev->dev, &tilcdc_comp_ops); return 0; } -- cgit v0.10.2 From 8c65abb9140e26d66968e6e357a00ad704705154 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Thu, 7 Apr 2016 14:56:32 +0300 Subject: drm/tilcdc: Make tilcdc_crtc_page_flip() public Make tilcdc_crtc_page_flip() public for dummy plane implementation to use. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index dfbeba5..52845c8 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -165,7 +165,7 @@ static int tilcdc_verify_fb(struct drm_crtc *crtc, struct drm_framebuffer *fb) return 0; } -static int tilcdc_crtc_page_flip(struct drm_crtc *crtc, +int tilcdc_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, uint32_t page_flip_flags) diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.h b/drivers/gpu/drm/tilcdc/tilcdc_drv.h index 3b52ce8..cd78874 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.h +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.h @@ -174,5 +174,9 @@ int tilcdc_crtc_mode_valid(struct drm_crtc *crtc, struct drm_display_mode *mode) int tilcdc_crtc_max_width(struct drm_crtc *crtc); void tilcdc_crtc_dpms(struct drm_crtc *crtc, int mode); int tilcdc_crtc_current_dpms_state(struct drm_crtc *crtc); +int tilcdc_crtc_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t page_flip_flags); #endif /* __TILCDC_DRV_H__ */ -- cgit v0.10.2 From 0a1fe1b7c589a635a40d6d9b525dd99634581ea2 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Mon, 13 Jun 2016 09:53:36 +0300 Subject: drm/tilcdc: Make tilcdc_crtc_page_flip() work if crtc is not yet on Make tilcdc_crtc_page_flip() work if crtc is not yet on. The plane commit sometimes comes before crtc is turned on. The new framebuffer should be set to scanout also in that case, so that it is there when crtc is turned on at the end of the commit phase. 
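Condensed, the updated flow only defers the flip to the vblank irq handler when the CRTC is actually running and the next vblank is imminent; otherwise the scanout address is programmed immediately so it is picked up when the CRTC is enabled later. Sketch of the logic in the diff below (tdiff is computed from last_vblank and the mode's vrefresh as before):

	if (crtc->hwmode.vrefresh && ktime_to_ns(tilcdc_crtc->last_vblank)) {
		/* crtc running: postpone to the irq handler only if we are
		 * inside the safety window before the next vblank */
		if (tdiff < TILCDC_VBLANK_SAFETY_THRESHOLD_US)
			tilcdc_crtc->next_fb = fb;
	}

	if (tilcdc_crtc->next_fb != fb)
		set_scanout(crtc, fb);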
Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 52845c8..d9d2a6c 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -174,8 +174,6 @@ int tilcdc_crtc_page_flip(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; int r; unsigned long flags; - s64 tdiff; - ktime_t next_vblank; r = tilcdc_verify_fb(crtc, fb); if (r) @@ -194,15 +192,21 @@ int tilcdc_crtc_page_flip(struct drm_crtc *crtc, spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); - next_vblank = ktime_add_us(tilcdc_crtc->last_vblank, - 1000000 / crtc->hwmode.vrefresh); + if (crtc->hwmode.vrefresh && ktime_to_ns(tilcdc_crtc->last_vblank)) { + ktime_t next_vblank; + s64 tdiff; - tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get())); + next_vblank = ktime_add_us(tilcdc_crtc->last_vblank, + 1000000 / crtc->hwmode.vrefresh); - if (tdiff >= TILCDC_VBLANK_SAFETY_THRESHOLD_US) + tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get())); + + if (tdiff < TILCDC_VBLANK_SAFETY_THRESHOLD_US) + tilcdc_crtc->next_fb = fb; + } + + if (tilcdc_crtc->next_fb != fb) set_scanout(crtc, fb); - else - tilcdc_crtc->next_fb = fb; tilcdc_crtc->event = event; @@ -248,6 +252,7 @@ void tilcdc_crtc_dpms(struct drm_crtc *crtc, int mode) } drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq); + tilcdc_crtc->last_vblank = ktime_set(0, 0); } } -- cgit v0.10.2 From b961c48b056c2562c7dbb0b2cfcdad486610550d Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Thu, 7 Apr 2016 14:52:02 +0300 Subject: drm/tilcdc: Add dummy primary plane implementation Add dummy primary plane implementation. LCDC does not really have planes, only simple framebuffer that is mandatory. This primary plane implementation has the necessary checks for implementing simple framebuffer trough DRM plane abstraction. For setting the actual framebuffer the implementation relies on a CRTC side function. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/Makefile b/drivers/gpu/drm/tilcdc/Makefile index deeca48..6f67517 100644 --- a/drivers/gpu/drm/tilcdc/Makefile +++ b/drivers/gpu/drm/tilcdc/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_DRM_TILCDC_SLAVE_COMPAT) += tilcdc_slave_compat.o \ tilcdc_slave_compat.dtb.o tilcdc-y := \ + tilcdc_plane.o \ tilcdc_crtc.o \ tilcdc_tfp410.o \ tilcdc_panel.o \ diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.h b/drivers/gpu/drm/tilcdc/tilcdc_drv.h index cd78874..0619c3c 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.h +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.h @@ -179,4 +179,6 @@ int tilcdc_crtc_page_flip(struct drm_crtc *crtc, struct drm_pending_vblank_event *event, uint32_t page_flip_flags); +int tilcdc_plane_init(struct drm_device *dev, struct drm_plane *plane); + #endif /* __TILCDC_DRV_H__ */ diff --git a/drivers/gpu/drm/tilcdc/tilcdc_plane.c b/drivers/gpu/drm/tilcdc/tilcdc_plane.c new file mode 100644 index 0000000..d5635d6 --- /dev/null +++ b/drivers/gpu/drm/tilcdc/tilcdc_plane.c @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2015 Texas Instruments + * Author: Jyri Sarha + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include + +#include +#include +#include +#include + +#include "tilcdc_drv.h" + +static const u32 tilcdc_formats[] = { DRM_FORMAT_RGB565, + DRM_FORMAT_RGB888, + DRM_FORMAT_XRGB8888 }; + +static struct drm_plane_funcs tilcdc_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_plane_cleanup, + .set_property = drm_atomic_helper_plane_set_property, + .reset = drm_atomic_helper_plane_reset, + .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, +}; + +static int tilcdc_plane_atomic_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct drm_crtc_state *crtc_state; + struct drm_plane_state *old_state = plane->state; + unsigned int depth, bpp; + + if (!state->crtc) + return 0; + + if (WARN_ON(!state->fb)) + return -EINVAL; + + if (state->crtc_x || state->crtc_y) { + dev_err(plane->dev->dev, "%s: crtc position must be zero.", + __func__); + return -EINVAL; + } + + crtc_state = drm_atomic_get_existing_crtc_state(state->state, + state->crtc); + /* we should have a crtc state if the plane is attached to a crtc */ + if (WARN_ON(!crtc_state)) + return 0; + + if (crtc_state->mode.hdisplay != state->crtc_w || + crtc_state->mode.vdisplay != state->crtc_h) { + dev_err(plane->dev->dev, + "%s: Size must match mode (%dx%d == %dx%d)", __func__, + crtc_state->mode.hdisplay, crtc_state->mode.vdisplay, + state->crtc_w, state->crtc_h); + return -EINVAL; + } + + drm_fb_get_bpp_depth(state->fb->pixel_format, &depth, &bpp); + if (state->fb->pitches[0] != crtc_state->mode.hdisplay * bpp / 8) { + dev_err(plane->dev->dev, + "Invalid pitch: fb and crtc widths must be the same"); + return -EINVAL; + } + + if (state->fb && old_state->fb && + state->fb->pixel_format != old_state->fb->pixel_format) { + dev_dbg(plane->dev->dev, + "%s(): pixel format change requires mode_change\n", + __func__); + crtc_state->mode_changed = true; + } + + return 0; +} + +static void tilcdc_plane_atomic_update(struct drm_plane *plane, + struct drm_plane_state *old_state) +{ + struct drm_plane_state *state = plane->state; + + if (!state->crtc) + return; + + if (WARN_ON(!state->fb || !state->crtc->state)) + return; + + tilcdc_crtc_page_flip(state->crtc, + state->fb, + state->crtc->state->event, + 0); +} + +static const struct drm_plane_helper_funcs plane_helper_funcs = { + .atomic_check = tilcdc_plane_atomic_check, + .atomic_update = tilcdc_plane_atomic_update, +}; + +int tilcdc_plane_init(struct drm_device *dev, + struct drm_plane *plane) +{ + int ret; + + ret = drm_plane_init(dev, plane, 1, + &tilcdc_plane_funcs, + tilcdc_formats, + ARRAY_SIZE(tilcdc_formats), + true); + if (ret) { + dev_err(dev->dev, "Failed to initialize plane: %d\n", ret); + return ret; + } + + drm_plane_helper_add(plane, &plane_helper_funcs); + + return 0; +} -- cgit v0.10.2 From 47f571c6e5d7b5f6021d6ade9607ecb6f28da5ad Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Thu, 7 Apr 2016 15:04:18 +0300 Subject: drm/tilcdc: Initialize dummy primary plane from crtc init Initialize dummy primary plane from crtc init. 
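Because LCDC has a single mandatory scanout buffer, the dummy plane needs no allocation of its own: it can be embedded directly in struct tilcdc_crtc and passed to drm_crtc_init_with_planes() as the primary plane. A rough sketch of the pattern the diff below applies (no cursor plane, since the hardware has none):

        struct tilcdc_crtc {
                struct drm_crtc base;
                struct drm_plane primary;       /* the one and only plane */
                /* ... */
        };

        /* in tilcdc_crtc_create() */
        ret = tilcdc_plane_init(dev, &tilcdc_crtc->primary);
        if (ret < 0)
                goto fail;

        ret = drm_crtc_init_with_planes(dev, crtc, &tilcdc_crtc->primary,
                                        NULL, &tilcdc_crtc_funcs, "tilcdc crtc");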
Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index d9d2a6c..e762b4e 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -26,6 +26,7 @@ struct tilcdc_crtc { struct drm_crtc base; + struct drm_plane primary; const struct tilcdc_panel_info *info; struct drm_pending_vblank_event *event; int dpms; @@ -782,6 +783,10 @@ struct drm_crtc *tilcdc_crtc_create(struct drm_device *dev) crtc = &tilcdc_crtc->base; + ret = tilcdc_plane_init(dev, &tilcdc_crtc->primary); + if (ret < 0) + goto fail; + tilcdc_crtc->dpms = DRM_MODE_DPMS_OFF; init_waitqueue_head(&tilcdc_crtc->frame_done_wq); @@ -790,7 +795,11 @@ struct drm_crtc *tilcdc_crtc_create(struct drm_device *dev) spin_lock_init(&tilcdc_crtc->irq_lock); - ret = drm_crtc_init(dev, crtc, &tilcdc_crtc_funcs); + ret = drm_crtc_init_with_planes(dev, crtc, + &tilcdc_crtc->primary, + NULL, + &tilcdc_crtc_funcs, + "tilcdc crtc"); if (ret < 0) goto fail; -- cgit v0.10.2 From f6382f186d29827501353af47c3c4134bab5f0d3 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Thu, 7 Apr 2016 15:09:50 +0300 Subject: drm/tilcdc: Add tilcdc_crtc_mode_set_nofb() Add tilcdc_crtc_mode_set_nofb(). The mode_set_nofb() semantics do not fit well to LCDC, because of the mandatory framebuffer. However, when the primary plane is required in the check phase, it and the framebuffer can be found from the atomic state struct. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index e762b4e..eae0020 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -303,6 +303,177 @@ static void tilcdc_crtc_commit(struct drm_crtc *crtc) tilcdc_crtc_dpms(crtc, DRM_MODE_DPMS_ON); } +static void tilcdc_crtc_mode_set_nofb(struct drm_crtc *crtc) +{ + struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct tilcdc_drm_private *priv = dev->dev_private; + const struct tilcdc_panel_info *info = tilcdc_crtc->info; + uint32_t reg, hbp, hfp, hsw, vbp, vfp, vsw; + struct drm_display_mode *mode = &crtc->state->adjusted_mode; + struct drm_framebuffer *fb = crtc->primary->state->fb; + + if (WARN_ON(!info)) + return; + + if (WARN_ON(!fb)) + return; + + pm_runtime_get_sync(dev->dev); + + /* Configure the Burst Size and fifo threshold of DMA: */ + reg = tilcdc_read(dev, LCDC_DMA_CTRL_REG) & ~0x00000770; + switch (info->dma_burst_sz) { + case 1: + reg |= LCDC_DMA_BURST_SIZE(LCDC_DMA_BURST_1); + break; + case 2: + reg |= LCDC_DMA_BURST_SIZE(LCDC_DMA_BURST_2); + break; + case 4: + reg |= LCDC_DMA_BURST_SIZE(LCDC_DMA_BURST_4); + break; + case 8: + reg |= LCDC_DMA_BURST_SIZE(LCDC_DMA_BURST_8); + break; + case 16: + reg |= LCDC_DMA_BURST_SIZE(LCDC_DMA_BURST_16); + break; + default: + dev_err(dev->dev, "invalid burst size\n"); + return; + } + reg |= (info->fifo_th << 8); + tilcdc_write(dev, LCDC_DMA_CTRL_REG, reg); + + /* Configure timings: */ + hbp = mode->htotal - mode->hsync_end; + hfp = mode->hsync_start - mode->hdisplay; + hsw = mode->hsync_end - mode->hsync_start; + vbp = mode->vtotal - mode->vsync_end; + vfp = mode->vsync_start - mode->vdisplay; + vsw = mode->vsync_end - mode->vsync_start; + + DBG("%dx%d, hbp=%u, hfp=%u, hsw=%u, vbp=%u, vfp=%u, vsw=%u", + mode->hdisplay, mode->vdisplay, hbp, hfp, hsw, vbp, vfp, vsw); + + /* Set AC Bias Period and Number of Transitions per Interrupt: */ + reg = tilcdc_read(dev, LCDC_RASTER_TIMING_2_REG) & ~0x000fff00; + reg |= 
LCDC_AC_BIAS_FREQUENCY(info->ac_bias) | + LCDC_AC_BIAS_TRANSITIONS_PER_INT(info->ac_bias_intrpt); + + /* + * subtract one from hfp, hbp, hsw because the hardware uses + * a value of 0 as 1 + */ + if (priv->rev == 2) { + /* clear bits we're going to set */ + reg &= ~0x78000033; + reg |= ((hfp-1) & 0x300) >> 8; + reg |= ((hbp-1) & 0x300) >> 4; + reg |= ((hsw-1) & 0x3c0) << 21; + } + tilcdc_write(dev, LCDC_RASTER_TIMING_2_REG, reg); + + reg = (((mode->hdisplay >> 4) - 1) << 4) | + (((hbp-1) & 0xff) << 24) | + (((hfp-1) & 0xff) << 16) | + (((hsw-1) & 0x3f) << 10); + if (priv->rev == 2) + reg |= (((mode->hdisplay >> 4) - 1) & 0x40) >> 3; + tilcdc_write(dev, LCDC_RASTER_TIMING_0_REG, reg); + + reg = ((mode->vdisplay - 1) & 0x3ff) | + ((vbp & 0xff) << 24) | + ((vfp & 0xff) << 16) | + (((vsw-1) & 0x3f) << 10); + tilcdc_write(dev, LCDC_RASTER_TIMING_1_REG, reg); + + /* + * be sure to set Bit 10 for the V2 LCDC controller, + * otherwise limited to 1024 pixels width, stopping + * 1920x1080 being supported. + */ + if (priv->rev == 2) { + if ((mode->vdisplay - 1) & 0x400) { + tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, + LCDC_LPP_B10); + } else { + tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, + LCDC_LPP_B10); + } + } + + /* Configure display type: */ + reg = tilcdc_read(dev, LCDC_RASTER_CTRL_REG) & + ~(LCDC_TFT_MODE | LCDC_MONO_8BIT_MODE | LCDC_MONOCHROME_MODE | + LCDC_V2_TFT_24BPP_MODE | LCDC_V2_TFT_24BPP_UNPACK | + 0x000ff000 /* Palette Loading Delay bits */); + reg |= LCDC_TFT_MODE; /* no monochrome/passive support */ + if (info->tft_alt_mode) + reg |= LCDC_TFT_ALT_ENABLE; + if (priv->rev == 2) { + unsigned int depth, bpp; + + drm_fb_get_bpp_depth(fb->pixel_format, &depth, &bpp); + switch (bpp) { + case 16: + break; + case 32: + reg |= LCDC_V2_TFT_24BPP_UNPACK; + /* fallthrough */ + case 24: + reg |= LCDC_V2_TFT_24BPP_MODE; + break; + default: + dev_err(dev->dev, "invalid pixel format\n"); + return; + } + } + reg |= info->fdd < 12; + tilcdc_write(dev, LCDC_RASTER_CTRL_REG, reg); + + if (info->invert_pxl_clk) + tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_PIXEL_CLOCK); + else + tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_PIXEL_CLOCK); + + if (info->sync_ctrl) + tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, LCDC_SYNC_CTRL); + else + tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_SYNC_CTRL); + + if (info->sync_edge) + tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, LCDC_SYNC_EDGE); + else + tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_SYNC_EDGE); + + if (mode->flags & DRM_MODE_FLAG_NHSYNC) + tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_HSYNC); + else + tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_HSYNC); + + if (mode->flags & DRM_MODE_FLAG_NVSYNC) + tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_VSYNC); + else + tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_VSYNC); + + if (info->raster_order) + tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ORDER); + else + tilcdc_clear(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ORDER); + + drm_framebuffer_reference(fb); + + set_scanout(crtc, fb); + + tilcdc_crtc_update_clk(crtc); + + pm_runtime_put_sync(dev->dev); + + crtc->hwmode = crtc->state->adjusted_mode; +} + static int tilcdc_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -519,6 +690,7 @@ static const struct drm_crtc_helper_funcs tilcdc_crtc_helper_funcs = { .commit = tilcdc_crtc_commit, .mode_set = tilcdc_crtc_mode_set, .mode_set_base = tilcdc_crtc_mode_set_base, + .mode_set_nofb = 
tilcdc_crtc_mode_set_nofb, }; int tilcdc_crtc_max_width(struct drm_crtc *crtc) -- cgit v0.10.2 From db380c58b76be09bc27be0f5d3480547db71e6d5 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Thu, 7 Apr 2016 15:10:23 +0300 Subject: drm/tilcdc: Add tilcdc_crtc_atomic_check() Add tilcdc_crtc_atomic_check(). Checks the display mode validity and the presence of the mandatory primary plane. The drm_crtc_helper_funcs mode_fixup() callback is left untouched and the check function does no try to do its job on purpose, despite what the mode_fixup() callback's documentations suggests. The plane's check() callback needs to set drm_crtc_state's ->mode_changed to true if the pixel format for the framebuffer changes. Because of this drm_mode_config_funcs atomic_check() callback needs to call drm_atomic_helper_check_modeset() once more after it has called drm_atomic_helper_check_planes(). If the fixing of the adjusted_mode would be done in drm_crtc_helper_funcs atomic_check() callback, it would get over written by the extra drm_atomic_helper_check_modeset() call. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index eae0020..3e272f9 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -474,6 +474,32 @@ static void tilcdc_crtc_mode_set_nofb(struct drm_crtc *crtc) crtc->hwmode = crtc->state->adjusted_mode; } +static int tilcdc_crtc_atomic_check(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + struct drm_display_mode *mode = &state->mode; + int ret; + + /* If we are not active we don't care */ + if (!state->active) + return 0; + + if (state->state->planes[0].ptr != crtc->primary || + state->state->planes[0].state == NULL || + state->state->planes[0].state->crtc != crtc) { + dev_dbg(crtc->dev->dev, "CRTC primary plane must be present"); + return -EINVAL; + } + + ret = tilcdc_crtc_mode_valid(crtc, mode); + if (ret) { + dev_dbg(crtc->dev->dev, "Mode \"%s\" not valid", mode->name); + return -EINVAL; + } + + return 0; +} + static int tilcdc_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -690,6 +716,7 @@ static const struct drm_crtc_helper_funcs tilcdc_crtc_helper_funcs = { .commit = tilcdc_crtc_commit, .mode_set = tilcdc_crtc_mode_set, .mode_set_base = tilcdc_crtc_mode_set_base, + .atomic_check = tilcdc_crtc_atomic_check, .mode_set_nofb = tilcdc_crtc_mode_set_nofb, }; -- cgit v0.10.2 From edc43303888c13904a1c990592eb64f17e8e7eb1 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Wed, 30 Dec 2015 17:40:24 +0200 Subject: drm/tilcdc: Add atomic mode config funcs Add atomic mode config funcs. The atomic_commit implementation is a copy-paste from drm_atomic_helper_commit(), leaving out the async test. The similar copy-paste implementation appears to be used in many other drivers too. The standard drm_atomic_helper_check() is used for checking. The drm_atomic_helper_check() can not be used in drm_mode_config_funcs atomic_check() callback because the plane's check implementation may update crtc state's ->mode_changed flag. Because of this the drm_atomic_helper_check_modeset() has to be called once more after drm_atomic_helper_check_planes() (see drm_atomic_helper_check_modeset() documentation). 
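Condensed, the resulting check sequence is (the full version is in the diff below):

        ret = drm_atomic_helper_check_modeset(dev, state);
        if (ret)
                return ret;

        ret = drm_atomic_helper_check_planes(dev, state);
        if (ret)
                return ret;

        /*
         * The plane ->atomic_check() may have set crtc_state->mode_changed
         * (pixel format change), so modeset dependencies must be computed
         * once more.
         */
        return drm_atomic_helper_check_modeset(dev, state);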
Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index 16163a7..a8c4779 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "tilcdc_drv.h" #include "tilcdc_regs.h" @@ -59,9 +61,78 @@ static void tilcdc_fb_output_poll_changed(struct drm_device *dev) drm_fbdev_cma_hotplug_event(priv->fbdev); } +int tilcdc_atomic_check(struct drm_device *dev, + struct drm_atomic_state *state) +{ + int ret; + + ret = drm_atomic_helper_check_modeset(dev, state); + if (ret) + return ret; + + ret = drm_atomic_helper_check_planes(dev, state); + if (ret) + return ret; + + /* + * tilcdc ->atomic_check can update ->mode_changed if pixel format + * changes, hence will we check modeset changes again. + */ + ret = drm_atomic_helper_check_modeset(dev, state); + if (ret) + return ret; + + return ret; +} + +static int tilcdc_commit(struct drm_device *dev, + struct drm_atomic_state *state, + bool async) +{ + int ret; + + ret = drm_atomic_helper_prepare_planes(dev, state); + if (ret) + return ret; + + drm_atomic_helper_swap_state(state, true); + + /* + * Everything below can be run asynchronously without the need to grab + * any modeset locks at all under one condition: It must be guaranteed + * that the asynchronous work has either been cancelled (if the driver + * supports it, which at least requires that the framebuffers get + * cleaned up with drm_atomic_helper_cleanup_planes()) or completed + * before the new state gets committed on the software side with + * drm_atomic_helper_swap_state(). + * + * This scheme allows new atomic state updates to be prepared and + * checked in parallel to the asynchronous completion of the previous + * update. Which is important since compositors need to figure out the + * composition of the next frame right after having submitted the + * current layout. + */ + + drm_atomic_helper_commit_modeset_disables(dev, state); + + drm_atomic_helper_commit_planes(dev, state, false); + + drm_atomic_helper_commit_modeset_enables(dev, state); + + drm_atomic_helper_wait_for_vblanks(dev, state); + + drm_atomic_helper_cleanup_planes(dev, state); + + drm_atomic_state_free(state); + + return 0; +} + static const struct drm_mode_config_funcs mode_config_funcs = { .fb_create = tilcdc_fb_create, .output_poll_changed = tilcdc_fb_output_poll_changed, + .atomic_check = tilcdc_atomic_check, + .atomic_commit = tilcdc_commit, }; static int modeset_init(struct drm_device *dev) -- cgit v0.10.2 From 522a76f895d775a1c9ed6ff4a631d9054a949ef3 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Tue, 29 Dec 2015 17:27:32 +0200 Subject: drm/tilcdc: Add drm_mode_config_reset() call to tilcdc_load() Add drm_mode_config_reset() call to tilcdc_load(). This is need to initialize atomic state variables at load time. 
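drm_mode_config_reset() walks every plane, CRTC, encoder and connector and calls its ->reset() hook, so with the atomic helper resets wired up each object owns a valid initial state before the first commit. It only has to run after all mode objects have been registered; the diff below places it right before fbdev setup, roughly:

        drm_helper_disable_unused_functions(dev);

        drm_mode_config_reset(dev);     /* allocate initial atomic state for all objects */

        priv->fbdev = drm_fbdev_cma_init(dev, bpp,
                                         dev->mode_config.num_crtc,
                                         dev->mode_config.num_connector);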
Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index a8c4779..11acd96 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -366,6 +366,9 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags) } drm_helper_disable_unused_functions(dev); + + drm_mode_config_reset(dev); + priv->fbdev = drm_fbdev_cma_init(dev, bpp, dev->mode_config.num_crtc, dev->mode_config.num_connector); -- cgit v0.10.2 From 305198de894345b788522feacded0ca78f9db5d2 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Thu, 7 Apr 2016 15:05:16 +0300 Subject: drm/tilcdc: Set DRIVER_ATOMIC and use atomic crtc helpers Set DRIVER_ATOMIC and use atomic helpers and rename commit and prepare crtc helpers to enable and disable. This makes the final jump to mode setting, but there is lot of obsolete code to clean up. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 3e272f9..9a21a7f 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -17,6 +17,7 @@ #include "drm_flip_work.h" #include +#include #include "tilcdc_drv.h" #include "tilcdc_regs.h" @@ -293,12 +294,12 @@ static bool tilcdc_crtc_mode_fixup(struct drm_crtc *crtc, return true; } -static void tilcdc_crtc_prepare(struct drm_crtc *crtc) +static void tilcdc_crtc_disable(struct drm_crtc *crtc) { tilcdc_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); } -static void tilcdc_crtc_commit(struct drm_crtc *crtc) +static void tilcdc_crtc_enable(struct drm_crtc *crtc) { tilcdc_crtc_dpms(crtc, DRM_MODE_DPMS_ON); } @@ -704,18 +705,23 @@ static int tilcdc_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, } static const struct drm_crtc_funcs tilcdc_crtc_funcs = { - .destroy = tilcdc_crtc_destroy, - .set_config = drm_crtc_helper_set_config, - .page_flip = tilcdc_crtc_page_flip, + .destroy = tilcdc_crtc_destroy, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .reset = drm_atomic_helper_crtc_reset, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, }; static const struct drm_crtc_helper_funcs tilcdc_crtc_helper_funcs = { .dpms = tilcdc_crtc_dpms, .mode_fixup = tilcdc_crtc_mode_fixup, - .prepare = tilcdc_crtc_prepare, - .commit = tilcdc_crtc_commit, + .prepare = tilcdc_crtc_disable, + .commit = tilcdc_crtc_enable, .mode_set = tilcdc_crtc_mode_set, .mode_set_base = tilcdc_crtc_mode_set_base, + .enable = tilcdc_crtc_enable, + .disable = tilcdc_crtc_disable, .atomic_check = tilcdc_crtc_atomic_check, .mode_set_nofb = tilcdc_crtc_mode_set_nofb, }; diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index 11acd96..576e4e1 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -611,7 +611,7 @@ static const struct file_operations fops = { static struct drm_driver tilcdc_driver = { .driver_features = (DRIVER_HAVE_IRQ | DRIVER_GEM | DRIVER_MODESET | - DRIVER_PRIME), + DRIVER_PRIME | DRIVER_ATOMIC), .load = tilcdc_load, .unload = tilcdc_unload, .lastclose = tilcdc_lastclose, -- cgit v0.10.2 From 6b4736db9c5d5fa903d20f1a82fe2777a395c955 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Mon, 11 Apr 2016 12:46:11 +0300 Subject: drm/tilcdc: Remove obsolete crtc helper functions Remove obsolete crtc helper functions. These are not needed when atomic modeset is used. 
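For orientation, the legacy hooks dropped here were superseded by their atomic counterparts in the preceding patches: .prepare and .commit by .disable and .enable, .mode_set by .mode_set_nofb together with the CRTC and plane .atomic_check callbacks, .mode_set_base by the plane's .atomic_update, and .dpms by the atomic helpers driving .enable/.disable.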
Note that the drm_crtc_helper_funcs mode_fixup() is still needed. The crtc's check() callback can not do its job here. The plane's check() callback needs to set drm_crtc_state's ->mode_changed to true if the pixel format for the framebuffer changes. Because of this drm_mode_config_funcs atomic_check() callback needs to call drm_atomic_helper_check_modeset() once more after it has called drm_atomic_helper_check_planes(). If the fixing of the adjusted_mode would be done in drm_crtc_helper_funcs atomic_check() callback, it would get over written by the extra drm_atomic_helper_check_modeset() call. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 9a21a7f..d150b3e 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -501,209 +501,6 @@ static int tilcdc_crtc_atomic_check(struct drm_crtc *crtc, return 0; } -static int tilcdc_crtc_mode_set(struct drm_crtc *crtc, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode, - int x, int y, - struct drm_framebuffer *old_fb) -{ - struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct tilcdc_drm_private *priv = dev->dev_private; - const struct tilcdc_panel_info *info = tilcdc_crtc->info; - uint32_t reg, hbp, hfp, hsw, vbp, vfp, vsw; - int ret; - - ret = tilcdc_crtc_mode_valid(crtc, mode); - if (WARN_ON(ret)) - return ret; - - if (WARN_ON(!info)) - return -EINVAL; - - ret = tilcdc_verify_fb(crtc, crtc->primary->fb); - if (ret) - return ret; - - pm_runtime_get_sync(dev->dev); - - /* Configure the Burst Size and fifo threshold of DMA: */ - reg = tilcdc_read(dev, LCDC_DMA_CTRL_REG) & ~0x00000770; - switch (info->dma_burst_sz) { - case 1: - reg |= LCDC_DMA_BURST_SIZE(LCDC_DMA_BURST_1); - break; - case 2: - reg |= LCDC_DMA_BURST_SIZE(LCDC_DMA_BURST_2); - break; - case 4: - reg |= LCDC_DMA_BURST_SIZE(LCDC_DMA_BURST_4); - break; - case 8: - reg |= LCDC_DMA_BURST_SIZE(LCDC_DMA_BURST_8); - break; - case 16: - reg |= LCDC_DMA_BURST_SIZE(LCDC_DMA_BURST_16); - break; - default: - return -EINVAL; - } - reg |= (info->fifo_th << 8); - tilcdc_write(dev, LCDC_DMA_CTRL_REG, reg); - - /* Configure timings: */ - hbp = mode->htotal - mode->hsync_end; - hfp = mode->hsync_start - mode->hdisplay; - hsw = mode->hsync_end - mode->hsync_start; - vbp = mode->vtotal - mode->vsync_end; - vfp = mode->vsync_start - mode->vdisplay; - vsw = mode->vsync_end - mode->vsync_start; - - DBG("%dx%d, hbp=%u, hfp=%u, hsw=%u, vbp=%u, vfp=%u, vsw=%u", - mode->hdisplay, mode->vdisplay, hbp, hfp, hsw, vbp, vfp, vsw); - - /* Configure the AC Bias Period and Number of Transitions per Interrupt: */ - reg = tilcdc_read(dev, LCDC_RASTER_TIMING_2_REG) & ~0x000fff00; - reg |= LCDC_AC_BIAS_FREQUENCY(info->ac_bias) | - LCDC_AC_BIAS_TRANSITIONS_PER_INT(info->ac_bias_intrpt); - - /* - * subtract one from hfp, hbp, hsw because the hardware uses - * a value of 0 as 1 - */ - if (priv->rev == 2) { - /* clear bits we're going to set */ - reg &= ~0x78000033; - reg |= ((hfp-1) & 0x300) >> 8; - reg |= ((hbp-1) & 0x300) >> 4; - reg |= ((hsw-1) & 0x3c0) << 21; - } - tilcdc_write(dev, LCDC_RASTER_TIMING_2_REG, reg); - - reg = (((mode->hdisplay >> 4) - 1) << 4) | - (((hbp-1) & 0xff) << 24) | - (((hfp-1) & 0xff) << 16) | - (((hsw-1) & 0x3f) << 10); - if (priv->rev == 2) - reg |= (((mode->hdisplay >> 4) - 1) & 0x40) >> 3; - tilcdc_write(dev, LCDC_RASTER_TIMING_0_REG, reg); - - reg = ((mode->vdisplay - 1) & 0x3ff) | - ((vbp & 0xff) << 24) | - ((vfp & 
0xff) << 16) | - (((vsw-1) & 0x3f) << 10); - tilcdc_write(dev, LCDC_RASTER_TIMING_1_REG, reg); - - /* - * be sure to set Bit 10 for the V2 LCDC controller, - * otherwise limited to 1024 pixels width, stopping - * 1920x1080 being suppoted. - */ - if (priv->rev == 2) { - if ((mode->vdisplay - 1) & 0x400) { - tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, - LCDC_LPP_B10); - } else { - tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, - LCDC_LPP_B10); - } - } - - /* Configure display type: */ - reg = tilcdc_read(dev, LCDC_RASTER_CTRL_REG) & - ~(LCDC_TFT_MODE | LCDC_MONO_8BIT_MODE | LCDC_MONOCHROME_MODE | - LCDC_V2_TFT_24BPP_MODE | LCDC_V2_TFT_24BPP_UNPACK | 0x000ff000); - reg |= LCDC_TFT_MODE; /* no monochrome/passive support */ - if (info->tft_alt_mode) - reg |= LCDC_TFT_ALT_ENABLE; - if (priv->rev == 2) { - unsigned int depth, bpp; - - drm_fb_get_bpp_depth(crtc->primary->fb->pixel_format, &depth, &bpp); - switch (bpp) { - case 16: - break; - case 32: - reg |= LCDC_V2_TFT_24BPP_UNPACK; - /* fallthrough */ - case 24: - reg |= LCDC_V2_TFT_24BPP_MODE; - break; - default: - dev_err(dev->dev, "invalid pixel format\n"); - return -EINVAL; - } - } - reg |= info->fdd < 12; - tilcdc_write(dev, LCDC_RASTER_CTRL_REG, reg); - - if (info->invert_pxl_clk) - tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_PIXEL_CLOCK); - else - tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_PIXEL_CLOCK); - - if (info->sync_ctrl) - tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, LCDC_SYNC_CTRL); - else - tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_SYNC_CTRL); - - if (info->sync_edge) - tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, LCDC_SYNC_EDGE); - else - tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_SYNC_EDGE); - - /* - * use value from adjusted_mode here as this might have been - * changed as part of the fixup for slave encoders to solve the - * issue where tilcdc timings are not VESA compliant - */ - if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC) - tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_HSYNC); - else - tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_HSYNC); - - if (mode->flags & DRM_MODE_FLAG_NVSYNC) - tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_VSYNC); - else - tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_VSYNC); - - if (info->raster_order) - tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ORDER); - else - tilcdc_clear(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ORDER); - - drm_framebuffer_reference(crtc->primary->fb); - - set_scanout(crtc, crtc->primary->fb); - - tilcdc_crtc_update_clk(crtc); - - pm_runtime_put_sync(dev->dev); - - return 0; -} - -static int tilcdc_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, - struct drm_framebuffer *old_fb) -{ - struct drm_device *dev = crtc->dev; - int r; - - r = tilcdc_verify_fb(crtc, crtc->primary->fb); - if (r) - return r; - - drm_framebuffer_reference(crtc->primary->fb); - - pm_runtime_get_sync(dev->dev); - - set_scanout(crtc, crtc->primary->fb); - - pm_runtime_put_sync(dev->dev); - - return 0; -} - static const struct drm_crtc_funcs tilcdc_crtc_funcs = { .destroy = tilcdc_crtc_destroy, .set_config = drm_atomic_helper_set_config, @@ -714,12 +511,7 @@ static const struct drm_crtc_funcs tilcdc_crtc_funcs = { }; static const struct drm_crtc_helper_funcs tilcdc_crtc_helper_funcs = { - .dpms = tilcdc_crtc_dpms, .mode_fixup = tilcdc_crtc_mode_fixup, - .prepare = tilcdc_crtc_disable, - .commit = tilcdc_crtc_enable, - .mode_set = tilcdc_crtc_mode_set, - .mode_set_base = tilcdc_crtc_mode_set_base, .enable = tilcdc_crtc_enable, 
.disable = tilcdc_crtc_disable, .atomic_check = tilcdc_crtc_atomic_check, -- cgit v0.10.2 From c72cc663649a7a6b860988e2b3a355e4280fd38d Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Sat, 9 Apr 2016 18:44:09 +0300 Subject: drm/tilcdc: Remove tilcdc_verify_fb() Remove tilcdc_verify_fb(). The tilcdc_verify_fb() function is not needed because the same checks are implemented in tilcdc_plane_atomic_check(). Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index d150b3e..3d6000c 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -151,22 +151,6 @@ static void tilcdc_crtc_destroy(struct drm_crtc *crtc) drm_flip_work_cleanup(&tilcdc_crtc->unref_work); } -static int tilcdc_verify_fb(struct drm_crtc *crtc, struct drm_framebuffer *fb) -{ - struct drm_device *dev = crtc->dev; - unsigned int depth, bpp; - - drm_fb_get_bpp_depth(fb->pixel_format, &depth, &bpp); - - if (fb->pitches[0] != crtc->mode.hdisplay * bpp / 8) { - dev_err(dev->dev, - "Invalid pitch: fb and crtc widths must be the same"); - return -EINVAL; - } - - return 0; -} - int tilcdc_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, @@ -174,13 +158,8 @@ int tilcdc_crtc_page_flip(struct drm_crtc *crtc, { struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); struct drm_device *dev = crtc->dev; - int r; unsigned long flags; - r = tilcdc_verify_fb(crtc, fb); - if (r) - return r; - if (tilcdc_crtc->event) { dev_err(dev->dev, "already pending page flip!\n"); return -EBUSY; -- cgit v0.10.2 From ee6de21b6982d1513db874f3073ad30ea5f9abd7 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Wed, 13 Apr 2016 18:45:29 +0300 Subject: drm/tilcdc: panel: Set crtc panel info at init phase Set crtc panel info at init phase. Setting it at prepare callback does it multiple times for no good reason and it is also too late when atomic modeset is used. Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_panel.c b/drivers/gpu/drm/tilcdc/tilcdc_panel.c index ff7774c..9874881 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_panel.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_panel.c @@ -64,9 +64,7 @@ static void panel_encoder_dpms(struct drm_encoder *encoder, int mode) static void panel_encoder_prepare(struct drm_encoder *encoder) { - struct panel_encoder *panel_encoder = to_panel_encoder(encoder); panel_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); - tilcdc_crtc_set_panel_info(encoder->crtc, panel_encoder->mod->info); } static void panel_encoder_commit(struct drm_encoder *encoder) @@ -268,6 +266,9 @@ static int panel_modeset_init(struct tilcdc_module *mod, struct drm_device *dev) priv->encoders[priv->num_encoders++] = encoder; priv->connectors[priv->num_connectors++] = connector; + tilcdc_crtc_set_panel_info(priv->crtc, + to_panel_encoder(encoder)->mod->info); + return 0; } -- cgit v0.10.2 From 0f65d89b9a9736d90a689a351130d591ea7a6b38 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Wed, 13 Apr 2016 18:49:29 +0300 Subject: drm/tilcdc: panel: Add atomic modeset helpers to connector funcs Add atomic modeset helpers to panel connector funcs. Property handling related helpers, atomic reset helper, and new dpms helper is needed in connector for atomic modeseting to work. The default helper functions are enough. 
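With those defaults in place, the connector funcs table ends up looking roughly as follows (a sketch based on the commit message rather than the verbatim patch; panel_connector_detect and panel_connector_destroy are assumed to be the driver's pre-existing callbacks):

        static const struct drm_connector_funcs panel_connector_funcs = {
                .destroy = panel_connector_destroy,
                .dpms = drm_atomic_helper_connector_dpms,
                .detect = panel_connector_detect,
                .fill_modes = drm_helper_probe_single_connector_modes,
                .reset = drm_atomic_helper_connector_reset,
                .set_property = drm_atomic_helper_connector_set_property,
                .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
                .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
        };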
Signed-off-by: Jyri Sarha diff --git a/drivers/gpu/drm/tilcdc/tilcdc_panel.c b/drivers/gpu/drm/tilcdc/tilcdc_panel.c index 9874881..4ac1d25 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_panel.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_panel.c @@ -22,6 +22,7 @@ #include