diff options
Diffstat (limited to 'drivers/gpu')
104 files changed, 5532 insertions, 1417 deletions
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 9236965..f88a9b2 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -560,6 +560,11 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) mode_changed = true; } else if (set->fb == NULL) { mode_changed = true; + } else if (set->fb->depth != set->crtc->fb->depth) { + mode_changed = true; + } else if (set->fb->bits_per_pixel != + set->crtc->fb->bits_per_pixel) { + mode_changed = true; } else fb_changed = true; } diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 0929219..756af4d 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -185,8 +185,8 @@ drm_edid_block_valid(u8 *raw_edid) bad: if (raw_edid) { printk(KERN_ERR "Raw EDID:\n"); - print_hex_dump_bytes(KERN_ERR, DUMP_PREFIX_NONE, raw_edid, EDID_LENGTH); - printk(KERN_ERR "\n"); + print_hex_dump(KERN_ERR, " \t", DUMP_PREFIX_NONE, 16, 1, + raw_edid, EDID_LENGTH, false); } return 0; } diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 4012fe4..186d62e 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -129,7 +129,7 @@ drm_gem_destroy(struct drm_device *dev) } /** - * Initialize an already allocate GEM object of the specified size with + * Initialize an already allocated GEM object of the specified size with * shmfs backing store. */ int drm_gem_object_init(struct drm_device *dev, @@ -151,6 +151,27 @@ int drm_gem_object_init(struct drm_device *dev, EXPORT_SYMBOL(drm_gem_object_init); /** + * Initialize an already allocated GEM object of the specified size with + * no GEM provided backing store. Instead the caller is responsible for + * backing the object and handling it. + */ +int drm_gem_private_object_init(struct drm_device *dev, + struct drm_gem_object *obj, size_t size) +{ + BUG_ON((size & (PAGE_SIZE - 1)) != 0); + + obj->dev = dev; + obj->filp = NULL; + + kref_init(&obj->refcount); + atomic_set(&obj->handle_count, 0); + obj->size = size; + + return 0; +} +EXPORT_SYMBOL(drm_gem_private_object_init); + +/** * Allocate a GEM object of the specified size with shmfs backing store */ struct drm_gem_object * @@ -211,6 +232,8 @@ drm_gem_handle_delete(struct drm_file *filp, u32 handle) idr_remove(&filp->object_idr, handle); spin_unlock(&filp->table_lock); + if (dev->driver->gem_close_object) + dev->driver->gem_close_object(obj, filp); drm_gem_object_handle_unreference_unlocked(obj); return 0; @@ -227,7 +250,8 @@ drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep) { - int ret; + struct drm_device *dev = obj->dev; + int ret; /* * Get the user-visible handle using idr. @@ -248,6 +272,15 @@ again: return ret; drm_gem_object_handle_reference(obj); + + if (dev->driver->gem_open_object) { + ret = dev->driver->gem_open_object(obj, file_priv); + if (ret) { + drm_gem_handle_delete(file_priv, *handlep); + return ret; + } + } + return 0; } EXPORT_SYMBOL(drm_gem_handle_create); @@ -402,7 +435,12 @@ drm_gem_open(struct drm_device *dev, struct drm_file *file_private) static int drm_gem_object_release_handle(int id, void *ptr, void *data) { + struct drm_file *file_priv = data; struct drm_gem_object *obj = ptr; + struct drm_device *dev = obj->dev; + + if (dev->driver->gem_close_object) + dev->driver->gem_close_object(obj, file_priv); drm_gem_object_handle_unreference_unlocked(obj); @@ -418,7 +456,7 @@ void drm_gem_release(struct drm_device *dev, struct drm_file *file_private) { idr_for_each(&file_private->object_idr, - &drm_gem_object_release_handle, NULL); + &drm_gem_object_release_handle, file_private); idr_remove_all(&file_private->object_idr); idr_destroy(&file_private->object_idr); @@ -427,7 +465,8 @@ drm_gem_release(struct drm_device *dev, struct drm_file *file_private) void drm_gem_object_release(struct drm_gem_object *obj) { - fput(obj->filp); + if (obj->filp) + fput(obj->filp); } EXPORT_SYMBOL(drm_gem_object_release); diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index c2d32f2..ad74fb4 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -994,9 +994,10 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option, { const char *name; unsigned int namelen; - int res_specified = 0, bpp_specified = 0, refresh_specified = 0; + bool res_specified = false, bpp_specified = false, refresh_specified = false; unsigned int xres = 0, yres = 0, bpp = 32, refresh = 0; - int yres_specified = 0, cvt = 0, rb = 0, interlace = 0, margins = 0; + bool yres_specified = false, cvt = false, rb = false; + bool interlace = false, margins = false, was_digit = false; int i; enum drm_connector_force force = DRM_FORCE_UNSPECIFIED; @@ -1015,54 +1016,65 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option, for (i = namelen-1; i >= 0; i--) { switch (name[i]) { case '@': - namelen = i; if (!refresh_specified && !bpp_specified && - !yres_specified) { + !yres_specified && !cvt && !rb && was_digit) { refresh = simple_strtol(&name[i+1], NULL, 10); - refresh_specified = 1; - if (cvt || rb) - cvt = 0; + refresh_specified = true; + was_digit = false; } else goto done; break; case '-': - namelen = i; - if (!bpp_specified && !yres_specified) { + if (!bpp_specified && !yres_specified && !cvt && + !rb && was_digit) { bpp = simple_strtol(&name[i+1], NULL, 10); - bpp_specified = 1; - if (cvt || rb) - cvt = 0; + bpp_specified = true; + was_digit = false; } else goto done; break; case 'x': - if (!yres_specified) { + if (!yres_specified && was_digit) { yres = simple_strtol(&name[i+1], NULL, 10); - yres_specified = 1; + yres_specified = true; + was_digit = false; } else goto done; case '0' ... '9': + was_digit = true; break; case 'M': - if (!yres_specified) - cvt = 1; + if (yres_specified || cvt || was_digit) + goto done; + cvt = true; break; case 'R': - if (cvt) - rb = 1; + if (yres_specified || cvt || rb || was_digit) + goto done; + rb = true; break; case 'm': - if (!cvt) - margins = 1; + if (cvt || yres_specified || was_digit) + goto done; + margins = true; break; case 'i': - if (!cvt) - interlace = 1; + if (cvt || yres_specified || was_digit) + goto done; + interlace = true; break; case 'e': + if (yres_specified || bpp_specified || refresh_specified || + was_digit || (force != DRM_FORCE_UNSPECIFIED)) + goto done; + force = DRM_FORCE_ON; break; case 'D': + if (yres_specified || bpp_specified || refresh_specified || + was_digit || (force != DRM_FORCE_UNSPECIFIED)) + goto done; + if ((connector->connector_type != DRM_MODE_CONNECTOR_DVII) && (connector->connector_type != DRM_MODE_CONNECTOR_HDMIB)) force = DRM_FORCE_ON; @@ -1070,17 +1082,37 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option, force = DRM_FORCE_ON_DIGITAL; break; case 'd': + if (yres_specified || bpp_specified || refresh_specified || + was_digit || (force != DRM_FORCE_UNSPECIFIED)) + goto done; + force = DRM_FORCE_OFF; break; default: goto done; } } + if (i < 0 && yres_specified) { - xres = simple_strtol(name, NULL, 10); - res_specified = 1; + char *ch; + xres = simple_strtol(name, &ch, 10); + if ((ch != NULL) && (*ch == 'x')) + res_specified = true; + else + i = ch - name; + } else if (!yres_specified && was_digit) { + /* catch mode that begins with digits but has no 'x' */ + i = 0; } done: + if (i >= 0) { + printk(KERN_WARNING + "parse error at position %i in video mode '%s'\n", + i, name); + mode->specified = false; + return false; + } + if (res_specified) { mode->specified = true; mode->xres = xres; @@ -1096,9 +1128,10 @@ done: mode->bpp_specified = true; mode->bpp = bpp; } - mode->rb = rb ? true : false; - mode->cvt = cvt ? true : false; - mode->interlace = interlace ? true : false; + mode->rb = rb; + mode->cvt = cvt; + mode->interlace = interlace; + mode->margins = margins; mode->force = force; return true; diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c index 7223f06..2a8b626 100644 --- a/drivers/gpu/drm/drm_platform.c +++ b/drivers/gpu/drm/drm_platform.c @@ -123,14 +123,15 @@ static int drm_platform_set_busid(struct drm_device *dev, struct drm_master *mas { int len, ret; - master->unique_len = 10 + strlen(dev->platformdev->name); + master->unique_len = 13 + strlen(dev->platformdev->name); + master->unique_size = master->unique_len; master->unique = kmalloc(master->unique_len + 1, GFP_KERNEL); if (master->unique == NULL) return -ENOMEM; len = snprintf(master->unique, master->unique_len, - "platform:%s", dev->platformdev->name); + "platform:%s:%02d", dev->platformdev->name, dev->platformdev->id); if (len > master->unique_len) { DRM_ERROR("Unique buffer overflowed\n"); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0a893f7..e2662497 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -865,7 +865,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused) MEMSTAT_VID_SHIFT); seq_printf(m, "Current P-state: %d\n", (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); - } else if (IS_GEN6(dev)) { + } else if (IS_GEN6(dev) || IS_GEN7(dev)) { u32 gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS); u32 rp_state_limits = I915_READ(GEN6_RP_STATE_LIMITS); u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); @@ -1123,6 +1123,44 @@ static int i915_emon_status(struct seq_file *m, void *unused) return 0; } +static int i915_ring_freq_table(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + int ret; + int gpu_freq, ia_freq; + + if (!(IS_GEN6(dev) || IS_GEN7(dev))) { + seq_printf(m, "unsupported on this chipset\n"); + return 0; + } + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + seq_printf(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\n"); + + for (gpu_freq = dev_priv->min_delay; gpu_freq <= dev_priv->max_delay; + gpu_freq++) { + I915_WRITE(GEN6_PCODE_DATA, gpu_freq); + I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | + GEN6_PCODE_READ_MIN_FREQ_TABLE); + if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & + GEN6_PCODE_READY) == 0, 10)) { + DRM_ERROR("pcode read of freq table timed out\n"); + continue; + } + ia_freq = I915_READ(GEN6_PCODE_DATA); + seq_printf(m, "%d\t\t%d\n", gpu_freq * 50, ia_freq * 100); + } + + mutex_unlock(&dev->struct_mutex); + + return 0; +} + static int i915_gfxec(struct seq_file *m, void *unused) { struct drm_info_node *node = (struct drm_info_node *) m->private; @@ -1430,6 +1468,7 @@ static struct drm_info_list i915_debugfs_list[] = { {"i915_inttoext_table", i915_inttoext_table, 0}, {"i915_drpc_info", i915_drpc_info, 0}, {"i915_emon_status", i915_emon_status, 0}, + {"i915_ring_freq_table", i915_ring_freq_table, 0}, {"i915_gfxec", i915_gfxec, 0}, {"i915_fbc_status", i915_fbc_status, 0}, {"i915_sr_status", i915_sr_status, 0}, diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 296fbd6..1271282 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1073,6 +1073,9 @@ static void i915_setup_compression(struct drm_device *dev, int size) unsigned long cfb_base; unsigned long ll_base = 0; + /* Just in case the BIOS is doing something questionable. */ + intel_disable_fbc(dev); + compressed_fb = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0); if (compressed_fb) compressed_fb = drm_mm_get_block(compressed_fb, size, 4096); @@ -1099,7 +1102,6 @@ static void i915_setup_compression(struct drm_device *dev, int size) dev_priv->cfb_size = size; - intel_disable_fbc(dev); dev_priv->compressed_fb = compressed_fb; if (HAS_PCH_SPLIT(dev)) I915_WRITE(ILK_DPFC_CB_BASE, compressed_fb->start); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index eb91e2d..ce045a8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -37,38 +37,70 @@ #include <linux/console.h> #include "drm_crtc_helper.h" -static int i915_modeset = -1; +static int i915_modeset __read_mostly = -1; module_param_named(modeset, i915_modeset, int, 0400); +MODULE_PARM_DESC(modeset, + "Use kernel modesetting [KMS] (0=DRM_I915_KMS from .config, " + "1=on, -1=force vga console preference [default])"); -unsigned int i915_fbpercrtc = 0; +unsigned int i915_fbpercrtc __always_unused = 0; module_param_named(fbpercrtc, i915_fbpercrtc, int, 0400); -int i915_panel_ignore_lid = 0; +int i915_panel_ignore_lid __read_mostly = 0; module_param_named(panel_ignore_lid, i915_panel_ignore_lid, int, 0600); +MODULE_PARM_DESC(panel_ignore_lid, + "Override lid status (0=autodetect [default], 1=lid open, " + "-1=lid closed)"); -unsigned int i915_powersave = 1; +unsigned int i915_powersave __read_mostly = 1; module_param_named(powersave, i915_powersave, int, 0600); +MODULE_PARM_DESC(powersave, + "Enable powersavings, fbc, downclocking, etc. (default: true)"); -unsigned int i915_semaphores = 0; +unsigned int i915_semaphores __read_mostly = 0; module_param_named(semaphores, i915_semaphores, int, 0600); +MODULE_PARM_DESC(semaphores, + "Use semaphores for inter-ring sync (default: false)"); -unsigned int i915_enable_rc6 = 0; +unsigned int i915_enable_rc6 __read_mostly = 0; module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600); +MODULE_PARM_DESC(i915_enable_rc6, + "Enable power-saving render C-state 6 (default: true)"); -unsigned int i915_enable_fbc = 0; +unsigned int i915_enable_fbc __read_mostly = 1; module_param_named(i915_enable_fbc, i915_enable_fbc, int, 0600); +MODULE_PARM_DESC(i915_enable_fbc, + "Enable frame buffer compression for power savings " + "(default: false)"); -unsigned int i915_lvds_downclock = 0; +unsigned int i915_lvds_downclock __read_mostly = 0; module_param_named(lvds_downclock, i915_lvds_downclock, int, 0400); +MODULE_PARM_DESC(lvds_downclock, + "Use panel (LVDS/eDP) downclocking for power savings " + "(default: false)"); -unsigned int i915_panel_use_ssc = 1; +unsigned int i915_panel_use_ssc __read_mostly = 1; module_param_named(lvds_use_ssc, i915_panel_use_ssc, int, 0600); +MODULE_PARM_DESC(lvds_use_ssc, + "Use Spread Spectrum Clock with panels [LVDS/eDP] " + "(default: true)"); -int i915_vbt_sdvo_panel_type = -1; +int i915_vbt_sdvo_panel_type __read_mostly = -1; module_param_named(vbt_sdvo_panel_type, i915_vbt_sdvo_panel_type, int, 0600); +MODULE_PARM_DESC(vbt_sdvo_panel_type, + "Override selection of SDVO panel mode in the VBT " + "(default: auto)"); -static bool i915_try_reset = true; +static bool i915_try_reset __read_mostly = true; module_param_named(reset, i915_try_reset, bool, 0600); +MODULE_PARM_DESC(reset, "Attempt GPU resets (default: true)"); + +bool i915_enable_hangcheck __read_mostly = true; +module_param_named(enable_hangcheck, i915_enable_hangcheck, bool, 0644); +MODULE_PARM_DESC(enable_hangcheck, + "Periodically check GPU activity for detecting hangs. " + "WARNING: Disabling this can cause system wide hangs. " + "(default: true)"); static struct drm_driver driver; extern int intel_agp_enabled; @@ -345,12 +377,17 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) { - int loop = 500; - u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); - while (fifo < 20 && loop--) { - udelay(10); - fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); + if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES ) { + int loop = 500; + u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); + while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) { + udelay(10); + fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); + } + WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES); + dev_priv->gt_fifo_count = fifo; } + dev_priv->gt_fifo_count--; } static int i915_drm_freeze(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ce7914c..6867e19 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -214,6 +214,8 @@ struct drm_i915_display_funcs { int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj); + int (*update_plane)(struct drm_crtc *crtc, struct drm_framebuffer *fb, + int x, int y); /* clock updates for mode set */ /* cursor updates */ /* render clock increase/decrease */ @@ -265,6 +267,7 @@ enum intel_pch { #define QUIRK_LVDS_SSC_DISABLE (1<<1) struct intel_fbdev; +struct intel_fbc_work; typedef struct drm_i915_private { struct drm_device *dev; @@ -275,6 +278,7 @@ typedef struct drm_i915_private { int relative_constants_mode; void __iomem *regs; + u32 gt_fifo_count; struct intel_gmbus { struct i2c_adapter adapter; @@ -329,11 +333,10 @@ typedef struct drm_i915_private { uint32_t last_instdone1; unsigned long cfb_size; - unsigned long cfb_pitch; - unsigned long cfb_offset; - int cfb_fence; - int cfb_plane; + unsigned int cfb_fb; + enum plane cfb_plane; int cfb_y; + struct intel_fbc_work *fbc_work; struct intel_opregion opregion; @@ -986,15 +989,16 @@ struct drm_i915_file_private { extern struct drm_ioctl_desc i915_ioctls[]; extern int i915_max_ioctl; -extern unsigned int i915_fbpercrtc; -extern int i915_panel_ignore_lid; -extern unsigned int i915_powersave; -extern unsigned int i915_semaphores; -extern unsigned int i915_lvds_downclock; -extern unsigned int i915_panel_use_ssc; -extern int i915_vbt_sdvo_panel_type; -extern unsigned int i915_enable_rc6; -extern unsigned int i915_enable_fbc; +extern unsigned int i915_fbpercrtc __always_unused; +extern int i915_panel_ignore_lid __read_mostly; +extern unsigned int i915_powersave __read_mostly; +extern unsigned int i915_semaphores __read_mostly; +extern unsigned int i915_lvds_downclock __read_mostly; +extern unsigned int i915_panel_use_ssc __read_mostly; +extern int i915_vbt_sdvo_panel_type __read_mostly; +extern unsigned int i915_enable_rc6 __read_mostly; +extern unsigned int i915_enable_fbc __read_mostly; +extern bool i915_enable_hangcheck __read_mostly; extern int i915_suspend(struct drm_device *dev, pm_message_t state); extern int i915_resume(struct drm_device *dev); @@ -1164,7 +1168,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj, uint32_t read_domains, uint32_t write_domain); -int __must_check i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj); +int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init_ringbuffer(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); void i915_gem_do_init(struct drm_device *dev, @@ -1183,7 +1187,8 @@ int __must_check i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); int __must_check -i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj, +i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, + u32 alignment, struct intel_ring_buffer *pipelined); int i915_gem_attach_phys_object(struct drm_device *dev, struct drm_i915_gem_object *obj, @@ -1199,9 +1204,14 @@ i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, uint32_t size, int tiling_mode); +int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, + enum i915_cache_level cache_level); + /* i915_gem_gtt.c */ void i915_gem_restore_gtt_mappings(struct drm_device *dev); int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj); +void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj, + enum i915_cache_level cache_level); void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj); /* i915_gem_evict.c */ @@ -1283,12 +1293,8 @@ extern void intel_modeset_init(struct drm_device *dev); extern void intel_modeset_gem_init(struct drm_device *dev); extern void intel_modeset_cleanup(struct drm_device *dev); extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state); -extern void i8xx_disable_fbc(struct drm_device *dev); -extern void g4x_disable_fbc(struct drm_device *dev); -extern void ironlake_disable_fbc(struct drm_device *dev); -extern void intel_disable_fbc(struct drm_device *dev); -extern void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval); extern bool intel_fbc_enabled(struct drm_device *dev); +extern void intel_disable_fbc(struct drm_device *dev); extern bool ironlake_set_drps(struct drm_device *dev, u8 val); extern void ironlake_enable_rc6(struct drm_device *dev); extern void gen6_set_rps(struct drm_device *dev, u8 val); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a087e1b..d1cd8b8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1763,8 +1763,11 @@ i915_add_request(struct intel_ring_buffer *ring, ring->outstanding_lazy_request = false; if (!dev_priv->mm.suspended) { - mod_timer(&dev_priv->hangcheck_timer, - jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); + if (i915_enable_hangcheck) { + mod_timer(&dev_priv->hangcheck_timer, + jiffies + + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); + } if (was_empty) queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); @@ -2135,6 +2138,30 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) return 0; } +static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) +{ + u32 old_write_domain, old_read_domains; + + /* Act a barrier for all accesses through the GTT */ + mb(); + + /* Force a pagefault for domain tracking on next user access */ + i915_gem_release_mmap(obj); + + if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) + return; + + old_read_domains = obj->base.read_domains; + old_write_domain = obj->base.write_domain; + + obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; + obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; + + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); +} + /** * Unbinds an object from the GTT aperture. */ @@ -2151,23 +2178,28 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) return -EINVAL; } - /* blow away mappings if mapped through GTT */ - i915_gem_release_mmap(obj); - - /* Move the object to the CPU domain to ensure that - * any possible CPU writes while it's not in the GTT - * are flushed when we go to remap it. This will - * also ensure that all pending GPU writes are finished - * before we unbind. - */ - ret = i915_gem_object_set_to_cpu_domain(obj, 1); + ret = i915_gem_object_finish_gpu(obj); if (ret == -ERESTARTSYS) return ret; /* Continue on if we fail due to EIO, the GPU is hung so we * should be safe and we need to cleanup or else we might * cause memory corruption through use-after-free. */ + + i915_gem_object_finish_gtt(obj); + + /* Move the object to the CPU domain to ensure that + * any possible CPU writes while it's not in the GTT + * are flushed when we go to remap it. + */ + if (ret == 0) + ret = i915_gem_object_set_to_cpu_domain(obj, 1); + if (ret == -ERESTARTSYS) + return ret; if (ret) { + /* In the event of a disaster, abandon all caches and + * hope for the best. + */ i915_gem_clflush_object(obj); obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; } @@ -2996,51 +3028,139 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) return 0; } +int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, + enum i915_cache_level cache_level) +{ + int ret; + + if (obj->cache_level == cache_level) + return 0; + + if (obj->pin_count) { + DRM_DEBUG("can not change the cache level of pinned objects\n"); + return -EBUSY; + } + + if (obj->gtt_space) { + ret = i915_gem_object_finish_gpu(obj); + if (ret) + return ret; + + i915_gem_object_finish_gtt(obj); + + /* Before SandyBridge, you could not use tiling or fence + * registers with snooped memory, so relinquish any fences + * currently pointing to our region in the aperture. + */ + if (INTEL_INFO(obj->base.dev)->gen < 6) { + ret = i915_gem_object_put_fence(obj); + if (ret) + return ret; + } + + i915_gem_gtt_rebind_object(obj, cache_level); + } + + if (cache_level == I915_CACHE_NONE) { + u32 old_read_domains, old_write_domain; + + /* If we're coming from LLC cached, then we haven't + * actually been tracking whether the data is in the + * CPU cache or not, since we only allow one bit set + * in obj->write_domain and have been skipping the clflushes. + * Just set it to the CPU cache for now. + */ + WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); + WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); + + old_read_domains = obj->base.read_domains; + old_write_domain = obj->base.write_domain; + + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); + } + + obj->cache_level = cache_level; + return 0; +} + /* - * Prepare buffer for display plane. Use uninterruptible for possible flush - * wait, as in modesetting process we're not supposed to be interrupted. + * Prepare buffer for display plane (scanout, cursors, etc). + * Can be called from an uninterruptible phase (modesetting) and allows + * any flushes to be pipelined (for pageflips). + * + * For the display plane, we want to be in the GTT but out of any write + * domains. So in many ways this looks like set_to_gtt_domain() apart from the + * ability to pipeline the waits, pinning and any additional subtleties + * that may differentiate the display plane from ordinary buffers. */ int -i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj, +i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, + u32 alignment, struct intel_ring_buffer *pipelined) { - uint32_t old_read_domains; + u32 old_read_domains, old_write_domain; int ret; - /* Not valid to be called on unbound objects. */ - if (obj->gtt_space == NULL) - return -EINVAL; - ret = i915_gem_object_flush_gpu_write_domain(obj); if (ret) return ret; - - /* Currently, we are always called from an non-interruptible context. */ if (pipelined != obj->ring) { ret = i915_gem_object_wait_rendering(obj); if (ret) return ret; } + /* The display engine is not coherent with the LLC cache on gen6. As + * a result, we make sure that the pinning that is about to occur is + * done with uncached PTEs. This is lowest common denominator for all + * chipsets. + * + * However for gen6+, we could do better by using the GFDT bit instead + * of uncaching, which would allow us to flush all the LLC-cached data + * with that bit in the PTE to main memory with just one PIPE_CONTROL. + */ + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); + if (ret) + return ret; + + /* As the user may map the buffer once pinned in the display plane + * (e.g. libkms for the bootup splash), we have to ensure that we + * always use map_and_fenceable for all scanout buffers. + */ + ret = i915_gem_object_pin(obj, alignment, true); + if (ret) + return ret; + i915_gem_object_flush_cpu_write_domain(obj); + old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; + + /* It should now be out of any other write domains, and we can update + * the domain values for our changes. + */ + BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); obj->base.read_domains |= I915_GEM_DOMAIN_GTT; trace_i915_gem_object_change_domain(obj, old_read_domains, - obj->base.write_domain); + old_write_domain); return 0; } int -i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj) +i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) { int ret; - if (!obj->active) + if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) return 0; if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { @@ -3049,6 +3169,9 @@ i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj) return ret; } + /* Ensure that we invalidate the GPU's caches and TLBs. */ + obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; + return i915_gem_object_wait_rendering(obj); } @@ -3575,7 +3698,23 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->cache_level = I915_CACHE_NONE; + if (IS_GEN6(dev)) { + /* On Gen6, we can have the GPU use the LLC (the CPU + * cache) for about a 10% performance improvement + * compared to uncached. Graphics requests other than + * display scanout are coherent with the CPU in + * accessing this cache. This means in this mode we + * don't need to clflush on the CPU side, and on the + * GPU side we only need to flush internal caches to + * get data visible to the CPU. + * + * However, we maintain the display planes as UC, and so + * need to rebind when first used as such. + */ + obj->cache_level = I915_CACHE_LLC; + } else + obj->cache_level = I915_CACHE_NONE; + obj->base.driver_private = NULL; obj->fence_reg = I915_FENCE_REG_NONE; INIT_LIST_HEAD(&obj->mm_list); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e46b645..7a709cd 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -59,24 +59,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE); list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) { - unsigned int agp_type = - cache_level_to_agp_type(dev, obj->cache_level); - i915_gem_clflush_object(obj); - - if (dev_priv->mm.gtt->needs_dmar) { - BUG_ON(!obj->sg_list); - - intel_gtt_insert_sg_entries(obj->sg_list, - obj->num_sg, - obj->gtt_space->start >> PAGE_SHIFT, - agp_type); - } else - intel_gtt_insert_pages(obj->gtt_space->start - >> PAGE_SHIFT, - obj->base.size >> PAGE_SHIFT, - obj->pages, - agp_type); + i915_gem_gtt_rebind_object(obj, obj->cache_level); } intel_gtt_chipset_flush(); @@ -110,6 +94,27 @@ int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj) return 0; } +void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj, + enum i915_cache_level cache_level) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned int agp_type = cache_level_to_agp_type(dev, cache_level); + + if (dev_priv->mm.gtt->needs_dmar) { + BUG_ON(!obj->sg_list); + + intel_gtt_insert_sg_entries(obj->sg_list, + obj->num_sg, + obj->gtt_space->start >> PAGE_SHIFT, + agp_type); + } else + intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT, + obj->base.size >> PAGE_SHIFT, + obj->pages, + agp_type); +} + void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) { intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3b03f85..23d1ae6 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -361,10 +361,12 @@ static void notify_ring(struct drm_device *dev, ring->irq_seqno = seqno; wake_up_all(&ring->irq_queue); - - dev_priv->hangcheck_count = 0; - mod_timer(&dev_priv->hangcheck_timer, - jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); + if (i915_enable_hangcheck) { + dev_priv->hangcheck_count = 0; + mod_timer(&dev_priv->hangcheck_timer, + jiffies + + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); + } } static void gen6_pm_rps_work(struct work_struct *work) @@ -1664,6 +1666,9 @@ void i915_hangcheck_elapsed(unsigned long data) uint32_t acthd, instdone, instdone1; bool err = false; + if (!i915_enable_hangcheck) + return; + /* If all work is done then ACTHD clearly hasn't advanced. */ if (i915_hangcheck_ring_idle(&dev_priv->ring[RCS], &err) && i915_hangcheck_ring_idle(&dev_priv->ring[VCS], &err) && diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5d5def7..02db299 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -579,6 +579,7 @@ #define DPFC_CTL_PLANEA (0<<30) #define DPFC_CTL_PLANEB (1<<30) #define DPFC_CTL_FENCE_EN (1<<29) +#define DPFC_CTL_PERSISTENT_MODE (1<<25) #define DPFC_SR_EN (1<<10) #define DPFC_CTL_LIMIT_1X (0<<6) #define DPFC_CTL_LIMIT_2X (1<<6) @@ -3360,6 +3361,7 @@ #define FORCEWAKE_ACK 0x130090 #define GT_FIFO_FREE_ENTRIES 0x120008 +#define GT_FIFO_NUM_RESERVED_ENTRIES 20 #define GEN6_RPNSWREQ 0xA008 #define GEN6_TURBO_DISABLE (1<<31) @@ -3434,7 +3436,9 @@ #define GEN6_PCODE_MAILBOX 0x138124 #define GEN6_PCODE_READY (1<<31) #define GEN6_READ_OC_PARAMS 0xc -#define GEN6_PCODE_WRITE_MIN_FREQ_TABLE 0x9 +#define GEN6_PCODE_WRITE_MIN_FREQ_TABLE 0x8 +#define GEN6_PCODE_READ_MIN_FREQ_TABLE 0x9 #define GEN6_PCODE_DATA 0x138128 +#define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 5257cfc..2857586 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -760,15 +760,13 @@ static void i915_restore_display(struct drm_device *dev) /* FIXME: restore TV & SDVO state */ /* only restore FBC info on the platform that supports FBC*/ + intel_disable_fbc(dev); if (I915_HAS_FBC(dev)) { if (HAS_PCH_SPLIT(dev)) { - ironlake_disable_fbc(dev); I915_WRITE(ILK_DPFC_CB_BASE, dev_priv->saveDPFC_CB_BASE); } else if (IS_GM45(dev)) { - g4x_disable_fbc(dev); I915_WRITE(DPFC_CB_BASE, dev_priv->saveDPFC_CB_BASE); } else { - i8xx_disable_fbc(dev); I915_WRITE(FBC_CFB_BASE, dev_priv->saveFBC_CFB_BASE); I915_WRITE(FBC_LL_BASE, dev_priv->saveFBC_LL_BASE); I915_WRITE(FBC_CONTROL2, dev_priv->saveFBC_CONTROL2); @@ -878,8 +876,10 @@ int i915_restore_state(struct drm_device *dev) intel_init_emon(dev); } - if (IS_GEN6(dev)) + if (IS_GEN6(dev)) { gen6_enable_rps(dev_priv); + gen6_update_ring_freq(dev_priv); + } mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 927442a..61abef8a 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -74,7 +74,7 @@ get_blocksize(void *p) static void fill_detail_timing_data(struct drm_display_mode *panel_fixed_mode, - struct lvds_dvo_timing *dvo_timing) + const struct lvds_dvo_timing *dvo_timing) { panel_fixed_mode->hdisplay = (dvo_timing->hactive_hi << 8) | dvo_timing->hactive_lo; @@ -115,20 +115,75 @@ fill_detail_timing_data(struct drm_display_mode *panel_fixed_mode, drm_mode_set_name(panel_fixed_mode); } +static bool +lvds_dvo_timing_equal_size(const struct lvds_dvo_timing *a, + const struct lvds_dvo_timing *b) +{ + if (a->hactive_hi != b->hactive_hi || + a->hactive_lo != b->hactive_lo) + return false; + + if (a->hsync_off_hi != b->hsync_off_hi || + a->hsync_off_lo != b->hsync_off_lo) + return false; + + if (a->hsync_pulse_width != b->hsync_pulse_width) + return false; + + if (a->hblank_hi != b->hblank_hi || + a->hblank_lo != b->hblank_lo) + return false; + + if (a->vactive_hi != b->vactive_hi || + a->vactive_lo != b->vactive_lo) + return false; + + if (a->vsync_off != b->vsync_off) + return false; + + if (a->vsync_pulse_width != b->vsync_pulse_width) + return false; + + if (a->vblank_hi != b->vblank_hi || + a->vblank_lo != b->vblank_lo) + return false; + + return true; +} + +static const struct lvds_dvo_timing * +get_lvds_dvo_timing(const struct bdb_lvds_lfp_data *lvds_lfp_data, + const struct bdb_lvds_lfp_data_ptrs *lvds_lfp_data_ptrs, + int index) +{ + /* + * the size of fp_timing varies on the different platform. + * So calculate the DVO timing relative offset in LVDS data + * entry to get the DVO timing entry + */ + + int lfp_data_size = + lvds_lfp_data_ptrs->ptr[1].dvo_timing_offset - + lvds_lfp_data_ptrs->ptr[0].dvo_timing_offset; + int dvo_timing_offset = + lvds_lfp_data_ptrs->ptr[0].dvo_timing_offset - + lvds_lfp_data_ptrs->ptr[0].fp_timing_offset; + char *entry = (char *)lvds_lfp_data->data + lfp_data_size * index; + + return (struct lvds_dvo_timing *)(entry + dvo_timing_offset); +} + /* Try to find integrated panel data */ static void parse_lfp_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb) { - struct bdb_lvds_options *lvds_options; - struct bdb_lvds_lfp_data *lvds_lfp_data; - struct bdb_lvds_lfp_data_ptrs *lvds_lfp_data_ptrs; - struct bdb_lvds_lfp_data_entry *entry; - struct lvds_dvo_timing *dvo_timing; + const struct bdb_lvds_options *lvds_options; + const struct bdb_lvds_lfp_data *lvds_lfp_data; + const struct bdb_lvds_lfp_data_ptrs *lvds_lfp_data_ptrs; + const struct lvds_dvo_timing *panel_dvo_timing; struct drm_display_mode *panel_fixed_mode; - int lfp_data_size, dvo_timing_offset; - int i, temp_downclock; - struct drm_display_mode *temp_mode; + int i, downclock; lvds_options = find_section(bdb, BDB_LVDS_OPTIONS); if (!lvds_options) @@ -150,75 +205,44 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv, dev_priv->lvds_vbt = 1; - lfp_data_size = lvds_lfp_data_ptrs->ptr[1].dvo_timing_offset - - lvds_lfp_data_ptrs->ptr[0].dvo_timing_offset; - entry = (struct bdb_lvds_lfp_data_entry *) - ((uint8_t *)lvds_lfp_data->data + (lfp_data_size * - lvds_options->panel_type)); - dvo_timing_offset = lvds_lfp_data_ptrs->ptr[0].dvo_timing_offset - - lvds_lfp_data_ptrs->ptr[0].fp_timing_offset; - - /* - * the size of fp_timing varies on the different platform. - * So calculate the DVO timing relative offset in LVDS data - * entry to get the DVO timing entry - */ - dvo_timing = (struct lvds_dvo_timing *) - ((unsigned char *)entry + dvo_timing_offset); + panel_dvo_timing = get_lvds_dvo_timing(lvds_lfp_data, + lvds_lfp_data_ptrs, + lvds_options->panel_type); panel_fixed_mode = kzalloc(sizeof(*panel_fixed_mode), GFP_KERNEL); if (!panel_fixed_mode) return; - fill_detail_timing_data(panel_fixed_mode, dvo_timing); + fill_detail_timing_data(panel_fixed_mode, panel_dvo_timing); dev_priv->lfp_lvds_vbt_mode = panel_fixed_mode; DRM_DEBUG_KMS("Found panel mode in BIOS VBT tables:\n"); drm_mode_debug_printmodeline(panel_fixed_mode); - temp_mode = kzalloc(sizeof(*temp_mode), GFP_KERNEL); - temp_downclock = panel_fixed_mode->clock; /* - * enumerate the LVDS panel timing info entry in VBT to check whether - * the LVDS downclock is found. + * Iterate over the LVDS panel timing info to find the lowest clock + * for the native resolution. */ + downclock = panel_dvo_timing->clock; for (i = 0; i < 16; i++) { - entry = (struct bdb_lvds_lfp_data_entry *) - ((uint8_t *)lvds_lfp_data->data + (lfp_data_size * i)); - dvo_timing = (struct lvds_dvo_timing *) - ((unsigned char *)entry + dvo_timing_offset); - - fill_detail_timing_data(temp_mode, dvo_timing); - - if (temp_mode->hdisplay == panel_fixed_mode->hdisplay && - temp_mode->hsync_start == panel_fixed_mode->hsync_start && - temp_mode->hsync_end == panel_fixed_mode->hsync_end && - temp_mode->htotal == panel_fixed_mode->htotal && - temp_mode->vdisplay == panel_fixed_mode->vdisplay && - temp_mode->vsync_start == panel_fixed_mode->vsync_start && - temp_mode->vsync_end == panel_fixed_mode->vsync_end && - temp_mode->vtotal == panel_fixed_mode->vtotal && - temp_mode->clock < temp_downclock) { - /* - * downclock is already found. But we expect - * to find the lower downclock. - */ - temp_downclock = temp_mode->clock; - } - /* clear it to zero */ - memset(temp_mode, 0, sizeof(*temp_mode)); + const struct lvds_dvo_timing *dvo_timing; + + dvo_timing = get_lvds_dvo_timing(lvds_lfp_data, + lvds_lfp_data_ptrs, + i); + if (lvds_dvo_timing_equal_size(dvo_timing, panel_dvo_timing) && + dvo_timing->clock < downclock) + downclock = dvo_timing->clock; } - kfree(temp_mode); - if (temp_downclock < panel_fixed_mode->clock && - i915_lvds_downclock) { + + if (downclock < panel_dvo_timing->clock && i915_lvds_downclock) { dev_priv->lvds_downclock_avail = 1; - dev_priv->lvds_downclock = temp_downclock; + dev_priv->lvds_downclock = downclock * 10; DRM_DEBUG_KMS("LVDS downclock is found in VBT. " "Normal Clock %dKHz, downclock %dKHz\n", - temp_downclock, panel_fixed_mode->clock); + panel_fixed_mode->clock, 10*downclock); } - return; } /* Try to find sdvo panel data */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0f1c799..393a399 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -24,6 +24,7 @@ * Eric Anholt <eric@anholt.net> */ +#include <linux/cpufreq.h> #include <linux/module.h> #include <linux/input.h> #include <linux/i2c.h> @@ -1157,12 +1158,15 @@ static void intel_enable_transcoder(struct drm_i915_private *dev_priv, reg = TRANSCONF(pipe); val = I915_READ(reg); - /* - * make the BPC in transcoder be consistent with - * that in pipeconf reg. - */ - val &= ~PIPE_BPC_MASK; - val |= I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK; + + if (HAS_PCH_IBX(dev_priv->dev)) { + /* + * make the BPC in transcoder be consistent with + * that in pipeconf reg. + */ + val &= ~PIPE_BPC_MASK; + val |= I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK; + } I915_WRITE(reg, val | TRANS_ENABLE); if (wait_for(I915_READ(reg) & TRANS_STATE_ENABLE, 100)) DRM_ERROR("failed to enable transcoder %d\n", pipe); @@ -1380,6 +1384,28 @@ static void intel_disable_pch_ports(struct drm_i915_private *dev_priv, disable_pch_hdmi(dev_priv, pipe, HDMID); } +static void i8xx_disable_fbc(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 fbc_ctl; + + /* Disable compression */ + fbc_ctl = I915_READ(FBC_CONTROL); + if ((fbc_ctl & FBC_CTL_EN) == 0) + return; + + fbc_ctl &= ~FBC_CTL_EN; + I915_WRITE(FBC_CONTROL, fbc_ctl); + + /* Wait for compressing bit to clear */ + if (wait_for((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) == 0, 10)) { + DRM_DEBUG_KMS("FBC idle timed out\n"); + return; + } + + DRM_DEBUG_KMS("disabled FBC\n"); +} + static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval) { struct drm_device *dev = crtc->dev; @@ -1388,36 +1414,25 @@ static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval) struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); struct drm_i915_gem_object *obj = intel_fb->obj; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int cfb_pitch; int plane, i; u32 fbc_ctl, fbc_ctl2; - if (fb->pitch == dev_priv->cfb_pitch && - obj->fence_reg == dev_priv->cfb_fence && - intel_crtc->plane == dev_priv->cfb_plane && - I915_READ(FBC_CONTROL) & FBC_CTL_EN) - return; - - i8xx_disable_fbc(dev); - - dev_priv->cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE; - - if (fb->pitch < dev_priv->cfb_pitch) - dev_priv->cfb_pitch = fb->pitch; + cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE; + if (fb->pitch < cfb_pitch) + cfb_pitch = fb->pitch; /* FBC_CTL wants 64B units */ - dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1; - dev_priv->cfb_fence = obj->fence_reg; - dev_priv->cfb_plane = intel_crtc->plane; - plane = dev_priv->cfb_plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB; + cfb_pitch = (cfb_pitch / 64) - 1; + plane = intel_crtc->plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB; /* Clear old tags */ for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++) I915_WRITE(FBC_TAG + (i * 4), 0); /* Set it up... */ - fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | plane; - if (obj->tiling_mode != I915_TILING_NONE) - fbc_ctl2 |= FBC_CTL_CPU_FENCE; + fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE; + fbc_ctl2 |= plane; I915_WRITE(FBC_CONTROL2, fbc_ctl2); I915_WRITE(FBC_FENCE_OFF, crtc->y); @@ -1425,36 +1440,13 @@ static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval) fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC; if (IS_I945GM(dev)) fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */ - fbc_ctl |= (dev_priv->cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT; + fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT; fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT; - if (obj->tiling_mode != I915_TILING_NONE) - fbc_ctl |= dev_priv->cfb_fence; - I915_WRITE(FBC_CONTROL, fbc_ctl); - - DRM_DEBUG_KMS("enabled FBC, pitch %ld, yoff %d, plane %d, ", - dev_priv->cfb_pitch, crtc->y, dev_priv->cfb_plane); -} - -void i8xx_disable_fbc(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u32 fbc_ctl; - - /* Disable compression */ - fbc_ctl = I915_READ(FBC_CONTROL); - if ((fbc_ctl & FBC_CTL_EN) == 0) - return; - - fbc_ctl &= ~FBC_CTL_EN; + fbc_ctl |= obj->fence_reg; I915_WRITE(FBC_CONTROL, fbc_ctl); - /* Wait for compressing bit to clear */ - if (wait_for((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) == 0, 10)) { - DRM_DEBUG_KMS("FBC idle timed out\n"); - return; - } - - DRM_DEBUG_KMS("disabled FBC\n"); + DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %d, ", + cfb_pitch, crtc->y, intel_crtc->plane); } static bool i8xx_fbc_enabled(struct drm_device *dev) @@ -1476,30 +1468,9 @@ static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval) unsigned long stall_watermark = 200; u32 dpfc_ctl; - dpfc_ctl = I915_READ(DPFC_CONTROL); - if (dpfc_ctl & DPFC_CTL_EN) { - if (dev_priv->cfb_pitch == dev_priv->cfb_pitch / 64 - 1 && - dev_priv->cfb_fence == obj->fence_reg && - dev_priv->cfb_plane == intel_crtc->plane && - dev_priv->cfb_y == crtc->y) - return; - - I915_WRITE(DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN); - intel_wait_for_vblank(dev, intel_crtc->pipe); - } - - dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1; - dev_priv->cfb_fence = obj->fence_reg; - dev_priv->cfb_plane = intel_crtc->plane; - dev_priv->cfb_y = crtc->y; - dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X; - if (obj->tiling_mode != I915_TILING_NONE) { - dpfc_ctl |= DPFC_CTL_FENCE_EN | dev_priv->cfb_fence; - I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY); - } else { - I915_WRITE(DPFC_CHICKEN, ~DPFC_HT_MODIFY); - } + dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg; + I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY); I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN | (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) | @@ -1512,7 +1483,7 @@ static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval) DRM_DEBUG_KMS("enabled fbc on plane %d\n", intel_crtc->plane); } -void g4x_disable_fbc(struct drm_device *dev) +static void g4x_disable_fbc(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; u32 dpfc_ctl; @@ -1567,32 +1538,12 @@ static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval) u32 dpfc_ctl; dpfc_ctl = I915_READ(ILK_DPFC_CONTROL); - if (dpfc_ctl & DPFC_CTL_EN) { - if (dev_priv->cfb_pitch == dev_priv->cfb_pitch / 64 - 1 && - dev_priv->cfb_fence == obj->fence_reg && - dev_priv->cfb_plane == intel_crtc->plane && - dev_priv->cfb_offset == obj->gtt_offset && - dev_priv->cfb_y == crtc->y) - return; - - I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN); - intel_wait_for_vblank(dev, intel_crtc->pipe); - } - - dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1; - dev_priv->cfb_fence = obj->fence_reg; - dev_priv->cfb_plane = intel_crtc->plane; - dev_priv->cfb_offset = obj->gtt_offset; - dev_priv->cfb_y = crtc->y; - dpfc_ctl &= DPFC_RESERVED; dpfc_ctl |= (plane | DPFC_CTL_LIMIT_1X); - if (obj->tiling_mode != I915_TILING_NONE) { - dpfc_ctl |= (DPFC_CTL_FENCE_EN | dev_priv->cfb_fence); - I915_WRITE(ILK_DPFC_CHICKEN, DPFC_HT_MODIFY); - } else { - I915_WRITE(ILK_DPFC_CHICKEN, ~DPFC_HT_MODIFY); - } + /* Set persistent mode for front-buffer rendering, ala X. */ + dpfc_ctl |= DPFC_CTL_PERSISTENT_MODE; + dpfc_ctl |= (DPFC_CTL_FENCE_EN | obj->fence_reg); + I915_WRITE(ILK_DPFC_CHICKEN, DPFC_HT_MODIFY); I915_WRITE(ILK_DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN | (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) | @@ -1604,7 +1555,7 @@ static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval) if (IS_GEN6(dev)) { I915_WRITE(SNB_DPFC_CTL_SA, - SNB_CPU_FENCE_ENABLE | dev_priv->cfb_fence); + SNB_CPU_FENCE_ENABLE | obj->fence_reg); I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y); sandybridge_blit_fbc_update(dev); } @@ -1612,7 +1563,7 @@ static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval) DRM_DEBUG_KMS("enabled fbc on plane %d\n", intel_crtc->plane); } -void ironlake_disable_fbc(struct drm_device *dev) +static void ironlake_disable_fbc(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; u32 dpfc_ctl; @@ -1644,24 +1595,109 @@ bool intel_fbc_enabled(struct drm_device *dev) return dev_priv->display.fbc_enabled(dev); } -void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval) +static void intel_fbc_work_fn(struct work_struct *__work) { - struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct intel_fbc_work *work = + container_of(to_delayed_work(__work), + struct intel_fbc_work, work); + struct drm_device *dev = work->crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + mutex_lock(&dev->struct_mutex); + if (work == dev_priv->fbc_work) { + /* Double check that we haven't switched fb without cancelling + * the prior work. + */ + if (work->crtc->fb == work->fb) { + dev_priv->display.enable_fbc(work->crtc, + work->interval); + + dev_priv->cfb_plane = to_intel_crtc(work->crtc)->plane; + dev_priv->cfb_fb = work->crtc->fb->base.id; + dev_priv->cfb_y = work->crtc->y; + } + + dev_priv->fbc_work = NULL; + } + mutex_unlock(&dev->struct_mutex); + + kfree(work); +} + +static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv) +{ + if (dev_priv->fbc_work == NULL) + return; + + DRM_DEBUG_KMS("cancelling pending FBC enable\n"); + + /* Synchronisation is provided by struct_mutex and checking of + * dev_priv->fbc_work, so we can perform the cancellation + * entirely asynchronously. + */ + if (cancel_delayed_work(&dev_priv->fbc_work->work)) + /* tasklet was killed before being run, clean up */ + kfree(dev_priv->fbc_work); + + /* Mark the work as no longer wanted so that if it does + * wake-up (because the work was already running and waiting + * for our mutex), it will discover that is no longer + * necessary to run. + */ + dev_priv->fbc_work = NULL; +} + +static void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval) +{ + struct intel_fbc_work *work; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; if (!dev_priv->display.enable_fbc) return; - dev_priv->display.enable_fbc(crtc, interval); + intel_cancel_fbc_work(dev_priv); + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (work == NULL) { + dev_priv->display.enable_fbc(crtc, interval); + return; + } + + work->crtc = crtc; + work->fb = crtc->fb; + work->interval = interval; + INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn); + + dev_priv->fbc_work = work; + + DRM_DEBUG_KMS("scheduling delayed FBC enable\n"); + + /* Delay the actual enabling to let pageflipping cease and the + * display to settle before starting the compression. Note that + * this delay also serves a second purpose: it allows for a + * vblank to pass after disabling the FBC before we attempt + * to modify the control registers. + * + * A more complicated solution would involve tracking vblanks + * following the termination of the page-flipping sequence + * and indeed performing the enable as a co-routine and not + * waiting synchronously upon the vblank. + */ + schedule_delayed_work(&work->work, msecs_to_jiffies(50)); } void intel_disable_fbc(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + intel_cancel_fbc_work(dev_priv); + if (!dev_priv->display.disable_fbc) return; dev_priv->display.disable_fbc(dev); + dev_priv->cfb_plane = -1; } /** @@ -1760,8 +1796,13 @@ static void intel_update_fbc(struct drm_device *dev) dev_priv->no_fbc_reason = FBC_BAD_PLANE; goto out_disable; } - if (obj->tiling_mode != I915_TILING_X) { - DRM_DEBUG_KMS("framebuffer not tiled, disabling compression\n"); + + /* The use of a CPU fence is mandatory in order to detect writes + * by the CPU to the scanout and trigger updates to the FBC. + */ + if (obj->tiling_mode != I915_TILING_X || + obj->fence_reg == I915_FENCE_REG_NONE) { + DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n"); dev_priv->no_fbc_reason = FBC_NOT_TILED; goto out_disable; } @@ -1770,6 +1811,44 @@ static void intel_update_fbc(struct drm_device *dev) if (in_dbg_master()) goto out_disable; + /* If the scanout has not changed, don't modify the FBC settings. + * Note that we make the fundamental assumption that the fb->obj + * cannot be unpinned (and have its GTT offset and fence revoked) + * without first being decoupled from the scanout and FBC disabled. + */ + if (dev_priv->cfb_plane == intel_crtc->plane && + dev_priv->cfb_fb == fb->base.id && + dev_priv->cfb_y == crtc->y) + return; + + if (intel_fbc_enabled(dev)) { + /* We update FBC along two paths, after changing fb/crtc + * configuration (modeswitching) and after page-flipping + * finishes. For the latter, we know that not only did + * we disable the FBC at the start of the page-flip + * sequence, but also more than one vblank has passed. + * + * For the former case of modeswitching, it is possible + * to switch between two FBC valid configurations + * instantaneously so we do need to disable the FBC + * before we can modify its control registers. We also + * have to wait for the next vblank for that to take + * effect. However, since we delay enabling FBC we can + * assume that a vblank has passed since disabling and + * that we can safely alter the registers in the deferred + * callback. + * + * In the scenario that we go from a valid to invalid + * and then back to valid FBC configuration we have + * no strict enforcement that a vblank occurred since + * disabling the FBC. However, along all current pipe + * disabling paths we do need to wait for a vblank at + * some point. And we wait before enabling FBC anyway. + */ + DRM_DEBUG_KMS("disabling active FBC for update\n"); + intel_disable_fbc(dev); + } + intel_enable_fbc(crtc, 500); return; @@ -1812,14 +1891,10 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev, } dev_priv->mm.interruptible = false; - ret = i915_gem_object_pin(obj, alignment, true); + ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined); if (ret) goto err_interruptible; - ret = i915_gem_object_set_to_display_plane(obj, pipelined); - if (ret) - goto err_unpin; - /* Install a fence for tiled scan-out. Pre-i965 always needs a * fence, whereas 965+ only requires a fence if using * framebuffer compression. For simplicity, we always install @@ -1841,10 +1916,8 @@ err_interruptible: return ret; } -/* Assume fb object is pinned & idle & fenced and just update base pointers */ -static int -intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) +static int i9xx_update_plane(struct drm_crtc *crtc, struct drm_framebuffer *fb, + int x, int y) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -1887,7 +1960,7 @@ intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, dspcntr |= DISPPLANE_32BPP_NO_ALPHA; break; default: - DRM_ERROR("Unknown color depth\n"); + DRM_ERROR("Unknown color depth %d\n", fb->bits_per_pixel); return -EINVAL; } if (INTEL_INFO(dev)->gen >= 4) { @@ -1897,10 +1970,6 @@ intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, dspcntr &= ~DISPPLANE_TILED; } - if (HAS_PCH_SPLIT(dev)) - /* must disable */ - dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; - I915_WRITE(reg, dspcntr); Start = obj->gtt_offset; @@ -1917,6 +1986,99 @@ intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, I915_WRITE(DSPADDR(plane), Start + Offset); POSTING_READ(reg); + return 0; +} + +static int ironlake_update_plane(struct drm_crtc *crtc, + struct drm_framebuffer *fb, int x, int y) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_framebuffer *intel_fb; + struct drm_i915_gem_object *obj; + int plane = intel_crtc->plane; + unsigned long Start, Offset; + u32 dspcntr; + u32 reg; + + switch (plane) { + case 0: + case 1: + break; + default: + DRM_ERROR("Can't update plane %d in SAREA\n", plane); + return -EINVAL; + } + + intel_fb = to_intel_framebuffer(fb); + obj = intel_fb->obj; + + reg = DSPCNTR(plane); + dspcntr = I915_READ(reg); + /* Mask out pixel format bits in case we change it */ + dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; + switch (fb->bits_per_pixel) { + case 8: + dspcntr |= DISPPLANE_8BPP; + break; + case 16: + if (fb->depth != 16) + return -EINVAL; + + dspcntr |= DISPPLANE_16BPP; + break; + case 24: + case 32: + if (fb->depth == 24) + dspcntr |= DISPPLANE_32BPP_NO_ALPHA; + else if (fb->depth == 30) + dspcntr |= DISPPLANE_32BPP_30BIT_NO_ALPHA; + else + return -EINVAL; + break; + default: + DRM_ERROR("Unknown color depth %d\n", fb->bits_per_pixel); + return -EINVAL; + } + + if (obj->tiling_mode != I915_TILING_NONE) + dspcntr |= DISPPLANE_TILED; + else + dspcntr &= ~DISPPLANE_TILED; + + /* must disable */ + dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; + + I915_WRITE(reg, dspcntr); + + Start = obj->gtt_offset; + Offset = y * fb->pitch + x * (fb->bits_per_pixel / 8); + + DRM_DEBUG_KMS("Writing base %08lX %08lX %d %d %d\n", + Start, Offset, x, y, fb->pitch); + I915_WRITE(DSPSTRIDE(plane), fb->pitch); + I915_WRITE(DSPSURF(plane), Start); + I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); + I915_WRITE(DSPADDR(plane), Offset); + POSTING_READ(reg); + + return 0; +} + +/* Assume fb object is pinned & idle & fenced and just update base pointers */ +static int +intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, + int x, int y, enum mode_set_atomic state) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = dev_priv->display.update_plane(crtc, fb, x, y); + if (ret) + return ret; + intel_update_fbc(dev); intel_increase_pllclock(crtc); @@ -1971,7 +2133,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, * This should only fail upon a hung GPU, in which case we * can safely continue. */ - ret = i915_gem_object_flush_gpu(obj); + ret = i915_gem_object_finish_gpu(obj); (void) ret; } @@ -2622,6 +2784,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) /* For PCH DP, enable TRANS_DP_CTL */ if (HAS_PCH_CPT(dev) && intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) { + u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) >> 5; reg = TRANS_DP_CTL(pipe); temp = I915_READ(reg); temp &= ~(TRANS_DP_PORT_SEL_MASK | @@ -2629,7 +2792,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) TRANS_DP_BPC_MASK); temp |= (TRANS_DP_OUTPUT_ENABLE | TRANS_DP_ENH_FRAMING); - temp |= TRANS_DP_8BPC; + temp |= bpc << 9; /* same format but at 11:9 */ if (crtc->mode.flags & DRM_MODE_FLAG_PHSYNC) temp |= TRANS_DP_HSYNC_ACTIVE_HIGH; @@ -2732,9 +2895,8 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc) intel_disable_plane(dev_priv, plane, pipe); - if (dev_priv->cfb_plane == plane && - dev_priv->display.disable_fbc) - dev_priv->display.disable_fbc(dev); + if (dev_priv->cfb_plane == plane) + intel_disable_fbc(dev); intel_disable_pipe(dev_priv, pipe); @@ -2898,9 +3060,8 @@ static void i9xx_crtc_disable(struct drm_crtc *crtc) intel_crtc_dpms_overlay(intel_crtc, false); intel_crtc_update_cursor(crtc, false); - if (dev_priv->cfb_plane == plane && - dev_priv->display.disable_fbc) - dev_priv->display.disable_fbc(dev); + if (dev_priv->cfb_plane == plane) + intel_disable_fbc(dev); intel_disable_plane(dev_priv, plane, pipe); intel_disable_pipe(dev_priv, pipe); @@ -4309,6 +4470,133 @@ static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) && !(dev_priv->quirks & QUIRK_LVDS_SSC_DISABLE); } +/** + * intel_choose_pipe_bpp_dither - figure out what color depth the pipe should send + * @crtc: CRTC structure + * + * A pipe may be connected to one or more outputs. Based on the depth of the + * attached framebuffer, choose a good color depth to use on the pipe. + * + * If possible, match the pipe depth to the fb depth. In some cases, this + * isn't ideal, because the connected output supports a lesser or restricted + * set of depths. Resolve that here: + * LVDS typically supports only 6bpc, so clamp down in that case + * HDMI supports only 8bpc or 12bpc, so clamp to 8bpc with dither for 10bpc + * Displays may support a restricted set as well, check EDID and clamp as + * appropriate. + * + * RETURNS: + * Dithering requirement (i.e. false if display bpc and pipe bpc match, + * true if they don't match). + */ +static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc, + unsigned int *pipe_bpp) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_encoder *encoder; + struct drm_connector *connector; + unsigned int display_bpc = UINT_MAX, bpc; + + /* Walk the encoders & connectors on this crtc, get min bpc */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + struct intel_encoder *intel_encoder = to_intel_encoder(encoder); + + if (encoder->crtc != crtc) + continue; + + if (intel_encoder->type == INTEL_OUTPUT_LVDS) { + unsigned int lvds_bpc; + + if ((I915_READ(PCH_LVDS) & LVDS_A3_POWER_MASK) == + LVDS_A3_POWER_UP) + lvds_bpc = 8; + else + lvds_bpc = 6; + + if (lvds_bpc < display_bpc) { + DRM_DEBUG_DRIVER("clamping display bpc (was %d) to LVDS (%d)\n", display_bpc, lvds_bpc); + display_bpc = lvds_bpc; + } + continue; + } + + if (intel_encoder->type == INTEL_OUTPUT_EDP) { + /* Use VBT settings if we have an eDP panel */ + unsigned int edp_bpc = dev_priv->edp.bpp / 3; + + if (edp_bpc < display_bpc) { + DRM_DEBUG_DRIVER("clamping display bpc (was %d) to eDP (%d)\n", display_bpc, edp_bpc); + display_bpc = edp_bpc; + } + continue; + } + + /* Not one of the known troublemakers, check the EDID */ + list_for_each_entry(connector, &dev->mode_config.connector_list, + head) { + if (connector->encoder != encoder) + continue; + + if (connector->display_info.bpc < display_bpc) { + DRM_DEBUG_DRIVER("clamping display bpc (was %d) to EDID reported max of %d\n", display_bpc, connector->display_info.bpc); + display_bpc = connector->display_info.bpc; + } + } + + /* + * HDMI is either 12 or 8, so if the display lets 10bpc sneak + * through, clamp it down. (Note: >12bpc will be caught below.) + */ + if (intel_encoder->type == INTEL_OUTPUT_HDMI) { + if (display_bpc > 8 && display_bpc < 12) { + DRM_DEBUG_DRIVER("forcing bpc to 12 for HDMI\n"); + display_bpc = 12; + } else { + DRM_DEBUG_DRIVER("forcing bpc to 8 for HDMI\n"); + display_bpc = 8; + } + } + } + + /* + * We could just drive the pipe at the highest bpc all the time and + * enable dithering as needed, but that costs bandwidth. So choose + * the minimum value that expresses the full color range of the fb but + * also stays within the max display bpc discovered above. + */ + + switch (crtc->fb->depth) { + case 8: + bpc = 8; /* since we go through a colormap */ + break; + case 15: + case 16: + bpc = 6; /* min is 18bpp */ + break; + case 24: + bpc = min((unsigned int)8, display_bpc); + break; + case 30: + bpc = min((unsigned int)10, display_bpc); + break; + case 48: + bpc = min((unsigned int)12, display_bpc); + break; + default: + DRM_DEBUG("unsupported depth, assuming 24 bits\n"); + bpc = min((unsigned int)8, display_bpc); + break; + } + + DRM_DEBUG_DRIVER("setting pipe bpc to %d (max display bpc %d)\n", + bpc, display_bpc); + + *pipe_bpp = bpc * 3; + + return display_bpc != bpc; +} + static int i9xx_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -4721,7 +5009,9 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, struct fdi_m_n m_n = {0}; u32 temp; u32 lvds_sync = 0; - int target_clock, pixel_multiplier, lane, link_bw, bpp, factor; + int target_clock, pixel_multiplier, lane, link_bw, factor; + unsigned int pipe_bpp; + bool dither; list_for_each_entry(encoder, &mode_config->encoder_list, base.head) { if (encoder->base.crtc != crtc) @@ -4848,56 +5138,37 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, /* determine panel color depth */ temp = I915_READ(PIPECONF(pipe)); temp &= ~PIPE_BPC_MASK; - if (is_lvds) { - /* the BPC will be 6 if it is 18-bit LVDS panel */ - if ((I915_READ(PCH_LVDS) & LVDS_A3_POWER_MASK) == LVDS_A3_POWER_UP) - temp |= PIPE_8BPC; - else - temp |= PIPE_6BPC; - } else if (has_edp_encoder) { - switch (dev_priv->edp.bpp/3) { - case 8: - temp |= PIPE_8BPC; - break; - case 10: - temp |= PIPE_10BPC; - break; - case 6: - temp |= PIPE_6BPC; - break; - case 12: - temp |= PIPE_12BPC; - break; - } - } else - temp |= PIPE_8BPC; - I915_WRITE(PIPECONF(pipe), temp); - - switch (temp & PIPE_BPC_MASK) { - case PIPE_8BPC: - bpp = 24; + dither = intel_choose_pipe_bpp_dither(crtc, &pipe_bpp); + switch (pipe_bpp) { + case 18: + temp |= PIPE_6BPC; break; - case PIPE_10BPC: - bpp = 30; + case 24: + temp |= PIPE_8BPC; break; - case PIPE_6BPC: - bpp = 18; + case 30: + temp |= PIPE_10BPC; break; - case PIPE_12BPC: - bpp = 36; + case 36: + temp |= PIPE_12BPC; break; default: - DRM_ERROR("unknown pipe bpc value\n"); - bpp = 24; + WARN(1, "intel_choose_pipe_bpp returned invalid value\n"); + temp |= PIPE_8BPC; + pipe_bpp = 24; + break; } + intel_crtc->bpp = pipe_bpp; + I915_WRITE(PIPECONF(pipe), temp); + if (!lane) { /* * Account for spread spectrum to avoid * oversubscribing the link. Max center spread * is 2.5%; use 5% for safety's sake. */ - u32 bps = target_clock * bpp * 21 / 20; + u32 bps = target_clock * intel_crtc->bpp * 21 / 20; lane = bps / (link_bw * 8) + 1; } @@ -4905,7 +5176,8 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, if (pixel_multiplier > 1) link_bw *= pixel_multiplier; - ironlake_compute_m_n(bpp, lane, target_clock, link_bw, &m_n); + ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw, + &m_n); /* Ironlake: try to setup display ref clock before DPLL * enabling. This is only under driver's control after @@ -5108,14 +5380,12 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, I915_WRITE(PCH_LVDS, temp); } - /* set the dithering flag and clear for anything other than a panel. */ pipeconf &= ~PIPECONF_DITHER_EN; pipeconf &= ~PIPECONF_DITHER_TYPE_MASK; - if (dev_priv->lvds_dither && (is_lvds || has_edp_encoder)) { + if ((is_lvds && dev_priv->lvds_dither) || dither) { pipeconf |= PIPECONF_DITHER_EN; pipeconf |= PIPECONF_DITHER_TYPE_ST1; } - if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base)) { intel_dp_set_m_n(crtc, mode, adjusted_mode); } else { @@ -5435,21 +5705,15 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, goto fail_locked; } - ret = i915_gem_object_pin(obj, PAGE_SIZE, true); - if (ret) { - DRM_ERROR("failed to pin cursor bo\n"); - goto fail_locked; - } - - ret = i915_gem_object_set_to_gtt_domain(obj, 0); + ret = i915_gem_object_pin_to_display_plane(obj, 0, NULL); if (ret) { DRM_ERROR("failed to move cursor bo into the GTT\n"); - goto fail_unpin; + goto fail_locked; } ret = i915_gem_object_put_fence(obj); if (ret) { - DRM_ERROR("failed to move cursor bo into the GTT\n"); + DRM_ERROR("failed to release fence for cursor"); goto fail_unpin; } @@ -6152,6 +6416,7 @@ static void intel_unpin_work_fn(struct work_struct *__work) drm_gem_object_unreference(&work->pending_flip_obj->base); drm_gem_object_unreference(&work->old_fb_obj->base); + intel_update_fbc(work->dev); mutex_unlock(&work->dev->struct_mutex); kfree(work); } @@ -6516,6 +6781,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (ret) goto cleanup_pending; + intel_disable_fbc(dev); mutex_unlock(&dev->struct_mutex); trace_i915_flip_request(intel_crtc->plane, obj); @@ -6644,6 +6910,7 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc_reset(&intel_crtc->base); intel_crtc->active = true; /* force the pipe off on setup_init_config */ + intel_crtc->bpp = 24; /* default for pre-Ironlake */ if (HAS_PCH_SPLIT(dev)) { intel_helper_funcs.prepare = ironlake_crtc_prepare; @@ -6870,6 +7137,11 @@ int intel_framebuffer_init(struct drm_device *dev, switch (mode_cmd->bpp) { case 8: case 16: + /* Only pre-ILK can handle 5:5:5 */ + if (mode_cmd->depth == 15 && !HAS_PCH_SPLIT(dev)) + return -EINVAL; + break; + case 24: case 32: break; @@ -7284,6 +7556,59 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->dev->struct_mutex); } +void gen6_update_ring_freq(struct drm_i915_private *dev_priv) +{ + int min_freq = 15; + int gpu_freq, ia_freq, max_ia_freq; + int scaling_factor = 180; + + max_ia_freq = cpufreq_quick_get_max(0); + /* + * Default to measured freq if none found, PCU will ensure we don't go + * over + */ + if (!max_ia_freq) + max_ia_freq = tsc_khz; + + /* Convert from kHz to MHz */ + max_ia_freq /= 1000; + + mutex_lock(&dev_priv->dev->struct_mutex); + + /* + * For each potential GPU frequency, load a ring frequency we'd like + * to use for memory access. We do this by specifying the IA frequency + * the PCU should use as a reference to determine the ring frequency. + */ + for (gpu_freq = dev_priv->max_delay; gpu_freq >= dev_priv->min_delay; + gpu_freq--) { + int diff = dev_priv->max_delay - gpu_freq; + + /* + * For GPU frequencies less than 750MHz, just use the lowest + * ring freq. + */ + if (gpu_freq < min_freq) + ia_freq = 800; + else + ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); + ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); + + I915_WRITE(GEN6_PCODE_DATA, + (ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT) | + gpu_freq); + I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | + GEN6_PCODE_WRITE_MIN_FREQ_TABLE); + if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & + GEN6_PCODE_READY) == 0, 10)) { + DRM_ERROR("pcode write of freq table timed out\n"); + continue; + } + } + + mutex_unlock(&dev_priv->dev->struct_mutex); +} + static void ironlake_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -7640,9 +7965,11 @@ static void intel_init_display(struct drm_device *dev) if (HAS_PCH_SPLIT(dev)) { dev_priv->display.dpms = ironlake_crtc_dpms; dev_priv->display.crtc_mode_set = ironlake_crtc_mode_set; + dev_priv->display.update_plane = ironlake_update_plane; } else { dev_priv->display.dpms = i9xx_crtc_dpms; dev_priv->display.crtc_mode_set = i9xx_crtc_mode_set; + dev_priv->display.update_plane = i9xx_update_plane; } if (I915_HAS_FBC(dev)) { @@ -7939,8 +8266,10 @@ void intel_modeset_init(struct drm_device *dev) intel_init_emon(dev); } - if (IS_GEN6(dev)) + if (IS_GEN6(dev) || IS_GEN7(dev)) { gen6_enable_rps(dev_priv); + gen6_update_ring_freq(dev_priv); + } INIT_WORK(&dev_priv->idle_work, intel_idle_update); setup_timer(&dev_priv->idle_timer, intel_gpu_idle_timer, @@ -7976,12 +8305,11 @@ void intel_modeset_cleanup(struct drm_device *dev) intel_increase_pllclock(crtc); } - if (dev_priv->display.disable_fbc) - dev_priv->display.disable_fbc(dev); + intel_disable_fbc(dev); if (IS_IRONLAKE_M(dev)) ironlake_disable_drps(dev); - if (IS_GEN6(dev)) + if (IS_GEN6(dev) || IS_GEN7(dev)) gen6_disable_rps(dev); if (IS_IRONLAKE_M(dev)) @@ -7994,6 +8322,9 @@ void intel_modeset_cleanup(struct drm_device *dev) drm_irq_uninstall(dev); cancel_work_sync(&dev_priv->hotplug_work); + /* flush any delayed tasks or pending work */ + flush_scheduled_work(); + /* Shut off idle work before the crtcs get freed. */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { intel_crtc = to_intel_crtc(crtc); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e2aced6..f797fb5 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -178,12 +178,14 @@ intel_dp_link_clock(uint8_t link_bw) static int intel_dp_link_required(struct drm_device *dev, struct intel_dp *intel_dp, int pixel_clock) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_crtc *crtc = intel_dp->base.base.crtc; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int bpp = 24; - if (is_edp(intel_dp)) - return (pixel_clock * dev_priv->edp.bpp + 7) / 8; - else - return pixel_clock * 3; + if (intel_crtc) + bpp = intel_crtc->bpp; + + return (pixel_clock * bpp + 7) / 8; } static int @@ -681,7 +683,7 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_encoder *encoder; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int lane_count = 4, bpp = 24; + int lane_count = 4; struct intel_dp_m_n m_n; int pipe = intel_crtc->pipe; @@ -700,7 +702,6 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, break; } else if (is_edp(intel_dp)) { lane_count = dev_priv->edp.lanes; - bpp = dev_priv->edp.bpp; break; } } @@ -710,7 +711,7 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, * the number of bytes_per_pixel post-LUT, which we always * set up for 8-bits of R/G/B, or 3 bytes total. */ - intel_dp_compute_m_n(bpp, lane_count, + intel_dp_compute_m_n(intel_crtc->bpp, lane_count, mode->clock, adjusted_mode->clock, &m_n); if (HAS_PCH_SPLIT(dev)) { diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9ffa61e..6e990f9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -170,6 +170,7 @@ struct intel_crtc { int16_t cursor_x, cursor_y; int16_t cursor_width, cursor_height; bool cursor_visible; + unsigned int bpp; }; #define to_intel_crtc(x) container_of(x, struct intel_crtc, base) @@ -233,6 +234,13 @@ struct intel_unpin_work { bool enable_stall_check; }; +struct intel_fbc_work { + struct delayed_work work; + struct drm_crtc *crtc; + struct drm_framebuffer *fb; + int interval; +}; + int intel_ddc_get_modes(struct drm_connector *c, struct i2c_adapter *adapter); extern bool intel_ddc_probe(struct intel_encoder *intel_encoder, int ddc_bus); @@ -317,6 +325,7 @@ extern void intel_enable_clock_gating(struct drm_device *dev); extern void ironlake_enable_drps(struct drm_device *dev); extern void ironlake_disable_drps(struct drm_device *dev); extern void gen6_enable_rps(struct drm_i915_private *dev_priv); +extern void gen6_update_ring_freq(struct drm_i915_private *dev_priv); extern void gen6_disable_rps(struct drm_device *dev); extern void intel_init_emon(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index aa0a8e8..1ed8e69 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -124,12 +124,18 @@ static void intel_hdmi_mode_set(struct drm_encoder *encoder, u32 sdvox; sdvox = SDVO_ENCODING_HDMI | SDVO_BORDER_ENABLE; - sdvox |= intel_hdmi->color_range; + if (!HAS_PCH_SPLIT(dev)) + sdvox |= intel_hdmi->color_range; if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC) sdvox |= SDVO_VSYNC_ACTIVE_HIGH; if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC) sdvox |= SDVO_HSYNC_ACTIVE_HIGH; + if (intel_crtc->bpp > 24) + sdvox |= COLOR_FORMAT_12bpc; + else + sdvox |= COLOR_FORMAT_8bpc; + /* Required on CPT */ if (intel_hdmi->has_hdmi_sink && HAS_PCH_CPT(dev)) sdvox |= HDMI_MODE_SELECT; diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index d2c7104..b7c5ddb 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -297,19 +297,26 @@ static int intel_opregion_video_event(struct notifier_block *nb, /* The only video events relevant to opregion are 0x80. These indicate either a docking event, lid switch or display switch request. In Linux, these are handled by the dock, button and video drivers. - We might want to fix the video driver to be opregion-aware in - future, but right now we just indicate to the firmware that the - request has been handled */ + */ struct opregion_acpi *acpi; + struct acpi_bus_event *event = data; + int ret = NOTIFY_OK; + + if (strcmp(event->device_class, ACPI_VIDEO_CLASS) != 0) + return NOTIFY_DONE; if (!system_opregion) return NOTIFY_DONE; acpi = system_opregion->acpi; + + if (event->type == 0x80 && !(acpi->cevt & 0x1)) + ret = NOTIFY_BAD; + acpi->csts = 0; - return NOTIFY_OK; + return ret; } static struct notifier_block intel_opregion_notifier = { diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 9e2959b..d360380 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -773,14 +773,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (ret != 0) return ret; - ret = i915_gem_object_pin(new_bo, PAGE_SIZE, true); + ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL); if (ret != 0) return ret; - ret = i915_gem_object_set_to_gtt_domain(new_bo, 0); - if (ret != 0) - goto out_unpin; - ret = i915_gem_object_put_fence(new_bo); if (ret) goto out_unpin; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 95c4b14..e961568 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -236,7 +236,8 @@ init_pipe_control(struct intel_ring_buffer *ring) ret = -ENOMEM; goto err; } - obj->cache_level = I915_CACHE_LLC; + + i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); ret = i915_gem_object_pin(obj, 4096, true); if (ret) @@ -776,7 +777,8 @@ static int init_status_page(struct intel_ring_buffer *ring) ret = -ENOMEM; goto err; } - obj->cache_level = I915_CACHE_LLC; + + i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); ret = i915_gem_object_pin(obj, 4096, true); if (ret != 0) { diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 113e4e7..210d570 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1236,6 +1236,8 @@ intel_tv_detect_type (struct intel_tv *intel_tv, struct drm_connector *connector) { struct drm_encoder *encoder = &intel_tv->base.base; + struct drm_crtc *crtc = encoder->crtc; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = dev->dev_private; unsigned long irqflags; @@ -1258,6 +1260,10 @@ intel_tv_detect_type (struct intel_tv *intel_tv, /* Poll for TV detection */ tv_ctl &= ~(TV_ENC_ENABLE | TV_TEST_MODE_MASK); tv_ctl |= TV_TEST_MODE_MONITOR_DETECT; + if (intel_crtc->pipe == 1) + tv_ctl |= TV_ENC_PIPEB_SELECT; + else + tv_ctl &= ~TV_ENC_PIPEB_SELECT; tv_dac &= ~(TVDAC_SENSE_MASK | DAC_A_MASK | DAC_B_MASK | DAC_C_MASK); tv_dac |= (TVDAC_STATE_CHG_EN | @@ -1277,26 +1283,26 @@ intel_tv_detect_type (struct intel_tv *intel_tv, to_intel_crtc(intel_tv->base.base.crtc)->pipe); type = -1; - if (wait_for((tv_dac = I915_READ(TV_DAC)) & TVDAC_STATE_CHG, 20) == 0) { - DRM_DEBUG_KMS("TV detected: %x, %x\n", tv_ctl, tv_dac); - /* - * A B C - * 0 1 1 Composite - * 1 0 X svideo - * 0 0 0 Component - */ - if ((tv_dac & TVDAC_SENSE_MASK) == (TVDAC_B_SENSE | TVDAC_C_SENSE)) { - DRM_DEBUG_KMS("Detected Composite TV connection\n"); - type = DRM_MODE_CONNECTOR_Composite; - } else if ((tv_dac & (TVDAC_A_SENSE|TVDAC_B_SENSE)) == TVDAC_A_SENSE) { - DRM_DEBUG_KMS("Detected S-Video TV connection\n"); - type = DRM_MODE_CONNECTOR_SVIDEO; - } else if ((tv_dac & TVDAC_SENSE_MASK) == 0) { - DRM_DEBUG_KMS("Detected Component TV connection\n"); - type = DRM_MODE_CONNECTOR_Component; - } else { - DRM_DEBUG_KMS("Unrecognised TV connection\n"); - } + tv_dac = I915_READ(TV_DAC); + DRM_DEBUG_KMS("TV detected: %x, %x\n", tv_ctl, tv_dac); + /* + * A B C + * 0 1 1 Composite + * 1 0 X svideo + * 0 0 0 Component + */ + if ((tv_dac & TVDAC_SENSE_MASK) == (TVDAC_B_SENSE | TVDAC_C_SENSE)) { + DRM_DEBUG_KMS("Detected Composite TV connection\n"); + type = DRM_MODE_CONNECTOR_Composite; + } else if ((tv_dac & (TVDAC_A_SENSE|TVDAC_B_SENSE)) == TVDAC_A_SENSE) { + DRM_DEBUG_KMS("Detected S-Video TV connection\n"); + type = DRM_MODE_CONNECTOR_SVIDEO; + } else if ((tv_dac & TVDAC_SENSE_MASK) == 0) { + DRM_DEBUG_KMS("Detected Component TV connection\n"); + type = DRM_MODE_CONNECTOR_Component; + } else { + DRM_DEBUG_KMS("Unrecognised TV connection\n"); + type = -1; } I915_WRITE(TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN); diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index 729d5fd..b311fab 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -135,13 +135,14 @@ static void load_vbios_pramin(struct drm_device *dev, uint8_t *data) int i; if (dev_priv->card_type >= NV_50) { - uint32_t vbios_vram = (nv_rd32(dev, 0x619f04) & ~0xff) << 8; - - if (!vbios_vram) - vbios_vram = (nv_rd32(dev, 0x1700) << 16) + 0xf0000; + u64 addr = (u64)(nv_rd32(dev, 0x619f04) & 0xffffff00) << 8; + if (!addr) { + addr = (u64)nv_rd32(dev, 0x1700) << 16; + addr += 0xf0000; + } old_bar0_pramin = nv_rd32(dev, 0x1700); - nv_wr32(dev, 0x1700, vbios_vram >> 16); + nv_wr32(dev, 0x1700, addr >> 16); } /* bail if no rom signature */ @@ -5186,7 +5187,7 @@ static int parse_bit_A_tbl_entry(struct drm_device *dev, struct nvbios *bios, st load_table_ptr = ROM16(bios->data[bitentry->offset]); if (load_table_ptr == 0x0) { - NV_ERROR(dev, "Pointer to BIT loadval table invalid\n"); + NV_DEBUG(dev, "Pointer to BIT loadval table invalid\n"); return -EINVAL; } @@ -5965,6 +5966,12 @@ apply_dcb_connector_quirks(struct nvbios *bios, int idx) if (cte->type == DCB_CONNECTOR_HDMI_1) cte->type = DCB_CONNECTOR_DVI_I; } + + /* Gigabyte GV-NX86T512H */ + if (nv_match_device(dev, 0x0402, 0x1458, 0x3455)) { + if (cte->type == DCB_CONNECTOR_HDMI_1) + cte->type = DCB_CONNECTOR_DVI_I; + } } static const u8 hpd_gpio[16] = { @@ -6377,6 +6384,37 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf) } } + /* Some other twisted XFX board (rhbz#694914) + * + * The DVI/VGA encoder combo that's supposed to represent the + * DVI-I connector actually point at two different ones, and + * the HDMI connector ends up paired with the VGA instead. + * + * Connector table is missing anything for VGA at all, pointing it + * an invalid conntab entry 2 so we figure it out ourself. + */ + if (nv_match_device(dev, 0x0615, 0x1682, 0x2605)) { + if (idx == 0) { + *conn = 0x02002300; /* VGA, connector 2 */ + *conf = 0x00000028; + } else + if (idx == 1) { + *conn = 0x01010312; /* DVI, connector 0 */ + *conf = 0x00020030; + } else + if (idx == 2) { + *conn = 0x04020310; /* VGA, connector 0 */ + *conf = 0x00000028; + } else + if (idx == 3) { + *conn = 0x02021322; /* HDMI, connector 1 */ + *conf = 0x00020010; + } else { + *conn = 0x0000000e; /* EOL */ + *conf = 0x00000000; + } + } + return true; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 2ad49cb..890d50e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -49,16 +49,12 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) DRM_ERROR("bo %p still attached to GEM object\n", bo); nv10_mem_put_tile_region(dev, nvbo->tile, NULL); - if (nvbo->vma.node) { - nouveau_vm_unmap(&nvbo->vma); - nouveau_vm_put(&nvbo->vma); - } kfree(nvbo); } static void nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags, - int *align, int *size, int *page_shift) + int *align, int *size) { struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev); @@ -82,67 +78,51 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags, } } } else { - if (likely(dev_priv->chan_vm)) { - if (!(flags & TTM_PL_FLAG_TT) && *size > 256 * 1024) - *page_shift = dev_priv->chan_vm->lpg_shift; - else - *page_shift = dev_priv->chan_vm->spg_shift; - } else { - *page_shift = 12; - } - - *size = roundup(*size, (1 << *page_shift)); - *align = max((1 << *page_shift), *align); + *size = roundup(*size, (1 << nvbo->page_shift)); + *align = max((1 << nvbo->page_shift), *align); } *size = roundup(*size, PAGE_SIZE); } int -nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan, - int size, int align, uint32_t flags, uint32_t tile_mode, - uint32_t tile_flags, struct nouveau_bo **pnvbo) +nouveau_bo_new(struct drm_device *dev, int size, int align, + uint32_t flags, uint32_t tile_mode, uint32_t tile_flags, + struct nouveau_bo **pnvbo) { struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_bo *nvbo; - int ret = 0, page_shift = 0; + int ret; nvbo = kzalloc(sizeof(struct nouveau_bo), GFP_KERNEL); if (!nvbo) return -ENOMEM; INIT_LIST_HEAD(&nvbo->head); INIT_LIST_HEAD(&nvbo->entry); + INIT_LIST_HEAD(&nvbo->vma_list); nvbo->tile_mode = tile_mode; nvbo->tile_flags = tile_flags; nvbo->bo.bdev = &dev_priv->ttm.bdev; - nouveau_bo_fixup_align(nvbo, flags, &align, &size, &page_shift); - align >>= PAGE_SHIFT; - - if (dev_priv->chan_vm) { - ret = nouveau_vm_get(dev_priv->chan_vm, size, page_shift, - NV_MEM_ACCESS_RW, &nvbo->vma); - if (ret) { - kfree(nvbo); - return ret; - } + nvbo->page_shift = 12; + if (dev_priv->bar1_vm) { + if (!(flags & TTM_PL_FLAG_TT) && size > 256 * 1024) + nvbo->page_shift = dev_priv->bar1_vm->lpg_shift; } + nouveau_bo_fixup_align(nvbo, flags, &align, &size); nvbo->bo.mem.num_pages = size >> PAGE_SHIFT; nouveau_bo_placement_set(nvbo, flags, 0); - nvbo->channel = chan; ret = ttm_bo_init(&dev_priv->ttm.bdev, &nvbo->bo, size, - ttm_bo_type_device, &nvbo->placement, align, 0, - false, NULL, size, nouveau_bo_del_ttm); + ttm_bo_type_device, &nvbo->placement, + align >> PAGE_SHIFT, 0, false, NULL, size, + nouveau_bo_del_ttm); if (ret) { /* ttm will call nouveau_bo_del_ttm if it fails.. */ return ret; } - nvbo->channel = NULL; - if (nvbo->vma.node) - nvbo->bo.offset = nvbo->vma.offset; *pnvbo = nvbo; return 0; } @@ -312,8 +292,6 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible, if (ret) return ret; - if (nvbo->vma.node) - nvbo->bo.offset = nvbo->vma.offset; return 0; } @@ -440,7 +418,6 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, TTM_MEMTYPE_FLAG_CMA; man->available_caching = TTM_PL_MASK_CACHING; man->default_caching = TTM_PL_FLAG_CACHED; - man->gpu_offset = dev_priv->gart_info.aper_base; break; default: NV_ERROR(dev, "Unknown GART type: %d\n", @@ -501,19 +478,12 @@ static int nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem) { - struct nouveau_mem *old_node = old_mem->mm_node; - struct nouveau_mem *new_node = new_mem->mm_node; - struct nouveau_bo *nvbo = nouveau_bo(bo); + struct nouveau_mem *node = old_mem->mm_node; + u64 src_offset = node->vma[0].offset; + u64 dst_offset = node->vma[1].offset; u32 page_count = new_mem->num_pages; - u64 src_offset, dst_offset; int ret; - src_offset = old_node->tmp_vma.offset; - if (new_node->tmp_vma.node) - dst_offset = new_node->tmp_vma.offset; - else - dst_offset = nvbo->vma.offset; - page_count = new_mem->num_pages; while (page_count) { int line_count = (page_count > 2047) ? 2047 : page_count; @@ -547,19 +517,13 @@ static int nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo, struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem) { - struct nouveau_mem *old_node = old_mem->mm_node; - struct nouveau_mem *new_node = new_mem->mm_node; + struct nouveau_mem *node = old_mem->mm_node; struct nouveau_bo *nvbo = nouveau_bo(bo); u64 length = (new_mem->num_pages << PAGE_SHIFT); - u64 src_offset, dst_offset; + u64 src_offset = node->vma[0].offset; + u64 dst_offset = node->vma[1].offset; int ret; - src_offset = old_node->tmp_vma.offset; - if (new_node->tmp_vma.node) - dst_offset = new_node->tmp_vma.offset; - else - dst_offset = nvbo->vma.offset; - while (length) { u32 amount, stride, height; @@ -695,6 +659,27 @@ nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo, } static int +nouveau_vma_getmap(struct nouveau_channel *chan, struct nouveau_bo *nvbo, + struct ttm_mem_reg *mem, struct nouveau_vma *vma) +{ + struct nouveau_mem *node = mem->mm_node; + int ret; + + ret = nouveau_vm_get(chan->vm, mem->num_pages << PAGE_SHIFT, + node->page_shift, NV_MEM_ACCESS_RO, vma); + if (ret) + return ret; + + if (mem->mem_type == TTM_PL_VRAM) + nouveau_vm_map(vma, node); + else + nouveau_vm_map_sg(vma, 0, mem->num_pages << PAGE_SHIFT, + node, node->pages); + + return 0; +} + +static int nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr, bool no_wait_reserve, bool no_wait_gpu, struct ttm_mem_reg *new_mem) @@ -711,31 +696,20 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr, mutex_lock_nested(&chan->mutex, NOUVEAU_KCHANNEL_MUTEX); } - /* create temporary vma for old memory, this will get cleaned - * up after ttm destroys the ttm_mem_reg + /* create temporary vmas for the transfer and attach them to the + * old nouveau_mem node, these will get cleaned up after ttm has + * destroyed the ttm_mem_reg */ if (dev_priv->card_type >= NV_50) { struct nouveau_mem *node = old_mem->mm_node; - if (!node->tmp_vma.node) { - u32 page_shift = nvbo->vma.node->type; - if (old_mem->mem_type == TTM_PL_TT) - page_shift = nvbo->vma.vm->spg_shift; - - ret = nouveau_vm_get(chan->vm, - old_mem->num_pages << PAGE_SHIFT, - page_shift, NV_MEM_ACCESS_RO, - &node->tmp_vma); - if (ret) - goto out; - } - if (old_mem->mem_type == TTM_PL_VRAM) - nouveau_vm_map(&node->tmp_vma, node); - else { - nouveau_vm_map_sg(&node->tmp_vma, 0, - old_mem->num_pages << PAGE_SHIFT, - node, node->pages); - } + ret = nouveau_vma_getmap(chan, nvbo, old_mem, &node->vma[0]); + if (ret) + goto out; + + ret = nouveau_vma_getmap(chan, nvbo, new_mem, &node->vma[1]); + if (ret) + goto out; } if (dev_priv->card_type < NV_50) @@ -762,7 +736,6 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr, bool no_wait_reserve, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { - struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev); u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING; struct ttm_placement placement; struct ttm_mem_reg tmp_mem; @@ -782,23 +755,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr, if (ret) goto out; - if (dev_priv->card_type >= NV_50) { - struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nouveau_mem *node = tmp_mem.mm_node; - struct nouveau_vma *vma = &nvbo->vma; - if (vma->node->type != vma->vm->spg_shift) - vma = &node->tmp_vma; - nouveau_vm_map_sg(vma, 0, tmp_mem.num_pages << PAGE_SHIFT, - node, node->pages); - } - ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, no_wait_gpu, &tmp_mem); - - if (dev_priv->card_type >= NV_50) { - struct nouveau_bo *nvbo = nouveau_bo(bo); - nouveau_vm_unmap(&nvbo->vma); - } - if (ret) goto out; @@ -844,30 +801,22 @@ out: static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem) { - struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev); struct nouveau_mem *node = new_mem->mm_node; struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nouveau_vma *vma = &nvbo->vma; - struct nouveau_vm *vm = vma->vm; - - if (dev_priv->card_type < NV_50) - return; - - switch (new_mem->mem_type) { - case TTM_PL_VRAM: - nouveau_vm_map(vma, node); - break; - case TTM_PL_TT: - if (vma->node->type != vm->spg_shift) { + struct nouveau_vma *vma; + + list_for_each_entry(vma, &nvbo->vma_list, head) { + if (new_mem->mem_type == TTM_PL_VRAM) { + nouveau_vm_map(vma, new_mem->mm_node); + } else + if (new_mem->mem_type == TTM_PL_TT && + nvbo->page_shift == vma->vm->spg_shift) { + nouveau_vm_map_sg(vma, 0, new_mem-> + num_pages << PAGE_SHIFT, + node, node->pages); + } else { nouveau_vm_unmap(vma); - vma = &node->tmp_vma; } - nouveau_vm_map_sg(vma, 0, new_mem->num_pages << PAGE_SHIFT, - node, node->pages); - break; - default: - nouveau_vm_unmap(&nvbo->vma); - break; } } @@ -1113,3 +1062,54 @@ struct ttm_bo_driver nouveau_bo_driver = { .io_mem_free = &nouveau_ttm_io_mem_free, }; +struct nouveau_vma * +nouveau_bo_vma_find(struct nouveau_bo *nvbo, struct nouveau_vm *vm) +{ + struct nouveau_vma *vma; + list_for_each_entry(vma, &nvbo->vma_list, head) { + if (vma->vm == vm) + return vma; + } + + return NULL; +} + +int +nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct nouveau_vm *vm, + struct nouveau_vma *vma) +{ + const u32 size = nvbo->bo.mem.num_pages << PAGE_SHIFT; + struct nouveau_mem *node = nvbo->bo.mem.mm_node; + int ret; + + ret = nouveau_vm_get(vm, size, nvbo->page_shift, + NV_MEM_ACCESS_RW, vma); + if (ret) + return ret; + + if (nvbo->bo.mem.mem_type == TTM_PL_VRAM) + nouveau_vm_map(vma, nvbo->bo.mem.mm_node); + else + if (nvbo->bo.mem.mem_type == TTM_PL_TT) + nouveau_vm_map_sg(vma, 0, size, node, node->pages); + + list_add_tail(&vma->head, &nvbo->vma_list); + vma->refcount = 1; + return 0; +} + +void +nouveau_bo_vma_del(struct nouveau_bo *nvbo, struct nouveau_vma *vma) +{ + if (vma->node) { + if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) { + spin_lock(&nvbo->bo.bdev->fence_lock); + ttm_bo_wait(&nvbo->bo, false, false, false); + spin_unlock(&nvbo->bo.bdev->fence_lock); + nouveau_vm_unmap(vma); + } + + nouveau_vm_put(vma); + list_del(&vma->head); + } +} diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c index a7583a8..b0d753f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_channel.c +++ b/drivers/gpu/drm/nouveau/nouveau_channel.c @@ -27,40 +27,63 @@ #include "nouveau_drv.h" #include "nouveau_drm.h" #include "nouveau_dma.h" +#include "nouveau_ramht.h" static int -nouveau_channel_pushbuf_ctxdma_init(struct nouveau_channel *chan) +nouveau_channel_pushbuf_init(struct nouveau_channel *chan) { + u32 mem = nouveau_vram_pushbuf ? TTM_PL_FLAG_VRAM : TTM_PL_FLAG_TT; struct drm_device *dev = chan->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; - struct nouveau_bo *pb = chan->pushbuf_bo; - struct nouveau_gpuobj *pushbuf = NULL; - int ret = 0; + int ret; + + /* allocate buffer object */ + ret = nouveau_bo_new(dev, 65536, 0, mem, 0, 0, &chan->pushbuf_bo); + if (ret) + goto out; + + ret = nouveau_bo_pin(chan->pushbuf_bo, mem); + if (ret) + goto out; + + ret = nouveau_bo_map(chan->pushbuf_bo); + if (ret) + goto out; + /* create DMA object covering the entire memtype where the push + * buffer resides, userspace can submit its own push buffers from + * anywhere within the same memtype. + */ + chan->pushbuf_base = chan->pushbuf_bo->bo.offset; if (dev_priv->card_type >= NV_50) { + ret = nouveau_bo_vma_add(chan->pushbuf_bo, chan->vm, + &chan->pushbuf_vma); + if (ret) + goto out; + if (dev_priv->card_type < NV_C0) { ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, 0, (1ULL << 40), NV_MEM_ACCESS_RO, NV_MEM_TARGET_VM, - &pushbuf); + &chan->pushbuf); } - chan->pushbuf_base = pb->bo.offset; + chan->pushbuf_base = chan->pushbuf_vma.offset; } else - if (pb->bo.mem.mem_type == TTM_PL_TT) { + if (chan->pushbuf_bo->bo.mem.mem_type == TTM_PL_TT) { ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, 0, dev_priv->gart_info.aper_size, NV_MEM_ACCESS_RO, - NV_MEM_TARGET_GART, &pushbuf); - chan->pushbuf_base = pb->bo.mem.start << PAGE_SHIFT; + NV_MEM_TARGET_GART, + &chan->pushbuf); } else if (dev_priv->card_type != NV_04) { ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, 0, dev_priv->fb_available_size, NV_MEM_ACCESS_RO, - NV_MEM_TARGET_VRAM, &pushbuf); - chan->pushbuf_base = pb->bo.mem.start << PAGE_SHIFT; + NV_MEM_TARGET_VRAM, + &chan->pushbuf); } else { /* NV04 cmdbuf hack, from original ddx.. not sure of it's * exact reason for existing :) PCI access to cmdbuf in @@ -70,47 +93,22 @@ nouveau_channel_pushbuf_ctxdma_init(struct nouveau_channel *chan) pci_resource_start(dev->pdev, 1), dev_priv->fb_available_size, NV_MEM_ACCESS_RO, - NV_MEM_TARGET_PCI, &pushbuf); - chan->pushbuf_base = pb->bo.mem.start << PAGE_SHIFT; + NV_MEM_TARGET_PCI, + &chan->pushbuf); } - nouveau_gpuobj_ref(pushbuf, &chan->pushbuf); - nouveau_gpuobj_ref(NULL, &pushbuf); - return ret; -} - -static struct nouveau_bo * -nouveau_channel_user_pushbuf_alloc(struct drm_device *dev) -{ - struct nouveau_bo *pushbuf = NULL; - int location, ret; - - if (nouveau_vram_pushbuf) - location = TTM_PL_FLAG_VRAM; - else - location = TTM_PL_FLAG_TT; - - ret = nouveau_bo_new(dev, NULL, 65536, 0, location, 0, 0x0000, &pushbuf); - if (ret) { - NV_ERROR(dev, "error allocating DMA push buffer: %d\n", ret); - return NULL; - } - - ret = nouveau_bo_pin(pushbuf, location); - if (ret) { - NV_ERROR(dev, "error pinning DMA push buffer: %d\n", ret); - nouveau_bo_ref(NULL, &pushbuf); - return NULL; - } - - ret = nouveau_bo_map(pushbuf); +out: if (ret) { - nouveau_bo_unpin(pushbuf); - nouveau_bo_ref(NULL, &pushbuf); - return NULL; + NV_ERROR(dev, "error initialising pushbuf: %d\n", ret); + nouveau_bo_vma_del(chan->pushbuf_bo, &chan->pushbuf_vma); + nouveau_gpuobj_ref(NULL, &chan->pushbuf); + if (chan->pushbuf_bo) { + nouveau_bo_unmap(chan->pushbuf_bo); + nouveau_bo_ref(NULL, &chan->pushbuf_bo); + } } - return pushbuf; + return 0; } /* allocates and initializes a fifo for user space consumption */ @@ -121,6 +119,7 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret, { struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo; + struct nouveau_fpriv *fpriv = nouveau_fpriv(file_priv); struct nouveau_channel *chan; unsigned long flags; int ret; @@ -160,19 +159,14 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret, INIT_LIST_HEAD(&chan->nvsw.flip); INIT_LIST_HEAD(&chan->fence.pending); - /* Allocate DMA push buffer */ - chan->pushbuf_bo = nouveau_channel_user_pushbuf_alloc(dev); - if (!chan->pushbuf_bo) { - ret = -ENOMEM; - NV_ERROR(dev, "pushbuf %d\n", ret); + /* setup channel's memory and vm */ + ret = nouveau_gpuobj_channel_init(chan, vram_handle, gart_handle); + if (ret) { + NV_ERROR(dev, "gpuobj %d\n", ret); nouveau_channel_put(&chan); return ret; } - nouveau_dma_pre_init(chan); - chan->user_put = 0x40; - chan->user_get = 0x44; - /* Allocate space for per-channel fixed notifier memory */ ret = nouveau_notifier_init_channel(chan); if (ret) { @@ -181,21 +175,17 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret, return ret; } - /* Setup channel's default objects */ - ret = nouveau_gpuobj_channel_init(chan, vram_handle, gart_handle); + /* Allocate DMA push buffer */ + ret = nouveau_channel_pushbuf_init(chan); if (ret) { - NV_ERROR(dev, "gpuobj %d\n", ret); + NV_ERROR(dev, "pushbuf %d\n", ret); nouveau_channel_put(&chan); return ret; } - /* Create a dma object for the push buffer */ - ret = nouveau_channel_pushbuf_ctxdma_init(chan); - if (ret) { - NV_ERROR(dev, "pbctxdma %d\n", ret); - nouveau_channel_put(&chan); - return ret; - } + nouveau_dma_pre_init(chan); + chan->user_put = 0x40; + chan->user_get = 0x44; /* disable the fifo caches */ pfifo->reassign(dev, false); @@ -220,6 +210,11 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret, nouveau_debugfs_channel_init(chan); NV_DEBUG(dev, "channel %d initialised\n", chan->id); + if (fpriv) { + spin_lock(&fpriv->lock); + list_add(&chan->list, &fpriv->channels); + spin_unlock(&fpriv->lock); + } *chan_ret = chan; return 0; } @@ -236,29 +231,23 @@ nouveau_channel_get_unlocked(struct nouveau_channel *ref) } struct nouveau_channel * -nouveau_channel_get(struct drm_device *dev, struct drm_file *file_priv, int id) +nouveau_channel_get(struct drm_file *file_priv, int id) { - struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fpriv *fpriv = nouveau_fpriv(file_priv); struct nouveau_channel *chan; - unsigned long flags; - - if (unlikely(id < 0 || id >= NOUVEAU_MAX_CHANNEL_NR)) - return ERR_PTR(-EINVAL); - - spin_lock_irqsave(&dev_priv->channels.lock, flags); - chan = nouveau_channel_get_unlocked(dev_priv->channels.ptr[id]); - spin_unlock_irqrestore(&dev_priv->channels.lock, flags); - - if (unlikely(!chan)) - return ERR_PTR(-EINVAL); - if (unlikely(file_priv && chan->file_priv != file_priv)) { - nouveau_channel_put_unlocked(&chan); - return ERR_PTR(-EINVAL); + spin_lock(&fpriv->lock); + list_for_each_entry(chan, &fpriv->channels, list) { + if (chan->id == id) { + chan = nouveau_channel_get_unlocked(chan); + spin_unlock(&fpriv->lock); + mutex_lock(&chan->mutex); + return chan; + } } + spin_unlock(&fpriv->lock); - mutex_lock(&chan->mutex); - return chan; + return ERR_PTR(-EINVAL); } void @@ -312,12 +301,14 @@ nouveau_channel_put_unlocked(struct nouveau_channel **pchan) /* destroy any resources the channel owned */ nouveau_gpuobj_ref(NULL, &chan->pushbuf); if (chan->pushbuf_bo) { + nouveau_bo_vma_del(chan->pushbuf_bo, &chan->pushbuf_vma); nouveau_bo_unmap(chan->pushbuf_bo); nouveau_bo_unpin(chan->pushbuf_bo); nouveau_bo_ref(NULL, &chan->pushbuf_bo); } - nouveau_gpuobj_channel_takedown(chan); + nouveau_ramht_ref(NULL, &chan->ramht, chan); nouveau_notifier_takedown_channel(chan); + nouveau_gpuobj_channel_takedown(chan); nouveau_channel_ref(NULL, pchan); } @@ -383,10 +374,11 @@ nouveau_channel_cleanup(struct drm_device *dev, struct drm_file *file_priv) NV_DEBUG(dev, "clearing FIFO enables from file_priv\n"); for (i = 0; i < engine->fifo.channels; i++) { - chan = nouveau_channel_get(dev, file_priv, i); + chan = nouveau_channel_get(file_priv, i); if (IS_ERR(chan)) continue; + list_del(&chan->list); atomic_dec(&chan->users); nouveau_channel_put(&chan); } @@ -459,10 +451,11 @@ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data, struct drm_nouveau_channel_free *req = data; struct nouveau_channel *chan; - chan = nouveau_channel_get(dev, file_priv, req->channel); + chan = nouveau_channel_get(file_priv, req->channel); if (IS_ERR(chan)) return PTR_ERR(chan); + list_del(&chan->list); atomic_dec(&chan->users); nouveau_channel_put(&chan); return 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 1595d0b..939d4df 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -40,7 +40,7 @@ static void nouveau_connector_hotplug(void *, int); static struct nouveau_encoder * -find_encoder_by_type(struct drm_connector *connector, int type) +find_encoder(struct drm_connector *connector, int type) { struct drm_device *dev = connector->dev; struct nouveau_encoder *nv_encoder; @@ -170,8 +170,8 @@ nouveau_connector_of_detect(struct drm_connector *connector) struct device_node *cn, *dn = pci_device_to_OF_node(dev->pdev); if (!dn || - !((nv_encoder = find_encoder_by_type(connector, OUTPUT_TMDS)) || - (nv_encoder = find_encoder_by_type(connector, OUTPUT_ANALOG)))) + !((nv_encoder = find_encoder(connector, OUTPUT_TMDS)) || + (nv_encoder = find_encoder(connector, OUTPUT_ANALOG)))) return NULL; for_each_child_of_node(dn, cn) { @@ -233,6 +233,7 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) struct drm_device *dev = connector->dev; struct nouveau_connector *nv_connector = nouveau_connector(connector); struct nouveau_encoder *nv_encoder = NULL; + struct nouveau_encoder *nv_partner; struct nouveau_i2c_chan *i2c; int type; @@ -266,19 +267,22 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) * same i2c channel so the value returned from ddc_detect * isn't necessarily correct. */ - if (nv_connector->dcb->type == DCB_CONNECTOR_DVI_I) { + nv_partner = NULL; + if (nv_encoder->dcb->type == OUTPUT_TMDS) + nv_partner = find_encoder(connector, OUTPUT_ANALOG); + if (nv_encoder->dcb->type == OUTPUT_ANALOG) + nv_partner = find_encoder(connector, OUTPUT_TMDS); + + if (nv_partner && ((nv_encoder->dcb->type == OUTPUT_ANALOG && + nv_partner->dcb->type == OUTPUT_TMDS) || + (nv_encoder->dcb->type == OUTPUT_TMDS && + nv_partner->dcb->type == OUTPUT_ANALOG))) { if (nv_connector->edid->input & DRM_EDID_INPUT_DIGITAL) type = OUTPUT_TMDS; else type = OUTPUT_ANALOG; - nv_encoder = find_encoder_by_type(connector, type); - if (!nv_encoder) { - NV_ERROR(dev, "Detected %d encoder on %s, " - "but no object!\n", type, - drm_get_connector_name(connector)); - return connector_status_disconnected; - } + nv_encoder = find_encoder(connector, type); } nouveau_connector_set_encoder(connector, nv_encoder); @@ -292,9 +296,9 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) } detect_analog: - nv_encoder = find_encoder_by_type(connector, OUTPUT_ANALOG); + nv_encoder = find_encoder(connector, OUTPUT_ANALOG); if (!nv_encoder && !nouveau_tv_disable) - nv_encoder = find_encoder_by_type(connector, OUTPUT_TV); + nv_encoder = find_encoder(connector, OUTPUT_TV); if (nv_encoder && force) { struct drm_encoder *encoder = to_drm_encoder(nv_encoder); struct drm_encoder_helper_funcs *helper = @@ -327,7 +331,7 @@ nouveau_connector_detect_lvds(struct drm_connector *connector, bool force) nv_connector->edid = NULL; } - nv_encoder = find_encoder_by_type(connector, OUTPUT_LVDS); + nv_encoder = find_encoder(connector, OUTPUT_LVDS); if (!nv_encoder) return connector_status_disconnected; @@ -405,7 +409,7 @@ nouveau_connector_force(struct drm_connector *connector) } else type = OUTPUT_ANY; - nv_encoder = find_encoder_by_type(connector, type); + nv_encoder = find_encoder(connector, type); if (!nv_encoder) { NV_ERROR(connector->dev, "can't find encoder to force %s on!\n", drm_get_connector_name(connector)); diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c index 568caed..00bc6ea 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.c +++ b/drivers/gpu/drm/nouveau/nouveau_dma.c @@ -167,8 +167,13 @@ nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo, int delta, int length) { struct nouveau_bo *pb = chan->pushbuf_bo; - uint64_t offset = bo->bo.offset + delta; + struct nouveau_vma *vma; int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; + u64 offset; + + vma = nouveau_bo_vma_find(bo, chan->vm); + BUG_ON(!vma); + offset = vma->offset + delta; BUG_ON(chan->dma.ib_free < 1); nouveau_bo_wr32(pb, ip++, lower_32_bits(offset)); diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 02c6f37..b30ddd8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -73,7 +73,7 @@ int nouveau_ignorelid = 0; module_param_named(ignorelid, nouveau_ignorelid, int, 0400); MODULE_PARM_DESC(noaccel, "Disable all acceleration"); -int nouveau_noaccel = 0; +int nouveau_noaccel = -1; module_param_named(noaccel, nouveau_noaccel, int, 0400); MODULE_PARM_DESC(nofbaccel, "Disable fbcon acceleration"); @@ -119,6 +119,10 @@ MODULE_PARM_DESC(msi, "Enable MSI (default: off)\n"); int nouveau_msi; module_param_named(msi, nouveau_msi, int, 0400); +MODULE_PARM_DESC(ctxfw, "Use external HUB/GPC ucode (fermi)\n"); +int nouveau_ctxfw; +module_param_named(ctxfw, nouveau_ctxfw, int, 0400); + int nouveau_fbpercrtc; #if 0 module_param_named(fbpercrtc, nouveau_fbpercrtc, int, 0400); @@ -210,10 +214,13 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state) pfifo->unload_context(dev); for (e = NVOBJ_ENGINE_NR - 1; e >= 0; e--) { - if (dev_priv->eng[e]) { - ret = dev_priv->eng[e]->fini(dev, e); - if (ret) - goto out_abort; + if (!dev_priv->eng[e]) + continue; + + ret = dev_priv->eng[e]->fini(dev, e, true); + if (ret) { + NV_ERROR(dev, "... engine %d failed: %d\n", i, ret); + goto out_abort; } } @@ -354,7 +361,7 @@ nouveau_pci_resume(struct pci_dev *pdev) list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - u32 offset = nv_crtc->cursor.nvbo->bo.mem.start << PAGE_SHIFT; + u32 offset = nv_crtc->cursor.nvbo->bo.offset; nv_crtc->cursor.set_offset(nv_crtc, offset); nv_crtc->cursor.set_pos(nv_crtc, nv_crtc->cursor_saved_x, @@ -389,7 +396,9 @@ static struct drm_driver driver = { .firstopen = nouveau_firstopen, .lastclose = nouveau_lastclose, .unload = nouveau_unload, + .open = nouveau_open, .preclose = nouveau_preclose, + .postclose = nouveau_postclose, #if defined(CONFIG_DRM_NOUVEAU_DEBUG) .debugfs_init = nouveau_debugfs_init, .debugfs_cleanup = nouveau_debugfs_takedown, @@ -420,6 +429,8 @@ static struct drm_driver driver = { .gem_init_object = nouveau_gem_object_new, .gem_free_object = nouveau_gem_object_del, + .gem_open_object = nouveau_gem_object_open, + .gem_close_object = nouveau_gem_object_close, .name = DRIVER_NAME, .desc = DRIVER_DESC, diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 9c56331..d7d51de 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -46,9 +46,17 @@ #include "ttm/ttm_module.h" struct nouveau_fpriv { - struct ttm_object_file *tfile; + spinlock_t lock; + struct list_head channels; + struct nouveau_vm *vm; }; +static inline struct nouveau_fpriv * +nouveau_fpriv(struct drm_file *file_priv) +{ + return file_priv ? file_priv->driver_priv : NULL; +} + #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) #include "nouveau_drm.h" @@ -69,7 +77,7 @@ struct nouveau_mem { struct drm_device *dev; struct nouveau_vma bar_vma; - struct nouveau_vma tmp_vma; + struct nouveau_vma vma[2]; u8 page_shift; struct drm_mm_node *tag; @@ -107,7 +115,8 @@ struct nouveau_bo { struct nouveau_channel *channel; - struct nouveau_vma vma; + struct list_head vma_list; + unsigned page_shift; uint32_t tile_mode; uint32_t tile_flags; @@ -176,9 +185,10 @@ struct nouveau_gpuobj { uint32_t flags; u32 size; - u32 pinst; - u32 cinst; - u64 vinst; + u32 pinst; /* PRAMIN BAR offset */ + u32 cinst; /* Channel offset */ + u64 vinst; /* VRAM address */ + u64 linst; /* VM address */ uint32_t engine; uint32_t class; @@ -201,6 +211,7 @@ enum nouveau_channel_mutex_class { struct nouveau_channel { struct drm_device *dev; + struct list_head list; int id; /* references to the channel data structure */ @@ -228,15 +239,18 @@ struct nouveau_channel { uint32_t sequence; uint32_t sequence_ack; atomic_t last_sequence_irq; + struct nouveau_vma vma; } fence; /* DMA push buffer */ struct nouveau_gpuobj *pushbuf; struct nouveau_bo *pushbuf_bo; + struct nouveau_vma pushbuf_vma; uint32_t pushbuf_base; /* Notifier memory */ struct nouveau_bo *notifier_bo; + struct nouveau_vma notifier_vma; struct drm_mm notifier_heap; /* PFIFO context */ @@ -278,6 +292,7 @@ struct nouveau_channel { uint32_t sw_subchannel[8]; + struct nouveau_vma dispc_vma[2]; struct { struct nouveau_gpuobj *vblsem; uint32_t vblsem_head; @@ -297,7 +312,7 @@ struct nouveau_channel { struct nouveau_exec_engine { void (*destroy)(struct drm_device *, int engine); int (*init)(struct drm_device *, int engine); - int (*fini)(struct drm_device *, int engine); + int (*fini)(struct drm_device *, int engine, bool suspend); int (*context_new)(struct nouveau_channel *, int engine); void (*context_del)(struct nouveau_channel *, int engine); int (*object_new)(struct nouveau_channel *, int engine, @@ -314,7 +329,8 @@ struct nouveau_instmem_engine { int (*suspend)(struct drm_device *dev); void (*resume)(struct drm_device *dev); - int (*get)(struct nouveau_gpuobj *, u32 size, u32 align); + int (*get)(struct nouveau_gpuobj *, struct nouveau_channel *, + u32 size, u32 align); void (*put)(struct nouveau_gpuobj *); int (*map)(struct nouveau_gpuobj *); void (*unmap)(struct nouveau_gpuobj *); @@ -445,9 +461,9 @@ struct nouveau_pm_level { struct nouveau_pm_temp_sensor_constants { u16 offset_constant; s16 offset_mult; - u16 offset_div; - u16 slope_mult; - u16 slope_div; + s16 offset_div; + s16 slope_mult; + s16 slope_div; }; struct nouveau_pm_threshold_temp { @@ -488,7 +504,10 @@ struct nouveau_pm_engine { }; struct nouveau_vram_engine { + struct nouveau_mm *mm; + int (*init)(struct drm_device *); + void (*takedown)(struct drm_device *dev); int (*get)(struct drm_device *, u64, u32 align, u32 size_nc, u32 type, struct nouveau_mem **); void (*put)(struct drm_device *, struct nouveau_mem **); @@ -608,6 +627,7 @@ enum nouveau_card_type { struct drm_nouveau_private { struct drm_device *dev; + bool noaccel; /* the card type, takes NV_* as values */ enum nouveau_card_type card_type; @@ -700,7 +720,6 @@ struct drm_nouveau_private { /* VRAM/fb configuration */ uint64_t vram_size; uint64_t vram_sys_base; - u32 vram_rblock_size; uint64_t fb_phys; uint64_t fb_available_size; @@ -784,12 +803,15 @@ extern int nouveau_override_conntype; extern char *nouveau_perflvl; extern int nouveau_perflvl_wr; extern int nouveau_msi; +extern int nouveau_ctxfw; extern int nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state); extern int nouveau_pci_resume(struct pci_dev *pdev); /* nouveau_state.c */ +extern int nouveau_open(struct drm_device *, struct drm_file *); extern void nouveau_preclose(struct drm_device *dev, struct drm_file *); +extern void nouveau_postclose(struct drm_device *, struct drm_file *); extern int nouveau_load(struct drm_device *, unsigned long flags); extern int nouveau_firstopen(struct drm_device *); extern void nouveau_lastclose(struct drm_device *); @@ -847,7 +869,7 @@ extern int nouveau_channel_alloc(struct drm_device *dev, extern struct nouveau_channel * nouveau_channel_get_unlocked(struct nouveau_channel *); extern struct nouveau_channel * -nouveau_channel_get(struct drm_device *, struct drm_file *, int id); +nouveau_channel_get(struct drm_file *, int id); extern void nouveau_channel_put_unlocked(struct nouveau_channel **); extern void nouveau_channel_put(struct nouveau_channel **); extern void nouveau_channel_ref(struct nouveau_channel *chan, @@ -1120,7 +1142,6 @@ extern int nvc0_fifo_unload_context(struct drm_device *); /* nv04_graph.c */ extern int nv04_graph_create(struct drm_device *); -extern void nv04_graph_fifo_access(struct drm_device *, bool); extern int nv04_graph_object_new(struct nouveau_channel *, int, u32, u16); extern int nv04_graph_mthd_page_flip(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data); @@ -1169,7 +1190,8 @@ extern int nv04_instmem_init(struct drm_device *); extern void nv04_instmem_takedown(struct drm_device *); extern int nv04_instmem_suspend(struct drm_device *); extern void nv04_instmem_resume(struct drm_device *); -extern int nv04_instmem_get(struct nouveau_gpuobj *, u32 size, u32 align); +extern int nv04_instmem_get(struct nouveau_gpuobj *, struct nouveau_channel *, + u32 size, u32 align); extern void nv04_instmem_put(struct nouveau_gpuobj *); extern int nv04_instmem_map(struct nouveau_gpuobj *); extern void nv04_instmem_unmap(struct nouveau_gpuobj *); @@ -1180,7 +1202,8 @@ extern int nv50_instmem_init(struct drm_device *); extern void nv50_instmem_takedown(struct drm_device *); extern int nv50_instmem_suspend(struct drm_device *); extern void nv50_instmem_resume(struct drm_device *); -extern int nv50_instmem_get(struct nouveau_gpuobj *, u32 size, u32 align); +extern int nv50_instmem_get(struct nouveau_gpuobj *, struct nouveau_channel *, + u32 size, u32 align); extern void nv50_instmem_put(struct nouveau_gpuobj *); extern int nv50_instmem_map(struct nouveau_gpuobj *); extern void nv50_instmem_unmap(struct nouveau_gpuobj *); @@ -1247,10 +1270,9 @@ extern int nv04_crtc_create(struct drm_device *, int index); /* nouveau_bo.c */ extern struct ttm_bo_driver nouveau_bo_driver; -extern int nouveau_bo_new(struct drm_device *, struct nouveau_channel *, - int size, int align, uint32_t flags, - uint32_t tile_mode, uint32_t tile_flags, - struct nouveau_bo **); +extern int nouveau_bo_new(struct drm_device *, int size, int align, + uint32_t flags, uint32_t tile_mode, + uint32_t tile_flags, struct nouveau_bo **); extern int nouveau_bo_pin(struct nouveau_bo *, uint32_t flags); extern int nouveau_bo_unpin(struct nouveau_bo *); extern int nouveau_bo_map(struct nouveau_bo *); @@ -1265,6 +1287,12 @@ extern void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *); extern int nouveau_bo_validate(struct nouveau_bo *, bool interruptible, bool no_wait_reserve, bool no_wait_gpu); +extern struct nouveau_vma * +nouveau_bo_vma_find(struct nouveau_bo *, struct nouveau_vm *); +extern int nouveau_bo_vma_add(struct nouveau_bo *, struct nouveau_vm *, + struct nouveau_vma *); +extern void nouveau_bo_vma_del(struct nouveau_bo *, struct nouveau_vma *); + /* nouveau_fence.c */ struct nouveau_fence; extern int nouveau_fence_init(struct drm_device *); @@ -1310,12 +1338,14 @@ static inline struct nouveau_fence *nouveau_fence_ref(struct nouveau_fence *obj) } /* nouveau_gem.c */ -extern int nouveau_gem_new(struct drm_device *, struct nouveau_channel *, - int size, int align, uint32_t domain, - uint32_t tile_mode, uint32_t tile_flags, - struct nouveau_bo **); +extern int nouveau_gem_new(struct drm_device *, int size, int align, + uint32_t domain, uint32_t tile_mode, + uint32_t tile_flags, struct nouveau_bo **); extern int nouveau_gem_object_new(struct drm_gem_object *); extern void nouveau_gem_object_del(struct drm_gem_object *); +extern int nouveau_gem_object_open(struct drm_gem_object *, struct drm_file *); +extern void nouveau_gem_object_close(struct drm_gem_object *, + struct drm_file *); extern int nouveau_gem_ioctl_new(struct drm_device *, void *, struct drm_file *); extern int nouveau_gem_ioctl_pushbuf(struct drm_device *, void *, diff --git a/drivers/gpu/drm/nouveau/nouveau_fb.h b/drivers/gpu/drm/nouveau/nouveau_fb.h index a3a88ad..95c843e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fb.h +++ b/drivers/gpu/drm/nouveau/nouveau_fb.h @@ -30,6 +30,7 @@ struct nouveau_framebuffer { struct drm_framebuffer base; struct nouveau_bo *nvbo; + struct nouveau_vma vma; u32 r_dma; u32 r_format; u32 r_pitch; diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 39aee6d..14a8627 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -279,6 +279,7 @@ nouveau_fbcon_create(struct nouveau_fbdev *nfbdev, struct fb_info *info; struct drm_framebuffer *fb; struct nouveau_framebuffer *nouveau_fb; + struct nouveau_channel *chan; struct nouveau_bo *nvbo; struct drm_mode_fb_cmd mode_cmd; struct pci_dev *pdev = dev->pdev; @@ -296,8 +297,8 @@ nouveau_fbcon_create(struct nouveau_fbdev *nfbdev, size = mode_cmd.pitch * mode_cmd.height; size = roundup(size, PAGE_SIZE); - ret = nouveau_gem_new(dev, dev_priv->channel, size, 0, - NOUVEAU_GEM_DOMAIN_VRAM, 0, 0x0000, &nvbo); + ret = nouveau_gem_new(dev, size, 0, NOUVEAU_GEM_DOMAIN_VRAM, + 0, 0x0000, &nvbo); if (ret) { NV_ERROR(dev, "failed to allocate framebuffer\n"); goto out; @@ -318,6 +319,15 @@ nouveau_fbcon_create(struct nouveau_fbdev *nfbdev, goto out; } + chan = nouveau_nofbaccel ? NULL : dev_priv->channel; + if (chan && dev_priv->card_type >= NV_50) { + ret = nouveau_bo_vma_add(nvbo, chan->vm, &nfbdev->nouveau_fb.vma); + if (ret) { + NV_ERROR(dev, "failed to map fb into chan: %d\n", ret); + chan = NULL; + } + } + mutex_lock(&dev->struct_mutex); info = framebuffer_alloc(0, device); @@ -448,6 +458,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *nfbdev) if (nouveau_fb->nvbo) { nouveau_bo_unmap(nouveau_fb->nvbo); + nouveau_bo_vma_del(nouveau_fb->nvbo, &nouveau_fb->vma); drm_gem_object_unreference_unlocked(nouveau_fb->nvbo->gem); nouveau_fb->nvbo = NULL; } diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 7347075..8d02d87 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -336,6 +336,7 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema) { struct drm_nouveau_private *dev_priv = chan->dev->dev_private; struct nouveau_fence *fence = NULL; + u64 offset = chan->fence.vma.offset + sema->mem->start; int ret; if (dev_priv->chipset < 0x84) { @@ -345,13 +346,10 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema) BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 3); OUT_RING (chan, NvSema); - OUT_RING (chan, sema->mem->start); + OUT_RING (chan, offset); OUT_RING (chan, 1); } else if (dev_priv->chipset < 0xc0) { - struct nouveau_vma *vma = &dev_priv->fence.bo->vma; - u64 offset = vma->offset + sema->mem->start; - ret = RING_SPACE(chan, 7); if (ret) return ret; @@ -364,9 +362,6 @@ semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema) OUT_RING (chan, 1); OUT_RING (chan, 1); /* ACQUIRE_EQ */ } else { - struct nouveau_vma *vma = &dev_priv->fence.bo->vma; - u64 offset = vma->offset + sema->mem->start; - ret = RING_SPACE(chan, 5); if (ret) return ret; @@ -394,6 +389,7 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema) { struct drm_nouveau_private *dev_priv = chan->dev->dev_private; struct nouveau_fence *fence = NULL; + u64 offset = chan->fence.vma.offset + sema->mem->start; int ret; if (dev_priv->chipset < 0x84) { @@ -403,14 +399,11 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema) BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 2); OUT_RING (chan, NvSema); - OUT_RING (chan, sema->mem->start); + OUT_RING (chan, offset); BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_RELEASE, 1); OUT_RING (chan, 1); } else if (dev_priv->chipset < 0xc0) { - struct nouveau_vma *vma = &dev_priv->fence.bo->vma; - u64 offset = vma->offset + sema->mem->start; - ret = RING_SPACE(chan, 7); if (ret) return ret; @@ -423,9 +416,6 @@ semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema) OUT_RING (chan, 1); OUT_RING (chan, 2); /* RELEASE */ } else { - struct nouveau_vma *vma = &dev_priv->fence.bo->vma; - u64 offset = vma->offset + sema->mem->start; - ret = RING_SPACE(chan, 5); if (ret) return ret; @@ -540,6 +530,12 @@ nouveau_fence_channel_init(struct nouveau_channel *chan) nouveau_gpuobj_ref(NULL, &obj); if (ret) return ret; + } else { + /* map fence bo into channel's vm */ + ret = nouveau_bo_vma_add(dev_priv->fence.bo, chan->vm, + &chan->fence.vma); + if (ret) + return ret; } INIT_LIST_HEAD(&chan->fence.pending); @@ -551,10 +547,10 @@ nouveau_fence_channel_init(struct nouveau_channel *chan) void nouveau_fence_channel_fini(struct nouveau_channel *chan) { + struct drm_nouveau_private *dev_priv = chan->dev->dev_private; struct nouveau_fence *tmp, *fence; spin_lock(&chan->fence.lock); - list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) { fence->signalled = true; list_del(&fence->entry); @@ -564,8 +560,9 @@ nouveau_fence_channel_fini(struct nouveau_channel *chan) kref_put(&fence->refcount, nouveau_fence_del); } - spin_unlock(&chan->fence.lock); + + nouveau_bo_vma_del(dev_priv->fence.bo, &chan->fence.vma); } int @@ -577,7 +574,7 @@ nouveau_fence_init(struct drm_device *dev) /* Create a shared VRAM heap for cross-channel sync. */ if (USE_SEMA(dev)) { - ret = nouveau_bo_new(dev, NULL, size, 0, TTM_PL_FLAG_VRAM, + ret = nouveau_bo_new(dev, size, 0, TTM_PL_FLAG_VRAM, 0, 0, &dev_priv->fence.bo); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index b52e460..5f0bc57 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -60,9 +60,71 @@ nouveau_gem_object_del(struct drm_gem_object *gem) } int -nouveau_gem_new(struct drm_device *dev, struct nouveau_channel *chan, - int size, int align, uint32_t domain, uint32_t tile_mode, - uint32_t tile_flags, struct nouveau_bo **pnvbo) +nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) +{ + struct nouveau_fpriv *fpriv = nouveau_fpriv(file_priv); + struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_vma *vma; + int ret; + + if (!fpriv->vm) + return 0; + + ret = ttm_bo_reserve(&nvbo->bo, false, false, false, 0); + if (ret) + return ret; + + vma = nouveau_bo_vma_find(nvbo, fpriv->vm); + if (!vma) { + vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (!vma) { + ret = -ENOMEM; + goto out; + } + + ret = nouveau_bo_vma_add(nvbo, fpriv->vm, vma); + if (ret) { + kfree(vma); + goto out; + } + } else { + vma->refcount++; + } + +out: + ttm_bo_unreserve(&nvbo->bo); + return ret; +} + +void +nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) +{ + struct nouveau_fpriv *fpriv = nouveau_fpriv(file_priv); + struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_vma *vma; + int ret; + + if (!fpriv->vm) + return; + + ret = ttm_bo_reserve(&nvbo->bo, false, false, false, 0); + if (ret) + return; + + vma = nouveau_bo_vma_find(nvbo, fpriv->vm); + if (vma) { + if (--vma->refcount == 0) { + nouveau_bo_vma_del(nvbo, vma); + kfree(vma); + } + } + ttm_bo_unreserve(&nvbo->bo); +} + +int +nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain, + uint32_t tile_mode, uint32_t tile_flags, + struct nouveau_bo **pnvbo) { struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_bo *nvbo; @@ -76,7 +138,7 @@ nouveau_gem_new(struct drm_device *dev, struct nouveau_channel *chan, if (!flags || domain & NOUVEAU_GEM_DOMAIN_CPU) flags |= TTM_PL_FLAG_SYSTEM; - ret = nouveau_bo_new(dev, chan, size, align, flags, tile_mode, + ret = nouveau_bo_new(dev, size, align, flags, tile_mode, tile_flags, pnvbo); if (ret) return ret; @@ -103,17 +165,28 @@ nouveau_gem_new(struct drm_device *dev, struct nouveau_channel *chan, } static int -nouveau_gem_info(struct drm_gem_object *gem, struct drm_nouveau_gem_info *rep) +nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, + struct drm_nouveau_gem_info *rep) { + struct nouveau_fpriv *fpriv = nouveau_fpriv(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_vma *vma; if (nvbo->bo.mem.mem_type == TTM_PL_TT) rep->domain = NOUVEAU_GEM_DOMAIN_GART; else rep->domain = NOUVEAU_GEM_DOMAIN_VRAM; - rep->size = nvbo->bo.mem.num_pages << PAGE_SHIFT; rep->offset = nvbo->bo.offset; + if (fpriv->vm) { + vma = nouveau_bo_vma_find(nvbo, fpriv->vm); + if (!vma) + return -EINVAL; + + rep->offset = vma->offset; + } + + rep->size = nvbo->bo.mem.num_pages << PAGE_SHIFT; rep->map_handle = nvbo->bo.addr_space_offset; rep->tile_mode = nvbo->tile_mode; rep->tile_flags = nvbo->tile_flags; @@ -127,7 +200,6 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data, struct drm_nouveau_private *dev_priv = dev->dev_private; struct drm_nouveau_gem_new *req = data; struct nouveau_bo *nvbo = NULL; - struct nouveau_channel *chan = NULL; int ret = 0; if (unlikely(dev_priv->ttm.bdev.dev_mapping == NULL)) @@ -138,28 +210,21 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data, return -EINVAL; } - if (req->channel_hint) { - chan = nouveau_channel_get(dev, file_priv, req->channel_hint); - if (IS_ERR(chan)) - return PTR_ERR(chan); - } - - ret = nouveau_gem_new(dev, chan, req->info.size, req->align, + ret = nouveau_gem_new(dev, req->info.size, req->align, req->info.domain, req->info.tile_mode, req->info.tile_flags, &nvbo); - if (chan) - nouveau_channel_put(&chan); if (ret) return ret; - ret = nouveau_gem_info(nvbo->gem, &req->info); - if (ret) - goto out; - ret = drm_gem_handle_create(file_priv, nvbo->gem, &req->info.handle); + if (ret == 0) { + ret = nouveau_gem_info(file_priv, nvbo->gem, &req->info); + if (ret) + drm_gem_handle_delete(file_priv, req->info.handle); + } + /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(nvbo->gem); -out: return ret; } @@ -318,6 +383,7 @@ static int validate_list(struct nouveau_channel *chan, struct list_head *list, struct drm_nouveau_gem_pushbuf_bo *pbbo, uint64_t user_pbbo_ptr) { + struct drm_nouveau_private *dev_priv = chan->dev->dev_private; struct drm_nouveau_gem_pushbuf_bo __user *upbbo = (void __force __user *)(uintptr_t)user_pbbo_ptr; struct drm_device *dev = chan->dev; @@ -356,24 +422,26 @@ validate_list(struct nouveau_channel *chan, struct list_head *list, return ret; } - if (nvbo->bo.offset == b->presumed.offset && - ((nvbo->bo.mem.mem_type == TTM_PL_VRAM && - b->presumed.domain & NOUVEAU_GEM_DOMAIN_VRAM) || - (nvbo->bo.mem.mem_type == TTM_PL_TT && - b->presumed.domain & NOUVEAU_GEM_DOMAIN_GART))) - continue; + if (dev_priv->card_type < NV_50) { + if (nvbo->bo.offset == b->presumed.offset && + ((nvbo->bo.mem.mem_type == TTM_PL_VRAM && + b->presumed.domain & NOUVEAU_GEM_DOMAIN_VRAM) || + (nvbo->bo.mem.mem_type == TTM_PL_TT && + b->presumed.domain & NOUVEAU_GEM_DOMAIN_GART))) + continue; - if (nvbo->bo.mem.mem_type == TTM_PL_TT) - b->presumed.domain = NOUVEAU_GEM_DOMAIN_GART; - else - b->presumed.domain = NOUVEAU_GEM_DOMAIN_VRAM; - b->presumed.offset = nvbo->bo.offset; - b->presumed.valid = 0; - relocs++; - - if (DRM_COPY_TO_USER(&upbbo[nvbo->pbbo_index].presumed, - &b->presumed, sizeof(b->presumed))) - return -EFAULT; + if (nvbo->bo.mem.mem_type == TTM_PL_TT) + b->presumed.domain = NOUVEAU_GEM_DOMAIN_GART; + else + b->presumed.domain = NOUVEAU_GEM_DOMAIN_VRAM; + b->presumed.offset = nvbo->bo.offset; + b->presumed.valid = 0; + relocs++; + + if (DRM_COPY_TO_USER(&upbbo[nvbo->pbbo_index].presumed, + &b->presumed, sizeof(b->presumed))) + return -EFAULT; + } } return relocs; @@ -548,7 +616,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, struct nouveau_fence *fence = NULL; int i, j, ret = 0, do_reloc = 0; - chan = nouveau_channel_get(dev, file_priv, req->channel); + chan = nouveau_channel_get(file_priv, req->channel); if (IS_ERR(chan)) return PTR_ERR(chan); @@ -782,7 +850,7 @@ nouveau_gem_ioctl_info(struct drm_device *dev, void *data, if (!gem) return -ENOENT; - ret = nouveau_gem_info(gem, req); + ret = nouveau_gem_info(file_priv, gem, req); drm_gem_object_unreference_unlocked(gem); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c index 2ba7265..868c7fd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_irq.c +++ b/drivers/gpu/drm/nouveau/nouveau_irq.c @@ -79,7 +79,7 @@ nouveau_irq_handler(DRM_IRQ_ARGS) int i; stat = nv_rd32(dev, NV03_PMC_INTR_0); - if (!stat) + if (stat == 0 || stat == ~0) return IRQ_NONE; spin_lock_irqsave(&dev_priv->context_switch_lock, flags); diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 5ee14d2..f9ae2fc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -397,7 +397,7 @@ nouveau_mem_vram_init(struct drm_device *dev) if (pci_dma_supported(dev->pdev, DMA_BIT_MASK(40))) dma_bits = 40; } else - if (0 && drm_pci_device_is_pcie(dev) && + if (0 && pci_is_pcie(dev->pdev) && dev_priv->chipset > 0x40 && dev_priv->chipset != 0x45) { if (pci_dma_supported(dev->pdev, DMA_BIT_MASK(39))) @@ -423,38 +423,6 @@ nouveau_mem_vram_init(struct drm_device *dev) return ret; } - /* reserve space at end of VRAM for PRAMIN */ - if (dev_priv->card_type >= NV_50) { - dev_priv->ramin_rsvd_vram = 1 * 1024 * 1024; - } else - if (dev_priv->card_type >= NV_40) { - u32 vs = hweight8((nv_rd32(dev, 0x001540) & 0x0000ff00) >> 8); - u32 rsvd; - - /* estimate grctx size, the magics come from nv40_grctx.c */ - if (dev_priv->chipset == 0x40) rsvd = 0x6aa0 * vs; - else if (dev_priv->chipset < 0x43) rsvd = 0x4f00 * vs; - else if (nv44_graph_class(dev)) rsvd = 0x4980 * vs; - else rsvd = 0x4a40 * vs; - rsvd += 16 * 1024; - rsvd *= dev_priv->engine.fifo.channels; - - /* pciegart table */ - if (drm_pci_device_is_pcie(dev)) - rsvd += 512 * 1024; - - /* object storage */ - rsvd += 512 * 1024; - - dev_priv->ramin_rsvd_vram = round_up(rsvd, 4096); - } else { - dev_priv->ramin_rsvd_vram = 512 * 1024; - } - - ret = dev_priv->engine.vram.init(dev); - if (ret) - return ret; - NV_INFO(dev, "Detected %dMiB VRAM\n", (int)(dev_priv->vram_size >> 20)); if (dev_priv->vram_sys_base) { NV_INFO(dev, "Stolen system memory at: 0x%010llx\n", @@ -479,7 +447,7 @@ nouveau_mem_vram_init(struct drm_device *dev) } if (dev_priv->card_type < NV_50) { - ret = nouveau_bo_new(dev, NULL, 256*1024, 0, TTM_PL_FLAG_VRAM, + ret = nouveau_bo_new(dev, 256*1024, 0, TTM_PL_FLAG_VRAM, 0, 0, &dev_priv->vga_ram); if (ret == 0) ret = nouveau_bo_pin(dev_priv->vga_ram, @@ -729,37 +697,31 @@ nouveau_mem_timing_fini(struct drm_device *dev) } static int -nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long p_size) +nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) { - struct drm_nouveau_private *dev_priv = nouveau_bdev(man->bdev); - struct nouveau_mm *mm; - u64 size, block, rsvd; - int ret; - - rsvd = (256 * 1024); /* vga memory */ - size = (p_size << PAGE_SHIFT) - rsvd; - block = dev_priv->vram_rblock_size; - - ret = nouveau_mm_init(&mm, rsvd >> 12, size >> 12, block >> 12); - if (ret) - return ret; - - man->priv = mm; + /* nothing to do */ return 0; } static int nouveau_vram_manager_fini(struct ttm_mem_type_manager *man) { - struct nouveau_mm *mm = man->priv; - int ret; + /* nothing to do */ + return 0; +} - ret = nouveau_mm_fini(&mm); - if (ret) - return ret; +static inline void +nouveau_mem_node_cleanup(struct nouveau_mem *node) +{ + if (node->vma[0].node) { + nouveau_vm_unmap(&node->vma[0]); + nouveau_vm_put(&node->vma[0]); + } - man->priv = NULL; - return 0; + if (node->vma[1].node) { + nouveau_vm_unmap(&node->vma[1]); + nouveau_vm_put(&node->vma[1]); + } } static void @@ -768,14 +730,9 @@ nouveau_vram_manager_del(struct ttm_mem_type_manager *man, { struct drm_nouveau_private *dev_priv = nouveau_bdev(man->bdev); struct nouveau_vram_engine *vram = &dev_priv->engine.vram; - struct nouveau_mem *node = mem->mm_node; struct drm_device *dev = dev_priv->dev; - if (node->tmp_vma.node) { - nouveau_vm_unmap(&node->tmp_vma); - nouveau_vm_put(&node->tmp_vma); - } - + nouveau_mem_node_cleanup(mem->mm_node); vram->put(dev, (struct nouveau_mem **)&mem->mm_node); } @@ -794,7 +751,7 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man, int ret; if (nvbo->tile_flags & NOUVEAU_GEM_TILE_NONCONTIG) - size_nc = 1 << nvbo->vma.node->type; + size_nc = 1 << nvbo->page_shift; ret = vram->get(dev, mem->num_pages << PAGE_SHIFT, mem->page_alignment << PAGE_SHIFT, size_nc, @@ -804,9 +761,7 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man, return (ret == -ENOSPC) ? 0 : ret; } - node->page_shift = 12; - if (nvbo->vma.node) - node->page_shift = nvbo->vma.node->type; + node->page_shift = nvbo->page_shift; mem->mm_node = node; mem->start = node->offset >> PAGE_SHIFT; @@ -862,15 +817,9 @@ static void nouveau_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem) { - struct nouveau_mem *node = mem->mm_node; - - if (node->tmp_vma.node) { - nouveau_vm_unmap(&node->tmp_vma); - nouveau_vm_put(&node->tmp_vma); - } - + nouveau_mem_node_cleanup(mem->mm_node); + kfree(mem->mm_node); mem->mm_node = NULL; - kfree(node); } static int @@ -880,11 +829,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem) { struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev); - struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nouveau_vma *vma = &nvbo->vma; - struct nouveau_vm *vm = vma->vm; struct nouveau_mem *node; - int ret; if (unlikely((mem->num_pages << PAGE_SHIFT) >= dev_priv->gart_info.aper_size)) @@ -893,24 +838,8 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; + node->page_shift = 12; - /* This node must be for evicting large-paged VRAM - * to system memory. Due to a nv50 limitation of - * not being able to mix large/small pages within - * the same PDE, we need to create a temporary - * small-paged VMA for the eviction. - */ - if (vma->node->type != vm->spg_shift) { - ret = nouveau_vm_get(vm, (u64)vma->node->length << 12, - vm->spg_shift, NV_MEM_ACCESS_RW, - &node->tmp_vma); - if (ret) { - kfree(node); - return ret; - } - } - - node->page_shift = nvbo->vma.node->type; mem->mm_node = node; mem->start = 0; return 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.c b/drivers/gpu/drm/nouveau/nouveau_mm.c index 7609756..1640dec 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mm.c +++ b/drivers/gpu/drm/nouveau/nouveau_mm.c @@ -158,11 +158,18 @@ int nouveau_mm_fini(struct nouveau_mm **prmm) { struct nouveau_mm *rmm = *prmm; - struct nouveau_mm_node *heap = + struct nouveau_mm_node *node, *heap = list_first_entry(&rmm->nodes, struct nouveau_mm_node, nl_entry); - if (!list_is_singular(&rmm->nodes)) + if (!list_is_singular(&rmm->nodes)) { + printk(KERN_ERR "nouveau_mm not empty at destroy time!\n"); + list_for_each_entry(node, &rmm->nodes, nl_entry) { + printk(KERN_ERR "0x%02x: 0x%08x 0x%08x\n", + node->type, node->offset, node->length); + } + WARN_ON(1); return -EBUSY; + } kfree(heap); kfree(rmm); diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.h b/drivers/gpu/drm/nouveau/nouveau_mm.h index 1f7483a..b9c016d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mm.h +++ b/drivers/gpu/drm/nouveau/nouveau_mm.h @@ -52,6 +52,7 @@ int nouveau_mm_get(struct nouveau_mm *, int type, u32 size, u32 size_nc, void nouveau_mm_put(struct nouveau_mm *, struct nouveau_mm_node *); int nv50_vram_init(struct drm_device *); +void nv50_vram_fini(struct drm_device *); int nv50_vram_new(struct drm_device *, u64 size, u32 align, u32 size_nc, u32 memtype, struct nouveau_mem **); void nv50_vram_del(struct drm_device *, struct nouveau_mem **); diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c index 5b39718..6abdbe6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_notifier.c +++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c @@ -34,6 +34,7 @@ int nouveau_notifier_init_channel(struct nouveau_channel *chan) { struct drm_device *dev = chan->dev; + struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_bo *ntfy = NULL; uint32_t flags, ttmpl; int ret; @@ -46,7 +47,7 @@ nouveau_notifier_init_channel(struct nouveau_channel *chan) ttmpl = TTM_PL_FLAG_TT; } - ret = nouveau_gem_new(dev, NULL, PAGE_SIZE, 0, flags, 0, 0, &ntfy); + ret = nouveau_gem_new(dev, PAGE_SIZE, 0, flags, 0, 0, &ntfy); if (ret) return ret; @@ -58,14 +59,22 @@ nouveau_notifier_init_channel(struct nouveau_channel *chan) if (ret) goto out_err; + if (dev_priv->card_type >= NV_50) { + ret = nouveau_bo_vma_add(ntfy, chan->vm, &chan->notifier_vma); + if (ret) + goto out_err; + } + ret = drm_mm_init(&chan->notifier_heap, 0, ntfy->bo.mem.size); if (ret) goto out_err; chan->notifier_bo = ntfy; out_err: - if (ret) + if (ret) { + nouveau_bo_vma_del(ntfy, &chan->notifier_vma); drm_gem_object_unreference_unlocked(ntfy->gem); + } return ret; } @@ -78,6 +87,7 @@ nouveau_notifier_takedown_channel(struct nouveau_channel *chan) if (!chan->notifier_bo) return; + nouveau_bo_vma_del(chan->notifier_bo, &chan->notifier_vma); nouveau_bo_unmap(chan->notifier_bo); mutex_lock(&dev->struct_mutex); nouveau_bo_unpin(chan->notifier_bo); @@ -122,10 +132,10 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle, target = NV_MEM_TARGET_VRAM; else target = NV_MEM_TARGET_GART; - offset = chan->notifier_bo->bo.mem.start << PAGE_SHIFT; + offset = chan->notifier_bo->bo.offset; } else { target = NV_MEM_TARGET_VM; - offset = chan->notifier_bo->vma.offset; + offset = chan->notifier_vma.offset; } offset += mem->start; @@ -183,7 +193,7 @@ nouveau_ioctl_notifier_alloc(struct drm_device *dev, void *data, if (unlikely(dev_priv->card_type >= NV_C0)) return -EINVAL; - chan = nouveau_channel_get(dev, file_priv, na->channel); + chan = nouveau_channel_get(file_priv, na->channel); if (IS_ERR(chan)) return PTR_ERR(chan); diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c index 8f97016..159b7c4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_object.c +++ b/drivers/gpu/drm/nouveau/nouveau_object.c @@ -125,7 +125,7 @@ nouveau_gpuobj_mthd_call2(struct drm_device *dev, int chid, int ret = -EINVAL; spin_lock_irqsave(&dev_priv->channels.lock, flags); - if (chid > 0 && chid < dev_priv->engine.fifo.channels) + if (chid >= 0 && chid < dev_priv->engine.fifo.channels) chan = dev_priv->channels.ptr[chid]; if (chan) ret = nouveau_gpuobj_mthd_call(chan, class, mthd, data); @@ -191,7 +191,7 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan, list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list); spin_unlock(&dev_priv->ramin_lock); - if (chan) { + if (!(flags & NVOBJ_FLAG_VM) && chan) { ramin = drm_mm_search_free(&chan->ramin_heap, size, align, 0); if (ramin) ramin = drm_mm_get_block(ramin, size, align); @@ -208,7 +208,7 @@ nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan, gpuobj->vinst = ramin->start + chan->ramin->vinst; gpuobj->node = ramin; } else { - ret = instmem->get(gpuobj, size, align); + ret = instmem->get(gpuobj, chan, size, align); if (ret) { nouveau_gpuobj_ref(NULL, &gpuobj); return ret; @@ -690,35 +690,64 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan) return 0; } +static int +nvc0_gpuobj_channel_init(struct nouveau_channel *chan, struct nouveau_vm *vm) +{ + struct drm_device *dev = chan->dev; + struct nouveau_gpuobj *pgd = NULL; + struct nouveau_vm_pgd *vpgd; + int ret, i; + + ret = nouveau_gpuobj_new(dev, NULL, 4096, 0x1000, 0, &chan->ramin); + if (ret) + return ret; + + /* create page directory for this vm if none currently exists, + * will be destroyed automagically when last reference to the + * vm is removed + */ + if (list_empty(&vm->pgd_list)) { + ret = nouveau_gpuobj_new(dev, NULL, 65536, 0x1000, 0, &pgd); + if (ret) + return ret; + } + nouveau_vm_ref(vm, &chan->vm, pgd); + nouveau_gpuobj_ref(NULL, &pgd); + + /* point channel at vm's page directory */ + vpgd = list_first_entry(&vm->pgd_list, struct nouveau_vm_pgd, head); + nv_wo32(chan->ramin, 0x0200, lower_32_bits(vpgd->obj->vinst)); + nv_wo32(chan->ramin, 0x0204, upper_32_bits(vpgd->obj->vinst)); + nv_wo32(chan->ramin, 0x0208, 0xffffffff); + nv_wo32(chan->ramin, 0x020c, 0x000000ff); + + /* map display semaphore buffers into channel's vm */ + for (i = 0; i < 2; i++) { + struct nv50_display_crtc *dispc = &nv50_display(dev)->crtc[i]; + + ret = nouveau_bo_vma_add(dispc->sem.bo, chan->vm, + &chan->dispc_vma[i]); + if (ret) + return ret; + } + + return 0; +} + int nouveau_gpuobj_channel_init(struct nouveau_channel *chan, uint32_t vram_h, uint32_t tt_h) { struct drm_device *dev = chan->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fpriv *fpriv = nouveau_fpriv(chan->file_priv); + struct nouveau_vm *vm = fpriv ? fpriv->vm : dev_priv->chan_vm; struct nouveau_gpuobj *vram = NULL, *tt = NULL; int ret, i; NV_DEBUG(dev, "ch%d vram=0x%08x tt=0x%08x\n", chan->id, vram_h, tt_h); - - if (dev_priv->card_type == NV_C0) { - struct nouveau_vm *vm = dev_priv->chan_vm; - struct nouveau_vm_pgd *vpgd; - - ret = nouveau_gpuobj_new(dev, NULL, 4096, 0x1000, 0, - &chan->ramin); - if (ret) - return ret; - - nouveau_vm_ref(vm, &chan->vm, NULL); - - vpgd = list_first_entry(&vm->pgd_list, struct nouveau_vm_pgd, head); - nv_wo32(chan->ramin, 0x0200, lower_32_bits(vpgd->obj->vinst)); - nv_wo32(chan->ramin, 0x0204, upper_32_bits(vpgd->obj->vinst)); - nv_wo32(chan->ramin, 0x0208, 0xffffffff); - nv_wo32(chan->ramin, 0x020c, 0x000000ff); - return 0; - } + if (dev_priv->card_type == NV_C0) + return nvc0_gpuobj_channel_init(chan, vm); /* Allocate a chunk of memory for per-channel object storage */ ret = nouveau_gpuobj_channel_init_pramin(chan); @@ -731,7 +760,7 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan, * - Allocate per-channel page-directory * - Link with shared channel VM */ - if (dev_priv->chan_vm) { + if (vm) { u32 pgd_offs = (dev_priv->chipset == 0x50) ? 0x1400 : 0x0200; u64 vm_vinst = chan->ramin->vinst + pgd_offs; u32 vm_pinst = chan->ramin->pinst; @@ -744,7 +773,7 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan, if (ret) return ret; - nouveau_vm_ref(dev_priv->chan_vm, &chan->vm, chan->vm_pd); + nouveau_vm_ref(vm, &chan->vm, chan->vm_pd); } /* RAMHT */ @@ -768,7 +797,7 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan, struct nouveau_gpuobj *sem = NULL; struct nv50_display_crtc *dispc = &nv50_display(dev)->crtc[i]; - u64 offset = dispc->sem.bo->bo.mem.start << PAGE_SHIFT; + u64 offset = dispc->sem.bo->bo.offset; ret = nouveau_gpuobj_dma_new(chan, 0x3d, offset, 0xfff, NV_MEM_ACCESS_RW, @@ -841,13 +870,22 @@ void nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan) { struct drm_device *dev = chan->dev; + struct drm_nouveau_private *dev_priv = dev->dev_private; + int i; NV_DEBUG(dev, "ch%d\n", chan->id); - nouveau_ramht_ref(NULL, &chan->ramht, chan); + if (dev_priv->card_type >= NV_50) { + struct nv50_display *disp = nv50_display(dev); + + for (i = 0; i < 2; i++) { + struct nv50_display_crtc *dispc = &disp->crtc[i]; + nouveau_bo_vma_del(dispc->sem.bo, &chan->dispc_vma[i]); + } - nouveau_vm_ref(NULL, &chan->vm, chan->vm_pd); - nouveau_gpuobj_ref(NULL, &chan->vm_pd); + nouveau_vm_ref(NULL, &chan->vm, chan->vm_pd); + nouveau_gpuobj_ref(NULL, &chan->vm_pd); + } if (drm_mm_initialized(&chan->ramin_heap)) drm_mm_takedown(&chan->ramin_heap); @@ -909,7 +947,7 @@ int nouveau_ioctl_grobj_alloc(struct drm_device *dev, void *data, if (init->handle == ~0) return -EINVAL; - chan = nouveau_channel_get(dev, file_priv, init->channel); + chan = nouveau_channel_get(file_priv, init->channel); if (IS_ERR(chan)) return PTR_ERR(chan); @@ -936,7 +974,7 @@ int nouveau_ioctl_gpuobj_free(struct drm_device *dev, void *data, struct nouveau_channel *chan; int ret; - chan = nouveau_channel_get(dev, file_priv, objfree->channel); + chan = nouveau_channel_get(file_priv, objfree->channel); if (IS_ERR(chan)) return PTR_ERR(chan); diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c index 82fad91..c444cad 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c @@ -429,7 +429,7 @@ nouveau_sgdma_init(struct drm_device *dev) u32 aper_size, align; int ret; - if (dev_priv->card_type >= NV_40 && drm_pci_device_is_pcie(dev)) + if (dev_priv->card_type >= NV_40 && pci_is_pcie(dev->pdev)) aper_size = 512 * 1024 * 1024; else aper_size = 64 * 1024 * 1024; @@ -458,7 +458,7 @@ nouveau_sgdma_init(struct drm_device *dev) dev_priv->gart_info.type = NOUVEAU_GART_HW; dev_priv->gart_info.func = &nv50_sgdma_backend; } else - if (0 && drm_pci_device_is_pcie(dev) && + if (0 && pci_is_pcie(dev->pdev) && dev_priv->chipset > 0x40 && dev_priv->chipset != 0x45) { if (nv44_graph_class(dev)) { dev_priv->gart_info.func = &nv44_sgdma_backend; diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index 731acea..10656e4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -91,6 +91,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->pm.clock_pre = nv04_pm_clock_pre; engine->pm.clock_set = nv04_pm_clock_set; engine->vram.init = nouveau_mem_detect; + engine->vram.takedown = nouveau_stub_takedown; engine->vram.flags_valid = nouveau_mem_flags_valid; break; case 0x10: @@ -139,6 +140,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->pm.clock_pre = nv04_pm_clock_pre; engine->pm.clock_set = nv04_pm_clock_set; engine->vram.init = nouveau_mem_detect; + engine->vram.takedown = nouveau_stub_takedown; engine->vram.flags_valid = nouveau_mem_flags_valid; break; case 0x20: @@ -187,6 +189,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->pm.clock_pre = nv04_pm_clock_pre; engine->pm.clock_set = nv04_pm_clock_set; engine->vram.init = nouveau_mem_detect; + engine->vram.takedown = nouveau_stub_takedown; engine->vram.flags_valid = nouveau_mem_flags_valid; break; case 0x30: @@ -237,6 +240,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->pm.voltage_get = nouveau_voltage_gpio_get; engine->pm.voltage_set = nouveau_voltage_gpio_set; engine->vram.init = nouveau_mem_detect; + engine->vram.takedown = nouveau_stub_takedown; engine->vram.flags_valid = nouveau_mem_flags_valid; break; case 0x40: @@ -289,6 +293,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->pm.voltage_set = nouveau_voltage_gpio_set; engine->pm.temp_get = nv40_temp_get; engine->vram.init = nouveau_mem_detect; + engine->vram.takedown = nouveau_stub_takedown; engine->vram.flags_valid = nouveau_mem_flags_valid; break; case 0x50: @@ -366,6 +371,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) else engine->pm.temp_get = nv40_temp_get; engine->vram.init = nv50_vram_init; + engine->vram.takedown = nv50_vram_fini; engine->vram.get = nv50_vram_new; engine->vram.put = nv50_vram_del; engine->vram.flags_valid = nv50_vram_flags_valid; @@ -411,9 +417,11 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->gpio.irq_unregister = nv50_gpio_irq_unregister; engine->gpio.irq_enable = nv50_gpio_irq_enable; engine->vram.init = nvc0_vram_init; + engine->vram.takedown = nv50_vram_fini; engine->vram.get = nvc0_vram_new; engine->vram.put = nv50_vram_del; engine->vram.flags_valid = nvc0_vram_flags_valid; + engine->pm.temp_get = nv84_temp_get; break; default: NV_ERROR(dev, "NV%02x unsupported\n", dev_priv->chipset); @@ -447,8 +455,8 @@ nouveau_card_init_channel(struct drm_device *dev) struct drm_nouveau_private *dev_priv = dev->dev_private; int ret; - ret = nouveau_channel_alloc(dev, &dev_priv->channel, - (struct drm_file *)-2, NvDmaFB, NvDmaTT); + ret = nouveau_channel_alloc(dev, &dev_priv->channel, NULL, + NvDmaFB, NvDmaTT); if (ret) return ret; @@ -527,7 +535,7 @@ nouveau_card_init(struct drm_device *dev) nouveau_pm_init(dev); - ret = nouveau_mem_vram_init(dev); + ret = engine->vram.init(dev); if (ret) goto out_bios; @@ -539,10 +547,14 @@ nouveau_card_init(struct drm_device *dev) if (ret) goto out_gpuobj; - ret = nouveau_mem_gart_init(dev); + ret = nouveau_mem_vram_init(dev); if (ret) goto out_instmem; + ret = nouveau_mem_gart_init(dev); + if (ret) + goto out_ttmvram; + /* PMC */ ret = engine->mc.init(dev); if (ret) @@ -563,7 +575,7 @@ nouveau_card_init(struct drm_device *dev) if (ret) goto out_timer; - if (!nouveau_noaccel) { + if (!dev_priv->noaccel) { switch (dev_priv->card_type) { case NV_04: nv04_graph_create(dev); @@ -675,14 +687,14 @@ out_vblank: drm_vblank_cleanup(dev); engine->display.destroy(dev); out_fifo: - if (!nouveau_noaccel) + if (!dev_priv->noaccel) engine->fifo.takedown(dev); out_engine: - if (!nouveau_noaccel) { + if (!dev_priv->noaccel) { for (e = e - 1; e >= 0; e--) { if (!dev_priv->eng[e]) continue; - dev_priv->eng[e]->fini(dev, e); + dev_priv->eng[e]->fini(dev, e, false); dev_priv->eng[e]->destroy(dev,e ); } } @@ -696,12 +708,14 @@ out_mc: engine->mc.takedown(dev); out_gart: nouveau_mem_gart_fini(dev); +out_ttmvram: + nouveau_mem_vram_fini(dev); out_instmem: engine->instmem.takedown(dev); out_gpuobj: nouveau_gpuobj_takedown(dev); out_vram: - nouveau_mem_vram_fini(dev); + engine->vram.takedown(dev); out_bios: nouveau_pm_fini(dev); nouveau_bios_takedown(dev); @@ -718,16 +732,21 @@ static void nouveau_card_takedown(struct drm_device *dev) struct nouveau_engine *engine = &dev_priv->engine; int e; + drm_kms_helper_poll_fini(dev); + nouveau_fbcon_fini(dev); + if (dev_priv->channel) { - nouveau_fence_fini(dev); nouveau_channel_put_unlocked(&dev_priv->channel); + nouveau_fence_fini(dev); } - if (!nouveau_noaccel) { + engine->display.destroy(dev); + + if (!dev_priv->noaccel) { engine->fifo.takedown(dev); for (e = NVOBJ_ENGINE_NR - 1; e >= 0; e--) { if (dev_priv->eng[e]) { - dev_priv->eng[e]->fini(dev, e); + dev_priv->eng[e]->fini(dev, e, false); dev_priv->eng[e]->destroy(dev,e ); } } @@ -748,10 +767,11 @@ static void nouveau_card_takedown(struct drm_device *dev) ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_TT); mutex_unlock(&dev->struct_mutex); nouveau_mem_gart_fini(dev); + nouveau_mem_vram_fini(dev); engine->instmem.takedown(dev); nouveau_gpuobj_takedown(dev); - nouveau_mem_vram_fini(dev); + engine->vram.takedown(dev); nouveau_irq_fini(dev); drm_vblank_cleanup(dev); @@ -762,6 +782,41 @@ static void nouveau_card_takedown(struct drm_device *dev) vga_client_register(dev->pdev, NULL, NULL, NULL); } +int +nouveau_open(struct drm_device *dev, struct drm_file *file_priv) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fpriv *fpriv; + int ret; + + fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); + if (unlikely(!fpriv)) + return -ENOMEM; + + spin_lock_init(&fpriv->lock); + INIT_LIST_HEAD(&fpriv->channels); + + if (dev_priv->card_type == NV_50) { + ret = nouveau_vm_new(dev, 0, (1ULL << 40), 0x0020000000ULL, + &fpriv->vm); + if (ret) { + kfree(fpriv); + return ret; + } + } else + if (dev_priv->card_type >= NV_C0) { + ret = nouveau_vm_new(dev, 0, (1ULL << 40), 0x0008000000ULL, + &fpriv->vm); + if (ret) { + kfree(fpriv); + return ret; + } + } + + file_priv->driver_priv = fpriv; + return 0; +} + /* here a client dies, release the stuff that was allocated for its * file_priv */ void nouveau_preclose(struct drm_device *dev, struct drm_file *file_priv) @@ -769,6 +824,14 @@ void nouveau_preclose(struct drm_device *dev, struct drm_file *file_priv) nouveau_channel_cleanup(dev, file_priv); } +void +nouveau_postclose(struct drm_device *dev, struct drm_file *file_priv) +{ + struct nouveau_fpriv *fpriv = nouveau_fpriv(file_priv); + nouveau_vm_ref(NULL, &fpriv->vm, NULL); + kfree(fpriv); +} + /* first module load, setup the mmio/fb mapping */ /* KMS: we need mmio at load time, not when the first drm client opens. */ int nouveau_firstopen(struct drm_device *dev) @@ -933,6 +996,25 @@ int nouveau_load(struct drm_device *dev, unsigned long flags) NV_INFO(dev, "Detected an NV%2x generation card (0x%08x)\n", dev_priv->card_type, reg0); + /* Determine whether we'll attempt acceleration or not, some + * cards are disabled by default here due to them being known + * non-functional, or never been tested due to lack of hw. + */ + dev_priv->noaccel = !!nouveau_noaccel; + if (nouveau_noaccel == -1) { + switch (dev_priv->chipset) { + case 0xc1: /* known broken */ + case 0xc8: /* never tested */ + NV_INFO(dev, "acceleration disabled by default, pass " + "noaccel=0 to force enable\n"); + dev_priv->noaccel = true; + break; + default: + dev_priv->noaccel = false; + break; + } + } + ret = nouveau_remove_conflicting_drivers(dev); if (ret) goto err_mmio; @@ -997,11 +1079,7 @@ void nouveau_lastclose(struct drm_device *dev) int nouveau_unload(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; - struct nouveau_engine *engine = &dev_priv->engine; - drm_kms_helper_poll_fini(dev); - nouveau_fbcon_fini(dev); - engine->display.destroy(dev); nouveau_card_takedown(dev); iounmap(dev_priv->mmio); @@ -1031,7 +1109,7 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data, case NOUVEAU_GETPARAM_BUS_TYPE: if (drm_pci_device_is_agp(dev)) getparam->value = NV_AGP; - else if (drm_pci_device_is_pcie(dev)) + else if (pci_is_pcie(dev->pdev)) getparam->value = NV_PCIE; else getparam->value = NV_PCI; diff --git a/drivers/gpu/drm/nouveau/nouveau_temp.c b/drivers/gpu/drm/nouveau/nouveau_temp.c index 649b041..081ca7b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_temp.c +++ b/drivers/gpu/drm/nouveau/nouveau_temp.c @@ -43,7 +43,7 @@ nouveau_temp_vbios_parse(struct drm_device *dev, u8 *temp) /* Set the default sensor's contants */ sensor->offset_constant = 0; - sensor->offset_mult = 1; + sensor->offset_mult = 0; sensor->offset_div = 1; sensor->slope_mult = 1; sensor->slope_div = 1; @@ -99,6 +99,13 @@ nouveau_temp_vbios_parse(struct drm_device *dev, u8 *temp) sensor->slope_mult = 431; sensor->slope_div = 10000; break; + + case 0x67: + sensor->offset_mult = -26149; + sensor->offset_div = 100; + sensor->slope_mult = 484; + sensor->slope_div = 10000; + break; } } @@ -109,7 +116,7 @@ nouveau_temp_vbios_parse(struct drm_device *dev, u8 *temp) /* Read the entries from the table */ for (i = 0; i < entries; i++) { - u16 value = ROM16(temp[1]); + s16 value = ROM16(temp[1]); switch (temp[0]) { case 0x01: @@ -160,8 +167,8 @@ nv40_sensor_setup(struct drm_device *dev) struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_pm_engine *pm = &dev_priv->engine.pm; struct nouveau_pm_temp_sensor_constants *sensor = &pm->sensor_constants; - u32 offset = sensor->offset_mult / sensor->offset_div; - u32 sensor_calibration; + s32 offset = sensor->offset_mult / sensor->offset_div; + s32 sensor_calibration; /* set up the sensors */ sensor_calibration = 120 - offset - sensor->offset_constant; diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.c b/drivers/gpu/drm/nouveau/nouveau_vm.c index 519a6b4..244fd38 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vm.c @@ -369,23 +369,26 @@ nouveau_vm_link(struct nouveau_vm *vm, struct nouveau_gpuobj *pgd) } static void -nouveau_vm_unlink(struct nouveau_vm *vm, struct nouveau_gpuobj *pgd) +nouveau_vm_unlink(struct nouveau_vm *vm, struct nouveau_gpuobj *mpgd) { struct nouveau_vm_pgd *vpgd, *tmp; + struct nouveau_gpuobj *pgd = NULL; - if (!pgd) + if (!mpgd) return; mutex_lock(&vm->mm->mutex); list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) { - if (vpgd->obj != pgd) - continue; - - list_del(&vpgd->head); - nouveau_gpuobj_ref(NULL, &vpgd->obj); - kfree(vpgd); + if (vpgd->obj == mpgd) { + pgd = vpgd->obj; + list_del(&vpgd->head); + kfree(vpgd); + break; + } } mutex_unlock(&vm->mm->mutex); + + nouveau_gpuobj_ref(NULL, &pgd); } static void @@ -396,8 +399,8 @@ nouveau_vm_del(struct nouveau_vm *vm) list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) { nouveau_vm_unlink(vm, vpgd->obj); } - WARN_ON(nouveau_mm_fini(&vm->mm) != 0); + nouveau_mm_fini(&vm->mm); kfree(vm->pgt); kfree(vm); } diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.h b/drivers/gpu/drm/nouveau/nouveau_vm.h index c48a9fc..579ca8c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vm.h +++ b/drivers/gpu/drm/nouveau/nouveau_vm.h @@ -41,6 +41,8 @@ struct nouveau_vm_pgd { }; struct nouveau_vma { + struct list_head head; + int refcount; struct nouveau_vm *vm; struct nouveau_mm_node *node; u64 offset; diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c index f1a3ae4..118261d 100644 --- a/drivers/gpu/drm/nouveau/nv04_crtc.c +++ b/drivers/gpu/drm/nouveau/nv04_crtc.c @@ -1035,7 +1035,7 @@ nv04_crtc_create(struct drm_device *dev, int crtc_num) drm_crtc_helper_add(&nv_crtc->base, &nv04_crtc_helper_funcs); drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256); - ret = nouveau_bo_new(dev, NULL, 64*64*4, 0x100, TTM_PL_FLAG_VRAM, + ret = nouveau_bo_new(dev, 64*64*4, 0x100, TTM_PL_FLAG_VRAM, 0, 0x0000, &nv_crtc->cursor.nvbo); if (!ret) { ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM); diff --git a/drivers/gpu/drm/nouveau/nv04_graph.c b/drivers/gpu/drm/nouveau/nv04_graph.c index 3626ee7..dbdea8e 100644 --- a/drivers/gpu/drm/nouveau/nv04_graph.c +++ b/drivers/gpu/drm/nouveau/nv04_graph.c @@ -450,13 +450,13 @@ nv04_graph_context_del(struct nouveau_channel *chan, int engine) unsigned long flags; spin_lock_irqsave(&dev_priv->context_switch_lock, flags); - nv04_graph_fifo_access(dev, false); + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); /* Unload the context if it's the currently active one */ if (nv04_graph_channel(dev) == chan) nv04_graph_unload_context(dev); - nv04_graph_fifo_access(dev, true); + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags); /* Free the context resources */ @@ -538,24 +538,18 @@ nv04_graph_init(struct drm_device *dev, int engine) } static int -nv04_graph_fini(struct drm_device *dev, int engine) +nv04_graph_fini(struct drm_device *dev, int engine, bool suspend) { + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); + if (!nv_wait(dev, NV04_PGRAPH_STATUS, ~0, 0) && suspend) { + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); + return -EBUSY; + } nv04_graph_unload_context(dev); nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0x00000000); return 0; } -void -nv04_graph_fifo_access(struct drm_device *dev, bool enabled) -{ - if (enabled) - nv_wr32(dev, NV04_PGRAPH_FIFO, - nv_rd32(dev, NV04_PGRAPH_FIFO) | 1); - else - nv_wr32(dev, NV04_PGRAPH_FIFO, - nv_rd32(dev, NV04_PGRAPH_FIFO) & ~1); -} - static int nv04_graph_mthd_set_ref(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data) diff --git a/drivers/gpu/drm/nouveau/nv04_instmem.c b/drivers/gpu/drm/nouveau/nv04_instmem.c index b8611b9..c1248e0 100644 --- a/drivers/gpu/drm/nouveau/nv04_instmem.c +++ b/drivers/gpu/drm/nouveau/nv04_instmem.c @@ -28,6 +28,31 @@ int nv04_instmem_init(struct drm_device *dev) /* RAMIN always available */ dev_priv->ramin_available = true; + /* Reserve space at end of VRAM for PRAMIN */ + if (dev_priv->card_type >= NV_40) { + u32 vs = hweight8((nv_rd32(dev, 0x001540) & 0x0000ff00) >> 8); + u32 rsvd; + + /* estimate grctx size, the magics come from nv40_grctx.c */ + if (dev_priv->chipset == 0x40) rsvd = 0x6aa0 * vs; + else if (dev_priv->chipset < 0x43) rsvd = 0x4f00 * vs; + else if (nv44_graph_class(dev)) rsvd = 0x4980 * vs; + else rsvd = 0x4a40 * vs; + rsvd += 16 * 1024; + rsvd *= dev_priv->engine.fifo.channels; + + /* pciegart table */ + if (pci_is_pcie(dev->pdev)) + rsvd += 512 * 1024; + + /* object storage */ + rsvd += 512 * 1024; + + dev_priv->ramin_rsvd_vram = round_up(rsvd, 4096); + } else { + dev_priv->ramin_rsvd_vram = 512 * 1024; + } + /* Setup shared RAMHT */ ret = nouveau_gpuobj_new_fake(dev, 0x10000, ~0, 4096, NVOBJ_FLAG_ZERO_ALLOC, &ramht); @@ -112,7 +137,8 @@ nv04_instmem_resume(struct drm_device *dev) } int -nv04_instmem_get(struct nouveau_gpuobj *gpuobj, u32 size, u32 align) +nv04_instmem_get(struct nouveau_gpuobj *gpuobj, struct nouveau_channel *chan, + u32 size, u32 align) { struct drm_nouveau_private *dev_priv = gpuobj->dev->dev_private; struct drm_mm_node *ramin = NULL; diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c index 0930c6c..7255e4a 100644 --- a/drivers/gpu/drm/nouveau/nv10_graph.c +++ b/drivers/gpu/drm/nouveau/nv10_graph.c @@ -708,8 +708,8 @@ static void nv10_graph_load_dma_vtxbuf(struct nouveau_channel *chan, 0x2c000000 | chan->id << 20 | subchan << 16 | 0x18c); nv_wr32(dev, NV10_PGRAPH_FFINTFC_ST2_DL, inst); nv_mask(dev, NV10_PGRAPH_CTX_CONTROL, 0, 0x10000); - nv04_graph_fifo_access(dev, true); - nv04_graph_fifo_access(dev, false); + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); /* Restore the FIFO state */ for (i = 0; i < ARRAY_SIZE(fifo); i++) @@ -879,13 +879,13 @@ nv10_graph_context_del(struct nouveau_channel *chan, int engine) unsigned long flags; spin_lock_irqsave(&dev_priv->context_switch_lock, flags); - nv04_graph_fifo_access(dev, false); + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); /* Unload the context if it's the currently active one */ if (nv10_graph_channel(dev) == chan) nv10_graph_unload_context(dev); - nv04_graph_fifo_access(dev, true); + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags); /* Free the context resources */ @@ -957,8 +957,13 @@ nv10_graph_init(struct drm_device *dev, int engine) } static int -nv10_graph_fini(struct drm_device *dev, int engine) +nv10_graph_fini(struct drm_device *dev, int engine, bool suspend) { + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); + if (!nv_wait(dev, NV04_PGRAPH_STATUS, ~0, 0) && suspend) { + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); + return -EBUSY; + } nv10_graph_unload_context(dev); nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0x00000000); return 0; diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c index affc7d7..183e375 100644 --- a/drivers/gpu/drm/nouveau/nv20_graph.c +++ b/drivers/gpu/drm/nouveau/nv20_graph.c @@ -454,13 +454,13 @@ nv20_graph_context_del(struct nouveau_channel *chan, int engine) unsigned long flags; spin_lock_irqsave(&dev_priv->context_switch_lock, flags); - nv04_graph_fifo_access(dev, false); + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); /* Unload the context if it's the currently active one */ if (nv10_graph_channel(dev) == chan) nv20_graph_unload_context(dev); - nv04_graph_fifo_access(dev, true); + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags); /* Free the context resources */ @@ -654,8 +654,13 @@ nv30_graph_init(struct drm_device *dev, int engine) } int -nv20_graph_fini(struct drm_device *dev, int engine) +nv20_graph_fini(struct drm_device *dev, int engine, bool suspend) { + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); + if (!nv_wait(dev, NV04_PGRAPH_STATUS, ~0, 0) && suspend) { + nv_mask(dev, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); + return -EBUSY; + } nv20_graph_unload_context(dev); nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0x00000000); return 0; @@ -753,6 +758,7 @@ nv20_graph_create(struct drm_device *dev) break; default: NV_ERROR(dev, "PGRAPH: unknown chipset\n"); + kfree(pgraph); return 0; } } else { @@ -774,6 +780,7 @@ nv20_graph_create(struct drm_device *dev) break; default: NV_ERROR(dev, "PGRAPH: unknown chipset\n"); + kfree(pgraph); return 0; } } diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c index 5beb01b..ba14a93 100644 --- a/drivers/gpu/drm/nouveau/nv40_graph.c +++ b/drivers/gpu/drm/nouveau/nv40_graph.c @@ -35,89 +35,6 @@ struct nv40_graph_engine { u32 grctx_size; }; -static struct nouveau_channel * -nv40_graph_channel(struct drm_device *dev) -{ - struct drm_nouveau_private *dev_priv = dev->dev_private; - struct nouveau_gpuobj *grctx; - uint32_t inst; - int i; - - inst = nv_rd32(dev, NV40_PGRAPH_CTXCTL_CUR); - if (!(inst & NV40_PGRAPH_CTXCTL_CUR_LOADED)) - return NULL; - inst = (inst & NV40_PGRAPH_CTXCTL_CUR_INSTANCE) << 4; - - for (i = 0; i < dev_priv->engine.fifo.channels; i++) { - if (!dev_priv->channels.ptr[i]) - continue; - - grctx = dev_priv->channels.ptr[i]->engctx[NVOBJ_ENGINE_GR]; - if (grctx && grctx->pinst == inst) - return dev_priv->channels.ptr[i]; - } - - return NULL; -} - -static int -nv40_graph_transfer_context(struct drm_device *dev, uint32_t inst, int save) -{ - uint32_t old_cp, tv = 1000, tmp; - int i; - - old_cp = nv_rd32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER); - nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, inst); - - tmp = nv_rd32(dev, NV40_PGRAPH_CTXCTL_0310); - tmp |= save ? NV40_PGRAPH_CTXCTL_0310_XFER_SAVE : - NV40_PGRAPH_CTXCTL_0310_XFER_LOAD; - nv_wr32(dev, NV40_PGRAPH_CTXCTL_0310, tmp); - - tmp = nv_rd32(dev, NV40_PGRAPH_CTXCTL_0304); - tmp |= NV40_PGRAPH_CTXCTL_0304_XFER_CTX; - nv_wr32(dev, NV40_PGRAPH_CTXCTL_0304, tmp); - - nouveau_wait_for_idle(dev); - - for (i = 0; i < tv; i++) { - if (nv_rd32(dev, NV40_PGRAPH_CTXCTL_030C) == 0) - break; - } - - nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, old_cp); - - if (i == tv) { - uint32_t ucstat = nv_rd32(dev, NV40_PGRAPH_CTXCTL_UCODE_STAT); - NV_ERROR(dev, "Failed: Instance=0x%08x Save=%d\n", inst, save); - NV_ERROR(dev, "IP: 0x%02x, Opcode: 0x%08x\n", - ucstat >> NV40_PGRAPH_CTXCTL_UCODE_STAT_IP_SHIFT, - ucstat & NV40_PGRAPH_CTXCTL_UCODE_STAT_OP_MASK); - NV_ERROR(dev, "0x40030C = 0x%08x\n", - nv_rd32(dev, NV40_PGRAPH_CTXCTL_030C)); - return -EBUSY; - } - - return 0; -} - -static int -nv40_graph_unload_context(struct drm_device *dev) -{ - uint32_t inst; - int ret; - - inst = nv_rd32(dev, NV40_PGRAPH_CTXCTL_CUR); - if (!(inst & NV40_PGRAPH_CTXCTL_CUR_LOADED)) - return 0; - inst &= NV40_PGRAPH_CTXCTL_CUR_INSTANCE; - - ret = nv40_graph_transfer_context(dev, inst, 1); - - nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, inst); - return ret; -} - static int nv40_graph_context_new(struct nouveau_channel *chan, int engine) { @@ -163,16 +80,16 @@ nv40_graph_context_del(struct nouveau_channel *chan, int engine) struct nouveau_gpuobj *grctx = chan->engctx[engine]; struct drm_device *dev = chan->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; + u32 inst = 0x01000000 | (grctx->pinst >> 4); unsigned long flags; spin_lock_irqsave(&dev_priv->context_switch_lock, flags); - nv04_graph_fifo_access(dev, false); - - /* Unload the context if it's the currently active one */ - if (nv40_graph_channel(dev) == chan) - nv40_graph_unload_context(dev); - - nv04_graph_fifo_access(dev, true); + nv_mask(dev, 0x400720, 0x00000000, 0x00000001); + if (nv_rd32(dev, 0x40032c) == inst) + nv_mask(dev, 0x40032c, 0x01000000, 0x00000000); + if (nv_rd32(dev, 0x400330) == inst) + nv_mask(dev, 0x400330, 0x01000000, 0x00000000); + nv_mask(dev, 0x400720, 0x00000001, 0x00000001); spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags); /* Free the context resources */ @@ -429,9 +346,20 @@ nv40_graph_init(struct drm_device *dev, int engine) } static int -nv40_graph_fini(struct drm_device *dev, int engine) +nv40_graph_fini(struct drm_device *dev, int engine, bool suspend) { - nv40_graph_unload_context(dev); + u32 inst = nv_rd32(dev, 0x40032c); + if (inst & 0x01000000) { + nv_wr32(dev, 0x400720, 0x00000000); + nv_wr32(dev, 0x400784, inst); + nv_mask(dev, 0x400310, 0x00000020, 0x00000020); + nv_mask(dev, 0x400304, 0x00000001, 0x00000001); + if (!nv_wait(dev, 0x400300, 0x00000001, 0x00000000)) { + u32 insn = nv_rd32(dev, 0x400308); + NV_ERROR(dev, "PGRAPH: ctxprog timeout 0x%08x\n", insn); + } + nv_mask(dev, 0x40032c, 0x01000000, 0x00000000); + } return 0; } diff --git a/drivers/gpu/drm/nouveau/nv40_mpeg.c b/drivers/gpu/drm/nouveau/nv40_mpeg.c index 6d2af29..ad03a0e 100644 --- a/drivers/gpu/drm/nouveau/nv40_mpeg.c +++ b/drivers/gpu/drm/nouveau/nv40_mpeg.c @@ -137,7 +137,7 @@ nv40_mpeg_init(struct drm_device *dev, int engine) } static int -nv40_mpeg_fini(struct drm_device *dev, int engine) +nv40_mpeg_fini(struct drm_device *dev, int engine, bool suspend) { /*XXX: context save? */ nv_mask(dev, 0x00b32c, 0x00000001, 0x00000000); diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c index ebabacf..46ad59e 100644 --- a/drivers/gpu/drm/nouveau/nv50_crtc.c +++ b/drivers/gpu/drm/nouveau/nv50_crtc.c @@ -104,7 +104,7 @@ nv50_crtc_blank(struct nouveau_crtc *nv_crtc, bool blanked) OUT_RING(evo, nv_crtc->lut.depth == 8 ? NV50_EVO_CRTC_CLUT_MODE_OFF : NV50_EVO_CRTC_CLUT_MODE_ON); - OUT_RING(evo, (nv_crtc->lut.nvbo->bo.mem.start << PAGE_SHIFT) >> 8); + OUT_RING(evo, nv_crtc->lut.nvbo->bo.offset >> 8); if (dev_priv->chipset != 0x50) { BEGIN_RING(evo, 0, NV84_EVO_CRTC(index, CLUT_DMA), 1); OUT_RING(evo, NvEvoVRAM); @@ -372,7 +372,7 @@ nv50_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv, nouveau_bo_unmap(cursor); - nv_crtc->cursor.set_offset(nv_crtc, nv_crtc->cursor.nvbo->bo.mem.start << PAGE_SHIFT); + nv_crtc->cursor.set_offset(nv_crtc, nv_crtc->cursor.nvbo->bo.offset); nv_crtc->cursor.show(nv_crtc, true); out: @@ -546,7 +546,7 @@ nv50_crtc_do_mode_set_base(struct drm_crtc *crtc, } } - nv_crtc->fb.offset = fb->nvbo->bo.mem.start << PAGE_SHIFT; + nv_crtc->fb.offset = fb->nvbo->bo.offset; nv_crtc->fb.tile_flags = nouveau_bo_tile_layout(fb->nvbo); nv_crtc->fb.cpp = drm_fb->bits_per_pixel / 8; if (!nv_crtc->fb.blanked && dev_priv->chipset != 0x50) { @@ -747,7 +747,7 @@ nv50_crtc_create(struct drm_device *dev, int index) } nv_crtc->lut.depth = 0; - ret = nouveau_bo_new(dev, NULL, 4096, 0x100, TTM_PL_FLAG_VRAM, + ret = nouveau_bo_new(dev, 4096, 0x100, TTM_PL_FLAG_VRAM, 0, 0x0000, &nv_crtc->lut.nvbo); if (!ret) { ret = nouveau_bo_pin(nv_crtc->lut.nvbo, TTM_PL_FLAG_VRAM); @@ -773,7 +773,7 @@ nv50_crtc_create(struct drm_device *dev, int index) drm_crtc_helper_add(&nv_crtc->base, &nv50_crtc_helper_funcs); drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256); - ret = nouveau_bo_new(dev, NULL, 64*64*4, 0x100, TTM_PL_FLAG_VRAM, + ret = nouveau_bo_new(dev, 64*64*4, 0x100, TTM_PL_FLAG_VRAM, 0, 0x0000, &nv_crtc->cursor.nvbo); if (!ret) { ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM); diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 08da478..db1a5f4 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -415,8 +415,6 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, /* synchronise with the rendering channel, if necessary */ if (likely(chan)) { - u64 offset = dispc->sem.bo->vma.offset + dispc->sem.offset; - ret = RING_SPACE(chan, 10); if (ret) { WIND_RING(evo); @@ -438,6 +436,8 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, else OUT_RING (chan, chan->vram_handle); } else { + u64 offset = chan->dispc_vma[nv_crtc->index].offset; + offset += dispc->sem.offset; BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0010, 4); OUT_RING (chan, upper_32_bits(offset)); OUT_RING (chan, lower_32_bits(offset)); @@ -484,7 +484,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, OUT_RING (evo, 0x00000000); OUT_RING (evo, 0x00000000); BEGIN_RING(evo, 0, 0x0800, 5); - OUT_RING (evo, (nv_fb->nvbo->bo.mem.start << PAGE_SHIFT) >> 8); + OUT_RING (evo, nv_fb->nvbo->bo.offset >> 8); OUT_RING (evo, 0); OUT_RING (evo, (fb->height << 16) | fb->width); OUT_RING (evo, nv_fb->r_pitch); diff --git a/drivers/gpu/drm/nouveau/nv50_evo.c b/drivers/gpu/drm/nouveau/nv50_evo.c index c8e83c1..c99d975 100644 --- a/drivers/gpu/drm/nouveau/nv50_evo.c +++ b/drivers/gpu/drm/nouveau/nv50_evo.c @@ -38,6 +38,7 @@ nv50_evo_channel_del(struct nouveau_channel **pevo) return; *pevo = NULL; + nouveau_ramht_ref(NULL, &evo->ramht, evo); nouveau_gpuobj_channel_takedown(evo); nouveau_bo_unmap(evo->pushbuf_bo); nouveau_bo_ref(NULL, &evo->pushbuf_bo); @@ -116,7 +117,7 @@ nv50_evo_channel_new(struct drm_device *dev, int chid, evo->user_get = 4; evo->user_put = 0; - ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM, 0, 0, + ret = nouveau_bo_new(dev, 4096, 0, TTM_PL_FLAG_VRAM, 0, 0, &evo->pushbuf_bo); if (ret == 0) ret = nouveau_bo_pin(evo->pushbuf_bo, TTM_PL_FLAG_VRAM); @@ -153,7 +154,7 @@ nv50_evo_channel_init(struct nouveau_channel *evo) { struct drm_device *dev = evo->dev; int id = evo->id, ret, i; - u64 pushbuf = evo->pushbuf_bo->bo.mem.start << PAGE_SHIFT; + u64 pushbuf = evo->pushbuf_bo->bo.offset; u32 tmp; tmp = nv_rd32(dev, NV50_PDISPLAY_EVO_CTRL(id)); @@ -331,16 +332,15 @@ nv50_evo_create(struct drm_device *dev) if (ret) goto err; - ret = nouveau_bo_new(dev, NULL, 4096, 0x1000, TTM_PL_FLAG_VRAM, + ret = nouveau_bo_new(dev, 4096, 0x1000, TTM_PL_FLAG_VRAM, 0, 0x0000, &dispc->sem.bo); if (!ret) { - offset = dispc->sem.bo->bo.mem.start << PAGE_SHIFT; - ret = nouveau_bo_pin(dispc->sem.bo, TTM_PL_FLAG_VRAM); if (!ret) ret = nouveau_bo_map(dispc->sem.bo); if (ret) nouveau_bo_ref(NULL, &dispc->sem.bo); + offset = dispc->sem.bo->bo.offset; } if (ret) diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c index 791ded1..dc75a72 100644 --- a/drivers/gpu/drm/nouveau/nv50_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c @@ -159,7 +159,7 @@ nv50_fbcon_accel_init(struct fb_info *info) struct drm_device *dev = nfbdev->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_channel *chan = dev_priv->channel; - struct nouveau_bo *nvbo = nfbdev->nouveau_fb.nvbo; + struct nouveau_framebuffer *fb = &nfbdev->nouveau_fb; int ret, format; switch (info->var.bits_per_pixel) { @@ -247,8 +247,8 @@ nv50_fbcon_accel_init(struct fb_info *info) OUT_RING(chan, info->fix.line_length); OUT_RING(chan, info->var.xres_virtual); OUT_RING(chan, info->var.yres_virtual); - OUT_RING(chan, upper_32_bits(nvbo->vma.offset)); - OUT_RING(chan, lower_32_bits(nvbo->vma.offset)); + OUT_RING(chan, upper_32_bits(fb->vma.offset)); + OUT_RING(chan, lower_32_bits(fb->vma.offset)); BEGIN_RING(chan, NvSub2D, 0x0230, 2); OUT_RING(chan, format); OUT_RING(chan, 1); @@ -256,8 +256,8 @@ nv50_fbcon_accel_init(struct fb_info *info) OUT_RING(chan, info->fix.line_length); OUT_RING(chan, info->var.xres_virtual); OUT_RING(chan, info->var.yres_virtual); - OUT_RING(chan, upper_32_bits(nvbo->vma.offset)); - OUT_RING(chan, lower_32_bits(nvbo->vma.offset)); + OUT_RING(chan, upper_32_bits(fb->vma.offset)); + OUT_RING(chan, lower_32_bits(fb->vma.offset)); return 0; } diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c index 40680f2b..d43c46c 100644 --- a/drivers/gpu/drm/nouveau/nv50_graph.c +++ b/drivers/gpu/drm/nouveau/nv50_graph.c @@ -124,7 +124,6 @@ static void nv50_graph_init_reset(struct drm_device *dev) { uint32_t pmc_e = NV_PMC_ENABLE_PGRAPH | (1 << 21); - NV_DEBUG(dev, "\n"); nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) & ~pmc_e); @@ -254,9 +253,13 @@ nv50_graph_init(struct drm_device *dev, int engine) } static int -nv50_graph_fini(struct drm_device *dev, int engine) +nv50_graph_fini(struct drm_device *dev, int engine, bool suspend) { - NV_DEBUG(dev, "\n"); + nv_mask(dev, 0x400500, 0x00010001, 0x00000000); + if (!nv_wait(dev, 0x400700, ~0, 0) && suspend) { + nv_mask(dev, 0x400500, 0x00010001, 0x00010001); + return -EBUSY; + } nv50_graph_unload_context(dev); nv_wr32(dev, 0x40013c, 0x00000000); return 0; diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c index 4f95a1e..a7c12c9 100644 --- a/drivers/gpu/drm/nouveau/nv50_instmem.c +++ b/drivers/gpu/drm/nouveau/nv50_instmem.c @@ -305,9 +305,9 @@ struct nv50_gpuobj_node { u32 align; }; - int -nv50_instmem_get(struct nouveau_gpuobj *gpuobj, u32 size, u32 align) +nv50_instmem_get(struct nouveau_gpuobj *gpuobj, struct nouveau_channel *chan, + u32 size, u32 align) { struct drm_device *dev = gpuobj->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; @@ -336,7 +336,7 @@ nv50_instmem_get(struct nouveau_gpuobj *gpuobj, u32 size, u32 align) if (!(gpuobj->flags & NVOBJ_FLAG_VM_USER)) flags |= NV_MEM_ACCESS_SYS; - ret = nouveau_vm_get(dev_priv->chan_vm, size, 12, flags, + ret = nouveau_vm_get(chan->vm, size, 12, flags, &node->chan_vma); if (ret) { vram->put(dev, &node->vram); @@ -345,7 +345,7 @@ nv50_instmem_get(struct nouveau_gpuobj *gpuobj, u32 size, u32 align) } nouveau_vm_map(&node->chan_vma, node->vram); - gpuobj->vinst = node->chan_vma.offset; + gpuobj->linst = node->chan_vma.offset; } gpuobj->size = size; diff --git a/drivers/gpu/drm/nouveau/nv50_mpeg.c b/drivers/gpu/drm/nouveau/nv50_mpeg.c index 1dc5913..b57a2d1 100644 --- a/drivers/gpu/drm/nouveau/nv50_mpeg.c +++ b/drivers/gpu/drm/nouveau/nv50_mpeg.c @@ -160,7 +160,7 @@ nv50_mpeg_init(struct drm_device *dev, int engine) } static int -nv50_mpeg_fini(struct drm_device *dev, int engine) +nv50_mpeg_fini(struct drm_device *dev, int engine, bool suspend) { /*XXX: context save for s/r */ nv_mask(dev, 0x00b32c, 0x00000001, 0x00000000); diff --git a/drivers/gpu/drm/nouveau/nv50_sor.c b/drivers/gpu/drm/nouveau/nv50_sor.c index c25c593..ffe8b48 100644 --- a/drivers/gpu/drm/nouveau/nv50_sor.c +++ b/drivers/gpu/drm/nouveau/nv50_sor.c @@ -318,6 +318,8 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_entry *entry) uint32_t tmp; tmp = nv_rd32(dev, 0x61c700 + (or * 0x800)); + if (!tmp) + tmp = nv_rd32(dev, 0x610798 + (or * 8)); switch ((tmp & 0x00000f00) >> 8) { case 8: diff --git a/drivers/gpu/drm/nouveau/nv50_vm.c b/drivers/gpu/drm/nouveau/nv50_vm.c index 1a0dd49..40b84f2 100644 --- a/drivers/gpu/drm/nouveau/nv50_vm.c +++ b/drivers/gpu/drm/nouveau/nv50_vm.c @@ -156,7 +156,7 @@ nv50_vm_flush(struct nouveau_vm *vm) pinstmem->flush(vm->dev); /* BAR */ - if (vm != dev_priv->chan_vm) { + if (vm == dev_priv->bar1_vm || vm == dev_priv->bar3_vm) { nv50_vm_flush_engine(vm->dev, 6); return; } diff --git a/drivers/gpu/drm/nouveau/nv50_vram.c b/drivers/gpu/drm/nouveau/nv50_vram.c index ffbc3d8..af32dae 100644 --- a/drivers/gpu/drm/nouveau/nv50_vram.c +++ b/drivers/gpu/drm/nouveau/nv50_vram.c @@ -51,9 +51,7 @@ void nv50_vram_del(struct drm_device *dev, struct nouveau_mem **pmem) { struct drm_nouveau_private *dev_priv = dev->dev_private; - struct ttm_bo_device *bdev = &dev_priv->ttm.bdev; - struct ttm_mem_type_manager *man = &bdev->man[TTM_PL_VRAM]; - struct nouveau_mm *mm = man->priv; + struct nouveau_mm *mm = dev_priv->engine.vram.mm; struct nouveau_mm_node *this; struct nouveau_mem *mem; @@ -84,9 +82,7 @@ nv50_vram_new(struct drm_device *dev, u64 size, u32 align, u32 size_nc, u32 memtype, struct nouveau_mem **pmem) { struct drm_nouveau_private *dev_priv = dev->dev_private; - struct ttm_bo_device *bdev = &dev_priv->ttm.bdev; - struct ttm_mem_type_manager *man = &bdev->man[TTM_PL_VRAM]; - struct nouveau_mm *mm = man->priv; + struct nouveau_mm *mm = dev_priv->engine.vram.mm; struct nouveau_mm_node *r; struct nouveau_mem *mem; int comp = (memtype & 0x300) >> 8; @@ -190,22 +186,35 @@ int nv50_vram_init(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_vram_engine *vram = &dev_priv->engine.vram; + const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */ + const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */ + u32 rblock, length; dev_priv->vram_size = nv_rd32(dev, 0x10020c); dev_priv->vram_size |= (dev_priv->vram_size & 0xff) << 32; dev_priv->vram_size &= 0xffffffff00ULL; - switch (dev_priv->chipset) { - case 0xaa: - case 0xac: - case 0xaf: + /* IGPs, no funky reordering happens here, they don't have VRAM */ + if (dev_priv->chipset == 0xaa || + dev_priv->chipset == 0xac || + dev_priv->chipset == 0xaf) { dev_priv->vram_sys_base = (u64)nv_rd32(dev, 0x100e10) << 12; - dev_priv->vram_rblock_size = 4096; - break; - default: - dev_priv->vram_rblock_size = nv50_vram_rblock(dev); - break; + rblock = 4096 >> 12; + } else { + rblock = nv50_vram_rblock(dev) >> 12; } - return 0; + length = (dev_priv->vram_size >> 12) - rsvd_head - rsvd_tail; + + return nouveau_mm_init(&vram->mm, rsvd_head, length, rblock); +} + +void +nv50_vram_fini(struct drm_device *dev) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_vram_engine *vram = &dev_priv->engine.vram; + + nouveau_mm_fini(&vram->mm); } diff --git a/drivers/gpu/drm/nouveau/nv84_crypt.c b/drivers/gpu/drm/nouveau/nv84_crypt.c index 75b809a..edece9c 100644 --- a/drivers/gpu/drm/nouveau/nv84_crypt.c +++ b/drivers/gpu/drm/nouveau/nv84_crypt.c @@ -138,7 +138,7 @@ nv84_crypt_isr(struct drm_device *dev) } static int -nv84_crypt_fini(struct drm_device *dev, int engine) +nv84_crypt_fini(struct drm_device *dev, int engine, bool suspend) { nv_wr32(dev, 0x102140, 0x00000000); return 0; diff --git a/drivers/gpu/drm/nouveau/nva3_copy.c b/drivers/gpu/drm/nouveau/nva3_copy.c index b86820a..8f356d5 100644 --- a/drivers/gpu/drm/nouveau/nva3_copy.c +++ b/drivers/gpu/drm/nouveau/nva3_copy.c @@ -140,7 +140,7 @@ nva3_copy_init(struct drm_device *dev, int engine) } static int -nva3_copy_fini(struct drm_device *dev, int engine) +nva3_copy_fini(struct drm_device *dev, int engine, bool suspend) { nv_mask(dev, 0x104048, 0x00000003, 0x00000000); diff --git a/drivers/gpu/drm/nouveau/nvc0_copy.c b/drivers/gpu/drm/nouveau/nvc0_copy.c index 208fa7a..dddf006 100644 --- a/drivers/gpu/drm/nouveau/nvc0_copy.c +++ b/drivers/gpu/drm/nouveau/nvc0_copy.c @@ -48,14 +48,14 @@ nvc0_copy_context_new(struct nouveau_channel *chan, int engine) struct nouveau_gpuobj *ctx = NULL; int ret; - ret = nouveau_gpuobj_new(dev, NULL, 256, 256, + ret = nouveau_gpuobj_new(dev, chan, 256, 256, NVOBJ_FLAG_VM | NVOBJ_FLAG_VM_USER | NVOBJ_FLAG_ZERO_ALLOC, &ctx); if (ret) return ret; - nv_wo32(ramin, pcopy->ctx + 0, lower_32_bits(ctx->vinst)); - nv_wo32(ramin, pcopy->ctx + 4, upper_32_bits(ctx->vinst)); + nv_wo32(ramin, pcopy->ctx + 0, lower_32_bits(ctx->linst)); + nv_wo32(ramin, pcopy->ctx + 4, upper_32_bits(ctx->linst)); dev_priv->engine.instmem.flush(dev); chan->engctx[engine] = ctx; @@ -127,7 +127,7 @@ nvc0_copy_init(struct drm_device *dev, int engine) } static int -nvc0_copy_fini(struct drm_device *dev, int engine) +nvc0_copy_fini(struct drm_device *dev, int engine, bool suspend) { struct nvc0_copy_engine *pcopy = nv_engine(dev, engine); diff --git a/drivers/gpu/drm/nouveau/nvc0_fb.c b/drivers/gpu/drm/nouveau/nvc0_fb.c index 26a9960..08e6b11 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fb.c +++ b/drivers/gpu/drm/nouveau/nvc0_fb.c @@ -1,5 +1,5 @@ /* - * Copyright 2010 Red Hat Inc. + * Copyright 2011 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,16 +23,80 @@ */ #include "drmP.h" - +#include "drm.h" #include "nouveau_drv.h" +#include "nouveau_drm.h" + +struct nvc0_fb_priv { + struct page *r100c10_page; + dma_addr_t r100c10; +}; + +static void +nvc0_fb_destroy(struct drm_device *dev) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + struct nvc0_fb_priv *priv = pfb->priv; + + if (priv->r100c10_page) { + pci_unmap_page(dev->pdev, priv->r100c10, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + __free_page(priv->r100c10_page); + } + + kfree(priv); + pfb->priv = NULL; +} + +static int +nvc0_fb_create(struct drm_device *dev) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + struct nvc0_fb_priv *priv; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + pfb->priv = priv; + + priv->r100c10_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!priv->r100c10_page) { + nvc0_fb_destroy(dev); + return -ENOMEM; + } + + priv->r100c10 = pci_map_page(dev->pdev, priv->r100c10_page, 0, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(dev->pdev, priv->r100c10)) { + nvc0_fb_destroy(dev); + return -EFAULT; + } + + return 0; +} int nvc0_fb_init(struct drm_device *dev) { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nvc0_fb_priv *priv; + int ret; + + if (!dev_priv->engine.fb.priv) { + ret = nvc0_fb_create(dev); + if (ret) + return ret; + } + priv = dev_priv->engine.fb.priv; + + nv_wr32(dev, 0x100c10, priv->r100c10 >> 8); return 0; } void nvc0_fb_takedown(struct drm_device *dev) { + nvc0_fb_destroy(dev); } diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c index fa5d4c2..a495e48 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c +++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c @@ -159,7 +159,7 @@ nvc0_fbcon_accel_init(struct fb_info *info) struct drm_device *dev = nfbdev->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_channel *chan = dev_priv->channel; - struct nouveau_bo *nvbo = nfbdev->nouveau_fb.nvbo; + struct nouveau_framebuffer *fb = &nfbdev->nouveau_fb; int ret, format; ret = nouveau_gpuobj_gr_new(chan, 0x902d, 0x902d); @@ -203,8 +203,8 @@ nvc0_fbcon_accel_init(struct fb_info *info) BEGIN_NVC0(chan, 2, NvSub2D, 0x0000, 1); OUT_RING (chan, 0x0000902d); BEGIN_NVC0(chan, 2, NvSub2D, 0x0104, 2); - OUT_RING (chan, upper_32_bits(chan->notifier_bo->bo.offset)); - OUT_RING (chan, lower_32_bits(chan->notifier_bo->bo.offset)); + OUT_RING (chan, upper_32_bits(chan->notifier_vma.offset)); + OUT_RING (chan, lower_32_bits(chan->notifier_vma.offset)); BEGIN_NVC0(chan, 2, NvSub2D, 0x0290, 1); OUT_RING (chan, 0); BEGIN_NVC0(chan, 2, NvSub2D, 0x0888, 1); @@ -249,8 +249,8 @@ nvc0_fbcon_accel_init(struct fb_info *info) OUT_RING (chan, info->fix.line_length); OUT_RING (chan, info->var.xres_virtual); OUT_RING (chan, info->var.yres_virtual); - OUT_RING (chan, upper_32_bits(nvbo->vma.offset)); - OUT_RING (chan, lower_32_bits(nvbo->vma.offset)); + OUT_RING (chan, upper_32_bits(fb->vma.offset)); + OUT_RING (chan, lower_32_bits(fb->vma.offset)); BEGIN_NVC0(chan, 2, NvSub2D, 0x0230, 10); OUT_RING (chan, format); OUT_RING (chan, 1); @@ -260,8 +260,8 @@ nvc0_fbcon_accel_init(struct fb_info *info) OUT_RING (chan, info->fix.line_length); OUT_RING (chan, info->var.xres_virtual); OUT_RING (chan, info->var.yres_virtual); - OUT_RING (chan, upper_32_bits(nvbo->vma.offset)); - OUT_RING (chan, lower_32_bits(nvbo->vma.offset)); + OUT_RING (chan, upper_32_bits(fb->vma.offset)); + OUT_RING (chan, lower_32_bits(fb->vma.offset)); FIRE_RING (chan); return 0; diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c index fb4f594..6f9f341 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fifo.c +++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c @@ -210,10 +210,10 @@ nvc0_fifo_unload_context(struct drm_device *dev) int i; for (i = 0; i < 128; i++) { - if (!(nv_rd32(dev, 0x003004 + (i * 4)) & 1)) + if (!(nv_rd32(dev, 0x003004 + (i * 8)) & 1)) continue; - nv_mask(dev, 0x003004 + (i * 4), 0x00000001, 0x00000000); + nv_mask(dev, 0x003004 + (i * 8), 0x00000001, 0x00000000); nv_wr32(dev, 0x002634, i); if (!nv_wait(dev, 0x002634, 0xffffffff, i)) { NV_INFO(dev, "PFIFO: kick ch %d failed: 0x%08x\n", diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.c b/drivers/gpu/drm/nouveau/nvc0_graph.c index ca6db20..5b2f6f4 100644 --- a/drivers/gpu/drm/nouveau/nvc0_graph.c +++ b/drivers/gpu/drm/nouveau/nvc0_graph.c @@ -28,7 +28,34 @@ #include "nouveau_drv.h" #include "nouveau_mm.h" + #include "nvc0_graph.h" +#include "nvc0_grhub.fuc.h" +#include "nvc0_grgpc.fuc.h" + +static void +nvc0_graph_ctxctl_debug_unit(struct drm_device *dev, u32 base) +{ + NV_INFO(dev, "PGRAPH: %06x - done 0x%08x\n", base, + nv_rd32(dev, base + 0x400)); + NV_INFO(dev, "PGRAPH: %06x - stat 0x%08x 0x%08x 0x%08x 0x%08x\n", base, + nv_rd32(dev, base + 0x800), nv_rd32(dev, base + 0x804), + nv_rd32(dev, base + 0x808), nv_rd32(dev, base + 0x80c)); + NV_INFO(dev, "PGRAPH: %06x - stat 0x%08x 0x%08x 0x%08x 0x%08x\n", base, + nv_rd32(dev, base + 0x810), nv_rd32(dev, base + 0x814), + nv_rd32(dev, base + 0x818), nv_rd32(dev, base + 0x81c)); +} + +static void +nvc0_graph_ctxctl_debug(struct drm_device *dev) +{ + u32 gpcnr = nv_rd32(dev, 0x409604) & 0xffff; + u32 gpc; + + nvc0_graph_ctxctl_debug_unit(dev, 0x409000); + for (gpc = 0; gpc < gpcnr; gpc++) + nvc0_graph_ctxctl_debug_unit(dev, 0x502000 + (gpc * 0x8000)); +} static int nvc0_graph_load_context(struct nouveau_channel *chan) @@ -72,24 +99,44 @@ nvc0_graph_construct_context(struct nouveau_channel *chan) if (!ctx) return -ENOMEM; - nvc0_graph_load_context(chan); - - nv_wo32(grch->grctx, 0x1c, 1); - nv_wo32(grch->grctx, 0x20, 0); - nv_wo32(grch->grctx, 0x28, 0); - nv_wo32(grch->grctx, 0x2c, 0); - dev_priv->engine.instmem.flush(dev); - - ret = nvc0_grctx_generate(chan); - if (ret) { - kfree(ctx); - return ret; + if (!nouveau_ctxfw) { + nv_wr32(dev, 0x409840, 0x80000000); + nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->vinst >> 12); + nv_wr32(dev, 0x409504, 0x00000001); + if (!nv_wait(dev, 0x409800, 0x80000000, 0x80000000)) { + NV_ERROR(dev, "PGRAPH: HUB_SET_CHAN timeout\n"); + nvc0_graph_ctxctl_debug(dev); + ret = -EBUSY; + goto err; + } + } else { + nvc0_graph_load_context(chan); + + nv_wo32(grch->grctx, 0x1c, 1); + nv_wo32(grch->grctx, 0x20, 0); + nv_wo32(grch->grctx, 0x28, 0); + nv_wo32(grch->grctx, 0x2c, 0); + dev_priv->engine.instmem.flush(dev); } - ret = nvc0_graph_unload_context_to(dev, chan->ramin->vinst); - if (ret) { - kfree(ctx); - return ret; + ret = nvc0_grctx_generate(chan); + if (ret) + goto err; + + if (!nouveau_ctxfw) { + nv_wr32(dev, 0x409840, 0x80000000); + nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->vinst >> 12); + nv_wr32(dev, 0x409504, 0x00000002); + if (!nv_wait(dev, 0x409800, 0x80000000, 0x80000000)) { + NV_ERROR(dev, "PGRAPH: HUB_CTX_SAVE timeout\n"); + nvc0_graph_ctxctl_debug(dev); + ret = -EBUSY; + goto err; + } + } else { + ret = nvc0_graph_unload_context_to(dev, chan->ramin->vinst); + if (ret) + goto err; } for (i = 0; i < priv->grctx_size; i += 4) @@ -97,6 +144,10 @@ nvc0_graph_construct_context(struct nouveau_channel *chan) priv->grctx_vals = ctx; return 0; + +err: + kfree(ctx); + return ret; } static int @@ -108,50 +159,50 @@ nvc0_graph_create_context_mmio_list(struct nouveau_channel *chan) int i = 0, gpc, tp, ret; u32 magic; - ret = nouveau_gpuobj_new(dev, NULL, 0x2000, 256, NVOBJ_FLAG_VM, + ret = nouveau_gpuobj_new(dev, chan, 0x2000, 256, NVOBJ_FLAG_VM, &grch->unk408004); if (ret) return ret; - ret = nouveau_gpuobj_new(dev, NULL, 0x8000, 256, NVOBJ_FLAG_VM, + ret = nouveau_gpuobj_new(dev, chan, 0x8000, 256, NVOBJ_FLAG_VM, &grch->unk40800c); if (ret) return ret; - ret = nouveau_gpuobj_new(dev, NULL, 384 * 1024, 4096, + ret = nouveau_gpuobj_new(dev, chan, 384 * 1024, 4096, NVOBJ_FLAG_VM | NVOBJ_FLAG_VM_USER, &grch->unk418810); if (ret) return ret; - ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0, NVOBJ_FLAG_VM, + ret = nouveau_gpuobj_new(dev, chan, 0x1000, 0, NVOBJ_FLAG_VM, &grch->mmio); if (ret) return ret; nv_wo32(grch->mmio, i++ * 4, 0x00408004); - nv_wo32(grch->mmio, i++ * 4, grch->unk408004->vinst >> 8); + nv_wo32(grch->mmio, i++ * 4, grch->unk408004->linst >> 8); nv_wo32(grch->mmio, i++ * 4, 0x00408008); nv_wo32(grch->mmio, i++ * 4, 0x80000018); nv_wo32(grch->mmio, i++ * 4, 0x0040800c); - nv_wo32(grch->mmio, i++ * 4, grch->unk40800c->vinst >> 8); + nv_wo32(grch->mmio, i++ * 4, grch->unk40800c->linst >> 8); nv_wo32(grch->mmio, i++ * 4, 0x00408010); nv_wo32(grch->mmio, i++ * 4, 0x80000000); nv_wo32(grch->mmio, i++ * 4, 0x00418810); - nv_wo32(grch->mmio, i++ * 4, 0x80000000 | grch->unk418810->vinst >> 12); + nv_wo32(grch->mmio, i++ * 4, 0x80000000 | grch->unk418810->linst >> 12); nv_wo32(grch->mmio, i++ * 4, 0x00419848); - nv_wo32(grch->mmio, i++ * 4, 0x10000000 | grch->unk418810->vinst >> 12); + nv_wo32(grch->mmio, i++ * 4, 0x10000000 | grch->unk418810->linst >> 12); nv_wo32(grch->mmio, i++ * 4, 0x00419004); - nv_wo32(grch->mmio, i++ * 4, grch->unk40800c->vinst >> 8); + nv_wo32(grch->mmio, i++ * 4, grch->unk40800c->linst >> 8); nv_wo32(grch->mmio, i++ * 4, 0x00419008); nv_wo32(grch->mmio, i++ * 4, 0x00000000); nv_wo32(grch->mmio, i++ * 4, 0x00418808); - nv_wo32(grch->mmio, i++ * 4, grch->unk408004->vinst >> 8); + nv_wo32(grch->mmio, i++ * 4, grch->unk408004->linst >> 8); nv_wo32(grch->mmio, i++ * 4, 0x0041880c); nv_wo32(grch->mmio, i++ * 4, 0x80000018); @@ -159,7 +210,7 @@ nvc0_graph_create_context_mmio_list(struct nouveau_channel *chan) nv_wo32(grch->mmio, i++ * 4, 0x00405830); nv_wo32(grch->mmio, i++ * 4, magic); for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - for (tp = 0; tp < priv->tp_nr[gpc]; tp++, magic += 0x02fc) { + for (tp = 0; tp < priv->tp_nr[gpc]; tp++, magic += 0x0324) { u32 reg = 0x504520 + (gpc * 0x8000) + (tp * 0x0800); nv_wo32(grch->mmio, i++ * 4, reg); nv_wo32(grch->mmio, i++ * 4, magic); @@ -186,7 +237,7 @@ nvc0_graph_context_new(struct nouveau_channel *chan, int engine) return -ENOMEM; chan->engctx[NVOBJ_ENGINE_GR] = grch; - ret = nouveau_gpuobj_new(dev, NULL, priv->grctx_size, 256, + ret = nouveau_gpuobj_new(dev, chan, priv->grctx_size, 256, NVOBJ_FLAG_VM | NVOBJ_FLAG_ZERO_ALLOC, &grch->grctx); if (ret) @@ -197,8 +248,8 @@ nvc0_graph_context_new(struct nouveau_channel *chan, int engine) if (ret) goto error; - nv_wo32(chan->ramin, 0x0210, lower_32_bits(grctx->vinst) | 4); - nv_wo32(chan->ramin, 0x0214, upper_32_bits(grctx->vinst)); + nv_wo32(chan->ramin, 0x0210, lower_32_bits(grctx->linst) | 4); + nv_wo32(chan->ramin, 0x0214, upper_32_bits(grctx->linst)); pinstmem->flush(dev); if (!priv->grctx_vals) { @@ -210,15 +261,20 @@ nvc0_graph_context_new(struct nouveau_channel *chan, int engine) for (i = 0; i < priv->grctx_size; i += 4) nv_wo32(grctx, i, priv->grctx_vals[i / 4]); - nv_wo32(grctx, 0xf4, 0); - nv_wo32(grctx, 0xf8, 0); - nv_wo32(grctx, 0x10, grch->mmio_nr); - nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio->vinst)); - nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio->vinst)); - nv_wo32(grctx, 0x1c, 1); - nv_wo32(grctx, 0x20, 0); - nv_wo32(grctx, 0x28, 0); - nv_wo32(grctx, 0x2c, 0); + if (!nouveau_ctxfw) { + nv_wo32(grctx, 0x00, grch->mmio_nr); + nv_wo32(grctx, 0x04, grch->mmio->linst >> 8); + } else { + nv_wo32(grctx, 0xf4, 0); + nv_wo32(grctx, 0xf8, 0); + nv_wo32(grctx, 0x10, grch->mmio_nr); + nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio->linst)); + nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio->linst)); + nv_wo32(grctx, 0x1c, 1); + nv_wo32(grctx, 0x20, 0); + nv_wo32(grctx, 0x28, 0); + nv_wo32(grctx, 0x2c, 0); + } pinstmem->flush(dev); return 0; @@ -248,7 +304,7 @@ nvc0_graph_object_new(struct nouveau_channel *chan, int engine, } static int -nvc0_graph_fini(struct drm_device *dev, int engine) +nvc0_graph_fini(struct drm_device *dev, int engine, bool suspend) { return 0; } @@ -296,6 +352,7 @@ static void nvc0_graph_init_gpc_0(struct drm_device *dev) { struct nvc0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR); + const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, priv->tp_total); u32 data[TP_MAX / 8]; u8 tpnr[GPC_MAX]; int i, gpc, tpc; @@ -307,13 +364,6 @@ nvc0_graph_init_gpc_0(struct drm_device *dev) * 465: 3/4/4/0 4 7 * 470: 3/3/4/4 5 5 * 480: 3/4/4/4 6 6 - * - * magicgpc918 - * 450: 00200000 00000000001000000000000000000000 - * 460: 00124925 00000000000100100100100100100101 - * 465: 000ba2e9 00000000000010111010001011101001 - * 470: 00092493 00000000000010010010010010010011 - * 480: 00088889 00000000000010001000100010001001 */ memset(data, 0x00, sizeof(data)); @@ -336,10 +386,10 @@ nvc0_graph_init_gpc_0(struct drm_device *dev) nv_wr32(dev, GPC_UNIT(gpc, 0x0914), priv->magic_not_rop_nr << 8 | priv->tp_nr[gpc]); nv_wr32(dev, GPC_UNIT(gpc, 0x0910), 0x00040000 | priv->tp_total); - nv_wr32(dev, GPC_UNIT(gpc, 0x0918), priv->magicgpc918); + nv_wr32(dev, GPC_UNIT(gpc, 0x0918), magicgpc918); } - nv_wr32(dev, GPC_BCAST(0x1bd4), priv->magicgpc918); + nv_wr32(dev, GPC_BCAST(0x1bd4), magicgpc918); nv_wr32(dev, GPC_BCAST(0x08ac), priv->rop_nr); } @@ -419,8 +469,51 @@ nvc0_graph_init_fuc(struct drm_device *dev, u32 fuc_base, static int nvc0_graph_init_ctxctl(struct drm_device *dev) { + struct drm_nouveau_private *dev_priv = dev->dev_private; struct nvc0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR); u32 r000260; + int i; + + if (!nouveau_ctxfw) { + /* load HUB microcode */ + r000260 = nv_mask(dev, 0x000260, 0x00000001, 0x00000000); + nv_wr32(dev, 0x4091c0, 0x01000000); + for (i = 0; i < sizeof(nvc0_grhub_data) / 4; i++) + nv_wr32(dev, 0x4091c4, nvc0_grhub_data[i]); + + nv_wr32(dev, 0x409180, 0x01000000); + for (i = 0; i < sizeof(nvc0_grhub_code) / 4; i++) { + if ((i & 0x3f) == 0) + nv_wr32(dev, 0x409188, i >> 6); + nv_wr32(dev, 0x409184, nvc0_grhub_code[i]); + } + + /* load GPC microcode */ + nv_wr32(dev, 0x41a1c0, 0x01000000); + for (i = 0; i < sizeof(nvc0_grgpc_data) / 4; i++) + nv_wr32(dev, 0x41a1c4, nvc0_grgpc_data[i]); + + nv_wr32(dev, 0x41a180, 0x01000000); + for (i = 0; i < sizeof(nvc0_grgpc_code) / 4; i++) { + if ((i & 0x3f) == 0) + nv_wr32(dev, 0x41a188, i >> 6); + nv_wr32(dev, 0x41a184, nvc0_grgpc_code[i]); + } + nv_wr32(dev, 0x000260, r000260); + + /* start HUB ucode running, it'll init the GPCs */ + nv_wr32(dev, 0x409800, dev_priv->chipset); + nv_wr32(dev, 0x40910c, 0x00000000); + nv_wr32(dev, 0x409100, 0x00000002); + if (!nv_wait(dev, 0x409800, 0x80000000, 0x80000000)) { + NV_ERROR(dev, "PGRAPH: HUB_INIT timed out\n"); + nvc0_graph_ctxctl_debug(dev); + return -EBUSY; + } + + priv->grctx_size = nv_rd32(dev, 0x409804); + return 0; + } /* load fuc microcode */ r000260 = nv_mask(dev, 0x000260, 0x00000001, 0x00000000); @@ -528,6 +621,22 @@ nvc0_graph_isr_chid(struct drm_device *dev, u64 inst) } static void +nvc0_graph_ctxctl_isr(struct drm_device *dev) +{ + u32 ustat = nv_rd32(dev, 0x409c18); + + if (ustat & 0x00000001) + NV_INFO(dev, "PGRAPH: CTXCTRL ucode error\n"); + if (ustat & 0x00080000) + NV_INFO(dev, "PGRAPH: CTXCTRL watchdog timeout\n"); + if (ustat & ~0x00080001) + NV_INFO(dev, "PGRAPH: CTXCTRL 0x%08x\n", ustat); + + nvc0_graph_ctxctl_debug(dev); + nv_wr32(dev, 0x409c20, ustat); +} + +static void nvc0_graph_isr(struct drm_device *dev) { u64 inst = (u64)(nv_rd32(dev, 0x409b00) & 0x0fffffff) << 12; @@ -578,11 +687,7 @@ nvc0_graph_isr(struct drm_device *dev) } if (stat & 0x00080000) { - u32 ustat = nv_rd32(dev, 0x409c18); - - NV_INFO(dev, "PGRAPH: CTXCTRL ustat 0x%08x\n", ustat); - - nv_wr32(dev, 0x409c20, ustat); + nvc0_graph_ctxctl_isr(dev); nv_wr32(dev, 0x400100, 0x00080000); stat &= ~0x00080000; } @@ -606,7 +711,7 @@ nvc0_runk140_isr(struct drm_device *dev) u32 st0 = nv_mask(dev, reg + 0x1020, 0, 0); u32 st1 = nv_mask(dev, reg + 0x1420, 0, 0); - NV_INFO(dev, "PRUNK140: %d 0x%08x 0x%08x\n", unit, st0, st1); + NV_DEBUG(dev, "PRUNK140: %d 0x%08x 0x%08x\n", unit, st0, st1); units &= ~(1 << unit); } } @@ -651,10 +756,12 @@ nvc0_graph_destroy(struct drm_device *dev, int engine) { struct nvc0_graph_priv *priv = nv_engine(dev, engine); - nvc0_graph_destroy_fw(&priv->fuc409c); - nvc0_graph_destroy_fw(&priv->fuc409d); - nvc0_graph_destroy_fw(&priv->fuc41ac); - nvc0_graph_destroy_fw(&priv->fuc41ad); + if (nouveau_ctxfw) { + nvc0_graph_destroy_fw(&priv->fuc409c); + nvc0_graph_destroy_fw(&priv->fuc409d); + nvc0_graph_destroy_fw(&priv->fuc41ac); + nvc0_graph_destroy_fw(&priv->fuc41ad); + } nouveau_irq_unregister(dev, 12); nouveau_irq_unregister(dev, 25); @@ -675,13 +782,10 @@ nvc0_graph_create(struct drm_device *dev) struct drm_nouveau_private *dev_priv = dev->dev_private; struct nvc0_graph_priv *priv; int ret, gpc, i; + u32 fermi; - switch (dev_priv->chipset) { - case 0xc0: - case 0xc3: - case 0xc4: - break; - default: + fermi = nvc0_graph_class(dev); + if (!fermi) { NV_ERROR(dev, "PGRAPH: unsupported chipset, please report!\n"); return 0; } @@ -701,15 +805,17 @@ nvc0_graph_create(struct drm_device *dev) nouveau_irq_register(dev, 12, nvc0_graph_isr); nouveau_irq_register(dev, 25, nvc0_runk140_isr); - if (nvc0_graph_create_fw(dev, "fuc409c", &priv->fuc409c) || - nvc0_graph_create_fw(dev, "fuc409d", &priv->fuc409d) || - nvc0_graph_create_fw(dev, "fuc41ac", &priv->fuc41ac) || - nvc0_graph_create_fw(dev, "fuc41ad", &priv->fuc41ad)) { - ret = 0; - goto error; + if (nouveau_ctxfw) { + NV_INFO(dev, "PGRAPH: using external firmware\n"); + if (nvc0_graph_create_fw(dev, "fuc409c", &priv->fuc409c) || + nvc0_graph_create_fw(dev, "fuc409d", &priv->fuc409d) || + nvc0_graph_create_fw(dev, "fuc41ac", &priv->fuc41ac) || + nvc0_graph_create_fw(dev, "fuc41ad", &priv->fuc41ad)) { + ret = 0; + goto error; + } } - ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b4); if (ret) goto error; @@ -735,25 +841,28 @@ nvc0_graph_create(struct drm_device *dev) case 0xc0: if (priv->tp_total == 11) { /* 465, 3/4/4/0, 4 */ priv->magic_not_rop_nr = 0x07; - /* filled values up to tp_total, the rest 0 */ - priv->magicgpc918 = 0x000ba2e9; } else if (priv->tp_total == 14) { /* 470, 3/3/4/4, 5 */ priv->magic_not_rop_nr = 0x05; - priv->magicgpc918 = 0x00092493; } else if (priv->tp_total == 15) { /* 480, 3/4/4/4, 6 */ priv->magic_not_rop_nr = 0x06; - priv->magicgpc918 = 0x00088889; } break; case 0xc3: /* 450, 4/0/0/0, 2 */ priv->magic_not_rop_nr = 0x03; - priv->magicgpc918 = 0x00200000; break; case 0xc4: /* 460, 3/4/0/0, 4 */ priv->magic_not_rop_nr = 0x01; - priv->magicgpc918 = 0x00124925; + break; + case 0xc1: /* 2/0/0/0, 1 */ + priv->magic_not_rop_nr = 0x01; + break; + case 0xc8: /* 4/4/3/4, 5 */ + priv->magic_not_rop_nr = 0x06; + break; + case 0xce: /* 4/4/0/0, 4 */ + priv->magic_not_rop_nr = 0x03; break; } @@ -763,13 +872,16 @@ nvc0_graph_create(struct drm_device *dev) priv->tp_nr[3], priv->rop_nr); /* use 0xc3's values... */ priv->magic_not_rop_nr = 0x03; - priv->magicgpc918 = 0x00200000; } NVOBJ_CLASS(dev, 0x902d, GR); /* 2D */ NVOBJ_CLASS(dev, 0x9039, GR); /* M2MF */ NVOBJ_MTHD (dev, 0x9039, 0x0500, nvc0_graph_mthd_page_flip); NVOBJ_CLASS(dev, 0x9097, GR); /* 3D */ + if (fermi >= 0x9197) + NVOBJ_CLASS(dev, 0x9197, GR); /* 3D (NVC1-) */ + if (fermi >= 0x9297) + NVOBJ_CLASS(dev, 0x9297, GR); /* 3D (NVC8-) */ NVOBJ_CLASS(dev, 0x90c0, GR); /* COMPUTE */ return 0; diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.fuc b/drivers/gpu/drm/nouveau/nvc0_graph.fuc new file mode 100644 index 0000000..2a4b6dc --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvc0_graph.fuc @@ -0,0 +1,400 @@ +/* fuc microcode util functions for nvc0 PGRAPH + * + * Copyright 2011 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ + +define(`mmctx_data', `.b32 eval((($2 - 1) << 26) | $1)') +define(`queue_init', `.skip eval((2 * 4) + ((8 * 4) * 2))') + +ifdef(`include_code', ` +// Error codes +define(`E_BAD_COMMAND', 0x01) +define(`E_CMD_OVERFLOW', 0x02) + +// Util macros to help with debugging ucode hangs etc +define(`T_WAIT', 0) +define(`T_MMCTX', 1) +define(`T_STRWAIT', 2) +define(`T_STRINIT', 3) +define(`T_AUTO', 4) +define(`T_CHAN', 5) +define(`T_LOAD', 6) +define(`T_SAVE', 7) +define(`T_LCHAN', 8) +define(`T_LCTXH', 9) + +define(`trace_set', ` + mov $r8 0x83c + shl b32 $r8 6 + clear b32 $r9 + bset $r9 $1 + iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7] +') + +define(`trace_clr', ` + mov $r8 0x85c + shl b32 $r8 6 + clear b32 $r9 + bset $r9 $1 + iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7] +') + +// queue_put - add request to queue +// +// In : $r13 queue pointer +// $r14 command +// $r15 data +// +queue_put: + // make sure we have space.. + ld b32 $r8 D[$r13 + 0x0] // GET + ld b32 $r9 D[$r13 + 0x4] // PUT + xor $r8 8 + cmpu b32 $r8 $r9 + bra ne queue_put_next + mov $r15 E_CMD_OVERFLOW + call error + ret + + // store cmd/data on queue + queue_put_next: + and $r8 $r9 7 + shl b32 $r8 3 + add b32 $r8 $r13 + add b32 $r8 8 + st b32 D[$r8 + 0x0] $r14 + st b32 D[$r8 + 0x4] $r15 + + // update PUT + add b32 $r9 1 + and $r9 0xf + st b32 D[$r13 + 0x4] $r9 + ret + +// queue_get - fetch request from queue +// +// In : $r13 queue pointer +// +// Out: $p1 clear on success (data available) +// $r14 command +// $r15 data +// +queue_get: + bset $flags $p1 + ld b32 $r8 D[$r13 + 0x0] // GET + ld b32 $r9 D[$r13 + 0x4] // PUT + cmpu b32 $r8 $r9 + bra e queue_get_done + // fetch first cmd/data pair + and $r9 $r8 7 + shl b32 $r9 3 + add b32 $r9 $r13 + add b32 $r9 8 + ld b32 $r14 D[$r9 + 0x0] + ld b32 $r15 D[$r9 + 0x4] + + // update GET + add b32 $r8 1 + and $r8 0xf + st b32 D[$r13 + 0x0] $r8 + bclr $flags $p1 +queue_get_done: + ret + +// nv_rd32 - read 32-bit value from nv register +// +// In : $r14 register +// Out: $r15 value +// +nv_rd32: + mov $r11 0x728 + shl b32 $r11 6 + mov b32 $r12 $r14 + bset $r12 31 // MMIO_CTRL_PENDING + iowr I[$r11 + 0x000] $r12 // MMIO_CTRL + nv_rd32_wait: + iord $r12 I[$r11 + 0x000] + xbit $r12 $r12 31 + bra ne nv_rd32_wait + mov $r10 6 // DONE_MMIO_RD + call wait_doneo + iord $r15 I[$r11 + 0x100] // MMIO_RDVAL + ret + +// nv_wr32 - write 32-bit value to nv register +// +// In : $r14 register +// $r15 value +// +nv_wr32: + mov $r11 0x728 + shl b32 $r11 6 + iowr I[$r11 + 0x200] $r15 // MMIO_WRVAL + mov b32 $r12 $r14 + bset $r12 31 // MMIO_CTRL_PENDING + bset $r12 30 // MMIO_CTRL_WRITE + iowr I[$r11 + 0x000] $r12 // MMIO_CTRL + nv_wr32_wait: + iord $r12 I[$r11 + 0x000] + xbit $r12 $r12 31 + bra ne nv_wr32_wait + ret + +// (re)set watchdog timer +// +// In : $r15 timeout +// +watchdog_reset: + mov $r8 0x430 + shl b32 $r8 6 + bset $r15 31 + iowr I[$r8 + 0x000] $r15 + ret + +// clear watchdog timer +watchdog_clear: + mov $r8 0x430 + shl b32 $r8 6 + iowr I[$r8 + 0x000] $r0 + ret + +// wait_done{z,o} - wait on FUC_DONE bit to become clear/set +// +// In : $r10 bit to wait on +// +define(`wait_done', ` +$1: + trace_set(T_WAIT); + mov $r8 0x818 + shl b32 $r8 6 + iowr I[$r8 + 0x000] $r10 // CC_SCRATCH[6] = wait bit + wait_done_$1: + mov $r8 0x400 + shl b32 $r8 6 + iord $r8 I[$r8 + 0x000] // DONE + xbit $r8 $r8 $r10 + bra $2 wait_done_$1 + trace_clr(T_WAIT) + ret +') +wait_done(wait_donez, ne) +wait_done(wait_doneo, e) + +// mmctx_size - determine size of a mmio list transfer +// +// In : $r14 mmio list head +// $r15 mmio list tail +// Out: $r15 transfer size (in bytes) +// +mmctx_size: + clear b32 $r9 + nv_mmctx_size_loop: + ld b32 $r8 D[$r14] + shr b32 $r8 26 + add b32 $r8 1 + shl b32 $r8 2 + add b32 $r9 $r8 + add b32 $r14 4 + cmpu b32 $r14 $r15 + bra ne nv_mmctx_size_loop + mov b32 $r15 $r9 + ret + +// mmctx_xfer - execute a list of mmio transfers +// +// In : $r10 flags +// bit 0: direction (0 = save, 1 = load) +// bit 1: set if first transfer +// bit 2: set if last transfer +// $r11 base +// $r12 mmio list head +// $r13 mmio list tail +// $r14 multi_stride +// $r15 multi_mask +// +mmctx_xfer: + trace_set(T_MMCTX) + mov $r8 0x710 + shl b32 $r8 6 + clear b32 $r9 + or $r11 $r11 + bra e mmctx_base_disabled + iowr I[$r8 + 0x000] $r11 // MMCTX_BASE + bset $r9 0 // BASE_EN + mmctx_base_disabled: + or $r14 $r14 + bra e mmctx_multi_disabled + iowr I[$r8 + 0x200] $r14 // MMCTX_MULTI_STRIDE + iowr I[$r8 + 0x300] $r15 // MMCTX_MULTI_MASK + bset $r9 1 // MULTI_EN + mmctx_multi_disabled: + add b32 $r8 0x100 + + xbit $r11 $r10 0 + shl b32 $r11 16 // DIR + bset $r11 12 // QLIMIT = 0x10 + xbit $r14 $r10 1 + shl b32 $r14 17 + or $r11 $r14 // START_TRIGGER + iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL + + // loop over the mmio list, and send requests to the hw + mmctx_exec_loop: + // wait for space in mmctx queue + mmctx_wait_free: + iord $r14 I[$r8 + 0x000] // MMCTX_CTRL + and $r14 0x1f + bra e mmctx_wait_free + + // queue up an entry + ld b32 $r14 D[$r12] + or $r14 $r9 + iowr I[$r8 + 0x300] $r14 + add b32 $r12 4 + cmpu b32 $r12 $r13 + bra ne mmctx_exec_loop + + xbit $r11 $r10 2 + bra ne mmctx_stop + // wait for queue to empty + mmctx_fini_wait: + iord $r11 I[$r8 + 0x000] // MMCTX_CTRL + and $r11 0x1f + cmpu b32 $r11 0x10 + bra ne mmctx_fini_wait + mov $r10 2 // DONE_MMCTX + call wait_donez + bra mmctx_done + mmctx_stop: + xbit $r11 $r10 0 + shl b32 $r11 16 // DIR + bset $r11 12 // QLIMIT = 0x10 + bset $r11 18 // STOP_TRIGGER + iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL + mmctx_stop_wait: + // wait for STOP_TRIGGER to clear + iord $r11 I[$r8 + 0x000] // MMCTX_CTRL + xbit $r11 $r11 18 + bra ne mmctx_stop_wait + mmctx_done: + trace_clr(T_MMCTX) + ret + +// Wait for DONE_STRAND +// +strand_wait: + push $r10 + mov $r10 2 + call wait_donez + pop $r10 + ret + +// unknown - call before issuing strand commands +// +strand_pre: + mov $r8 0x4afc + sethi $r8 0x20000 + mov $r9 0xc + iowr I[$r8] $r9 + call strand_wait + ret + +// unknown - call after issuing strand commands +// +strand_post: + mov $r8 0x4afc + sethi $r8 0x20000 + mov $r9 0xd + iowr I[$r8] $r9 + call strand_wait + ret + +// Selects strand set?! +// +// In: $r14 id +// +strand_set: + mov $r10 0x4ffc + sethi $r10 0x20000 + sub b32 $r11 $r10 0x500 + mov $r12 0xf + iowr I[$r10 + 0x000] $r12 // 0x93c = 0xf + mov $r12 0xb + iowr I[$r11 + 0x000] $r12 // 0x928 = 0xb + call strand_wait + iowr I[$r10 + 0x000] $r14 // 0x93c = <id> + mov $r12 0xa + iowr I[$r11 + 0x000] $r12 // 0x928 = 0xa + call strand_wait + ret + +// Initialise strand context data +// +// In : $r15 context base +// Out: $r15 context size (in bytes) +// +// Strandset(?) 3 hardcoded currently +// +strand_ctx_init: + trace_set(T_STRINIT) + call strand_pre + mov $r14 3 + call strand_set + mov $r10 0x46fc + sethi $r10 0x20000 + add b32 $r11 $r10 0x400 + iowr I[$r10 + 0x100] $r0 // STRAND_FIRST_GENE = 0 + mov $r12 1 + iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_FIRST_GENE + call strand_wait + sub b32 $r12 $r0 1 + iowr I[$r10 + 0x000] $r12 // STRAND_GENE_CNT = 0xffffffff + mov $r12 2 + iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_GENE_CNT + call strand_wait + call strand_post + + // read the size of each strand, poke the context offset of + // each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry + // about it later then. + mov $r8 0x880 + shl b32 $r8 6 + iord $r9 I[$r8 + 0x000] // STRANDS + add b32 $r8 0x2200 + shr b32 $r14 $r15 8 + ctx_init_strand_loop: + iowr I[$r8 + 0x000] $r14 // STRAND_SAVE_SWBASE + iowr I[$r8 + 0x100] $r14 // STRAND_LOAD_SWBASE + iord $r10 I[$r8 + 0x200] // STRAND_SIZE + shr b32 $r10 6 + add b32 $r10 1 + add b32 $r14 $r10 + add b32 $r8 4 + sub b32 $r9 1 + bra ne ctx_init_strand_loop + + shl b32 $r14 8 + sub b32 $r15 $r14 $r15 + trace_clr(T_STRINIT) + ret +') diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.h b/drivers/gpu/drm/nouveau/nvc0_graph.h index f5d184e0..55689e9 100644 --- a/drivers/gpu/drm/nouveau/nvc0_graph.h +++ b/drivers/gpu/drm/nouveau/nvc0_graph.h @@ -57,8 +57,7 @@ struct nvc0_graph_priv { struct nouveau_gpuobj *unk4188b4; struct nouveau_gpuobj *unk4188b8; - u8 magic_not_rop_nr; - u32 magicgpc918; + u8 magic_not_rop_nr; }; struct nvc0_graph_chan { @@ -72,4 +71,25 @@ struct nvc0_graph_chan { int nvc0_grctx_generate(struct nouveau_channel *); +/* nvc0_graph.c uses this also to determine supported chipsets */ +static inline u32 +nvc0_graph_class(struct drm_device *dev) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + + switch (dev_priv->chipset) { + case 0xc0: + case 0xc3: + case 0xc4: + case 0xce: /* guess, mmio trace shows only 0x9097 state */ + return 0x9097; + case 0xc1: + return 0x9197; + case 0xc8: + return 0x9297; + default: + return 0; + } +} + #endif diff --git a/drivers/gpu/drm/nouveau/nvc0_grctx.c b/drivers/gpu/drm/nouveau/nvc0_grctx.c index 6df0661..31018ea 100644 --- a/drivers/gpu/drm/nouveau/nvc0_grctx.c +++ b/drivers/gpu/drm/nouveau/nvc0_grctx.c @@ -45,6 +45,9 @@ nv_mthd(struct drm_device *dev, u32 class, u32 mthd, u32 data) static void nvc0_grctx_generate_9097(struct drm_device *dev) { + u32 fermi = nvc0_graph_class(dev); + u32 mthd; + nv_mthd(dev, 0x9097, 0x0800, 0x00000000); nv_mthd(dev, 0x9097, 0x0840, 0x00000000); nv_mthd(dev, 0x9097, 0x0880, 0x00000000); @@ -824,134 +827,10 @@ nvc0_grctx_generate_9097(struct drm_device *dev) nv_mthd(dev, 0x9097, 0x1eb8, 0x00000001); nv_mthd(dev, 0x9097, 0x1ed8, 0x00000001); nv_mthd(dev, 0x9097, 0x1ef8, 0x00000001); - nv_mthd(dev, 0x9097, 0x3400, 0x00000000); - nv_mthd(dev, 0x9097, 0x3404, 0x00000000); - nv_mthd(dev, 0x9097, 0x3408, 0x00000000); - nv_mthd(dev, 0x9097, 0x340c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3410, 0x00000000); - nv_mthd(dev, 0x9097, 0x3414, 0x00000000); - nv_mthd(dev, 0x9097, 0x3418, 0x00000000); - nv_mthd(dev, 0x9097, 0x341c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3420, 0x00000000); - nv_mthd(dev, 0x9097, 0x3424, 0x00000000); - nv_mthd(dev, 0x9097, 0x3428, 0x00000000); - nv_mthd(dev, 0x9097, 0x342c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3430, 0x00000000); - nv_mthd(dev, 0x9097, 0x3434, 0x00000000); - nv_mthd(dev, 0x9097, 0x3438, 0x00000000); - nv_mthd(dev, 0x9097, 0x343c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3440, 0x00000000); - nv_mthd(dev, 0x9097, 0x3444, 0x00000000); - nv_mthd(dev, 0x9097, 0x3448, 0x00000000); - nv_mthd(dev, 0x9097, 0x344c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3450, 0x00000000); - nv_mthd(dev, 0x9097, 0x3454, 0x00000000); - nv_mthd(dev, 0x9097, 0x3458, 0x00000000); - nv_mthd(dev, 0x9097, 0x345c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3460, 0x00000000); - nv_mthd(dev, 0x9097, 0x3464, 0x00000000); - nv_mthd(dev, 0x9097, 0x3468, 0x00000000); - nv_mthd(dev, 0x9097, 0x346c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3470, 0x00000000); - nv_mthd(dev, 0x9097, 0x3474, 0x00000000); - nv_mthd(dev, 0x9097, 0x3478, 0x00000000); - nv_mthd(dev, 0x9097, 0x347c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3480, 0x00000000); - nv_mthd(dev, 0x9097, 0x3484, 0x00000000); - nv_mthd(dev, 0x9097, 0x3488, 0x00000000); - nv_mthd(dev, 0x9097, 0x348c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3490, 0x00000000); - nv_mthd(dev, 0x9097, 0x3494, 0x00000000); - nv_mthd(dev, 0x9097, 0x3498, 0x00000000); - nv_mthd(dev, 0x9097, 0x349c, 0x00000000); - nv_mthd(dev, 0x9097, 0x34a0, 0x00000000); - nv_mthd(dev, 0x9097, 0x34a4, 0x00000000); - nv_mthd(dev, 0x9097, 0x34a8, 0x00000000); - nv_mthd(dev, 0x9097, 0x34ac, 0x00000000); - nv_mthd(dev, 0x9097, 0x34b0, 0x00000000); - nv_mthd(dev, 0x9097, 0x34b4, 0x00000000); - nv_mthd(dev, 0x9097, 0x34b8, 0x00000000); - nv_mthd(dev, 0x9097, 0x34bc, 0x00000000); - nv_mthd(dev, 0x9097, 0x34c0, 0x00000000); - nv_mthd(dev, 0x9097, 0x34c4, 0x00000000); - nv_mthd(dev, 0x9097, 0x34c8, 0x00000000); - nv_mthd(dev, 0x9097, 0x34cc, 0x00000000); - nv_mthd(dev, 0x9097, 0x34d0, 0x00000000); - nv_mthd(dev, 0x9097, 0x34d4, 0x00000000); - nv_mthd(dev, 0x9097, 0x34d8, 0x00000000); - nv_mthd(dev, 0x9097, 0x34dc, 0x00000000); - nv_mthd(dev, 0x9097, 0x34e0, 0x00000000); - nv_mthd(dev, 0x9097, 0x34e4, 0x00000000); - nv_mthd(dev, 0x9097, 0x34e8, 0x00000000); - nv_mthd(dev, 0x9097, 0x34ec, 0x00000000); - nv_mthd(dev, 0x9097, 0x34f0, 0x00000000); - nv_mthd(dev, 0x9097, 0x34f4, 0x00000000); - nv_mthd(dev, 0x9097, 0x34f8, 0x00000000); - nv_mthd(dev, 0x9097, 0x34fc, 0x00000000); - nv_mthd(dev, 0x9097, 0x3500, 0x00000000); - nv_mthd(dev, 0x9097, 0x3504, 0x00000000); - nv_mthd(dev, 0x9097, 0x3508, 0x00000000); - nv_mthd(dev, 0x9097, 0x350c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3510, 0x00000000); - nv_mthd(dev, 0x9097, 0x3514, 0x00000000); - nv_mthd(dev, 0x9097, 0x3518, 0x00000000); - nv_mthd(dev, 0x9097, 0x351c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3520, 0x00000000); - nv_mthd(dev, 0x9097, 0x3524, 0x00000000); - nv_mthd(dev, 0x9097, 0x3528, 0x00000000); - nv_mthd(dev, 0x9097, 0x352c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3530, 0x00000000); - nv_mthd(dev, 0x9097, 0x3534, 0x00000000); - nv_mthd(dev, 0x9097, 0x3538, 0x00000000); - nv_mthd(dev, 0x9097, 0x353c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3540, 0x00000000); - nv_mthd(dev, 0x9097, 0x3544, 0x00000000); - nv_mthd(dev, 0x9097, 0x3548, 0x00000000); - nv_mthd(dev, 0x9097, 0x354c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3550, 0x00000000); - nv_mthd(dev, 0x9097, 0x3554, 0x00000000); - nv_mthd(dev, 0x9097, 0x3558, 0x00000000); - nv_mthd(dev, 0x9097, 0x355c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3560, 0x00000000); - nv_mthd(dev, 0x9097, 0x3564, 0x00000000); - nv_mthd(dev, 0x9097, 0x3568, 0x00000000); - nv_mthd(dev, 0x9097, 0x356c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3570, 0x00000000); - nv_mthd(dev, 0x9097, 0x3574, 0x00000000); - nv_mthd(dev, 0x9097, 0x3578, 0x00000000); - nv_mthd(dev, 0x9097, 0x357c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3580, 0x00000000); - nv_mthd(dev, 0x9097, 0x3584, 0x00000000); - nv_mthd(dev, 0x9097, 0x3588, 0x00000000); - nv_mthd(dev, 0x9097, 0x358c, 0x00000000); - nv_mthd(dev, 0x9097, 0x3590, 0x00000000); - nv_mthd(dev, 0x9097, 0x3594, 0x00000000); - nv_mthd(dev, 0x9097, 0x3598, 0x00000000); - nv_mthd(dev, 0x9097, 0x359c, 0x00000000); - nv_mthd(dev, 0x9097, 0x35a0, 0x00000000); - nv_mthd(dev, 0x9097, 0x35a4, 0x00000000); - nv_mthd(dev, 0x9097, 0x35a8, 0x00000000); - nv_mthd(dev, 0x9097, 0x35ac, 0x00000000); - nv_mthd(dev, 0x9097, 0x35b0, 0x00000000); - nv_mthd(dev, 0x9097, 0x35b4, 0x00000000); - nv_mthd(dev, 0x9097, 0x35b8, 0x00000000); - nv_mthd(dev, 0x9097, 0x35bc, 0x00000000); - nv_mthd(dev, 0x9097, 0x35c0, 0x00000000); - nv_mthd(dev, 0x9097, 0x35c4, 0x00000000); - nv_mthd(dev, 0x9097, 0x35c8, 0x00000000); - nv_mthd(dev, 0x9097, 0x35cc, 0x00000000); - nv_mthd(dev, 0x9097, 0x35d0, 0x00000000); - nv_mthd(dev, 0x9097, 0x35d4, 0x00000000); - nv_mthd(dev, 0x9097, 0x35d8, 0x00000000); - nv_mthd(dev, 0x9097, 0x35dc, 0x00000000); - nv_mthd(dev, 0x9097, 0x35e0, 0x00000000); - nv_mthd(dev, 0x9097, 0x35e4, 0x00000000); - nv_mthd(dev, 0x9097, 0x35e8, 0x00000000); - nv_mthd(dev, 0x9097, 0x35ec, 0x00000000); - nv_mthd(dev, 0x9097, 0x35f0, 0x00000000); - nv_mthd(dev, 0x9097, 0x35f4, 0x00000000); - nv_mthd(dev, 0x9097, 0x35f8, 0x00000000); - nv_mthd(dev, 0x9097, 0x35fc, 0x00000000); + if (fermi == 0x9097) { + for (mthd = 0x3400; mthd <= 0x35fc; mthd += 4) + nv_mthd(dev, 0x9097, mthd, 0x00000000); + } nv_mthd(dev, 0x9097, 0x030c, 0x00000001); nv_mthd(dev, 0x9097, 0x1944, 0x00000000); nv_mthd(dev, 0x9097, 0x1514, 0x00000000); @@ -1321,6 +1200,37 @@ nvc0_grctx_generate_9097(struct drm_device *dev) } static void +nvc0_grctx_generate_9197(struct drm_device *dev) +{ + u32 fermi = nvc0_graph_class(dev); + u32 mthd; + + if (fermi == 0x9197) { + for (mthd = 0x3400; mthd <= 0x35fc; mthd += 4) + nv_mthd(dev, 0x9197, mthd, 0x00000000); + } + nv_mthd(dev, 0x9197, 0x02e4, 0x0000b001); +} + +static void +nvc0_grctx_generate_9297(struct drm_device *dev) +{ + u32 fermi = nvc0_graph_class(dev); + u32 mthd; + + if (fermi == 0x9297) { + for (mthd = 0x3400; mthd <= 0x35fc; mthd += 4) + nv_mthd(dev, 0x9297, mthd, 0x00000000); + } + nv_mthd(dev, 0x9297, 0x036c, 0x00000000); + nv_mthd(dev, 0x9297, 0x0370, 0x00000000); + nv_mthd(dev, 0x9297, 0x07a4, 0x00000000); + nv_mthd(dev, 0x9297, 0x07a8, 0x00000000); + nv_mthd(dev, 0x9297, 0x0374, 0x00000000); + nv_mthd(dev, 0x9297, 0x0378, 0x00000020); +} + +static void nvc0_grctx_generate_902d(struct drm_device *dev) { nv_mthd(dev, 0x902d, 0x0200, 0x000000cf); @@ -1559,8 +1469,15 @@ nvc0_grctx_generate_unk47xx(struct drm_device *dev) static void nvc0_grctx_generate_shaders(struct drm_device *dev) { - nv_wr32(dev, 0x405800, 0x078000bf); - nv_wr32(dev, 0x405830, 0x02180000); + struct drm_nouveau_private *dev_priv = dev->dev_private; + + if (dev_priv->chipset != 0xc1) { + nv_wr32(dev, 0x405800, 0x078000bf); + nv_wr32(dev, 0x405830, 0x02180000); + } else { + nv_wr32(dev, 0x405800, 0x0f8000bf); + nv_wr32(dev, 0x405830, 0x02180218); + } nv_wr32(dev, 0x405834, 0x00000000); nv_wr32(dev, 0x405838, 0x00000000); nv_wr32(dev, 0x405854, 0x00000000); @@ -1586,10 +1503,16 @@ nvc0_grctx_generate_unk60xx(struct drm_device *dev) static void nvc0_grctx_generate_unk64xx(struct drm_device *dev) { + struct drm_nouveau_private *dev_priv = dev->dev_private; + nv_wr32(dev, 0x4064a8, 0x00000000); nv_wr32(dev, 0x4064ac, 0x00003fff); nv_wr32(dev, 0x4064b4, 0x00000000); nv_wr32(dev, 0x4064b8, 0x00000000); + if (dev_priv->chipset == 0xc1) { + nv_wr32(dev, 0x4064c0, 0x80140078); + nv_wr32(dev, 0x4064c4, 0x0086ffff); + } } static void @@ -1622,21 +1545,14 @@ static void nvc0_grctx_generate_rop(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; + int chipset = dev_priv->chipset; /* ROPC_BROADCAST */ nv_wr32(dev, 0x408800, 0x02802a3c); nv_wr32(dev, 0x408804, 0x00000040); - nv_wr32(dev, 0x408808, 0x0003e00d); - switch (dev_priv->chipset) { - case 0xc0: - nv_wr32(dev, 0x408900, 0x0080b801); - break; - case 0xc3: - case 0xc4: - nv_wr32(dev, 0x408900, 0x3080b801); - break; - } - nv_wr32(dev, 0x408904, 0x02000001); + nv_wr32(dev, 0x408808, chipset != 0xc1 ? 0x0003e00d : 0x1003e005); + nv_wr32(dev, 0x408900, 0x3080b801); + nv_wr32(dev, 0x408904, chipset != 0xc1 ? 0x02000001 : 0x62000001); nv_wr32(dev, 0x408908, 0x00c80929); nv_wr32(dev, 0x40890c, 0x00000000); nv_wr32(dev, 0x408980, 0x0000011d); @@ -1645,6 +1561,8 @@ nvc0_grctx_generate_rop(struct drm_device *dev) static void nvc0_grctx_generate_gpc(struct drm_device *dev) { + struct drm_nouveau_private *dev_priv = dev->dev_private; + int chipset = dev_priv->chipset; int i; /* GPC_BROADCAST */ @@ -1676,7 +1594,7 @@ nvc0_grctx_generate_gpc(struct drm_device *dev) nv_wr32(dev, 0x41880c, 0x00000000); nv_wr32(dev, 0x418810, 0x00000000); nv_wr32(dev, 0x418828, 0x00008442); - nv_wr32(dev, 0x418830, 0x00000001); + nv_wr32(dev, 0x418830, chipset != 0xc1 ? 0x00000001 : 0x10000001); nv_wr32(dev, 0x4188d8, 0x00000008); nv_wr32(dev, 0x4188e0, 0x01000000); nv_wr32(dev, 0x4188e8, 0x00000000); @@ -1684,7 +1602,7 @@ nvc0_grctx_generate_gpc(struct drm_device *dev) nv_wr32(dev, 0x4188f0, 0x00000000); nv_wr32(dev, 0x4188f4, 0x00000000); nv_wr32(dev, 0x4188f8, 0x00000000); - nv_wr32(dev, 0x4188fc, 0x00100000); + nv_wr32(dev, 0x4188fc, chipset != 0xc1 ? 0x00100000 : 0x00100018); nv_wr32(dev, 0x41891c, 0x00ff00ff); nv_wr32(dev, 0x418924, 0x00000000); nv_wr32(dev, 0x418928, 0x00ffff00); @@ -1715,6 +1633,8 @@ nvc0_grctx_generate_gpc(struct drm_device *dev) nv_wr32(dev, 0x418c24, 0x00000000); nv_wr32(dev, 0x418c28, 0x00000000); nv_wr32(dev, 0x418c2c, 0x00000000); + if (chipset == 0xc1) + nv_wr32(dev, 0x418c6c, 0x00000001); nv_wr32(dev, 0x418c80, 0x20200004); nv_wr32(dev, 0x418c8c, 0x00000001); nv_wr32(dev, 0x419000, 0x00000780); @@ -1727,10 +1647,13 @@ static void nvc0_grctx_generate_tp(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; + int chipset = dev_priv->chipset; /* GPC_BROADCAST.TP_BROADCAST */ + nv_wr32(dev, 0x419818, 0x00000000); + nv_wr32(dev, 0x41983c, 0x00038bc7); nv_wr32(dev, 0x419848, 0x00000000); - nv_wr32(dev, 0x419864, 0x0000012a); + nv_wr32(dev, 0x419864, chipset != 0xc1 ? 0x0000012a : 0x00000129); nv_wr32(dev, 0x419888, 0x00000000); nv_wr32(dev, 0x419a00, 0x000001f0); nv_wr32(dev, 0x419a04, 0x00000001); @@ -1740,8 +1663,8 @@ nvc0_grctx_generate_tp(struct drm_device *dev) nv_wr32(dev, 0x419a14, 0x00000200); nv_wr32(dev, 0x419a1c, 0x00000000); nv_wr32(dev, 0x419a20, 0x00000800); - if (dev_priv->chipset != 0xc0) - nv_wr32(dev, 0x00419ac4, 0x0007f440); /* 0xc3 */ + if (chipset != 0xc0 && chipset != 0xc8) + nv_wr32(dev, 0x00419ac4, 0x0007f440); nv_wr32(dev, 0x419b00, 0x0a418820); nv_wr32(dev, 0x419b04, 0x062080e6); nv_wr32(dev, 0x419b08, 0x020398a4); @@ -1749,17 +1672,19 @@ nvc0_grctx_generate_tp(struct drm_device *dev) nv_wr32(dev, 0x419b10, 0x0a418820); nv_wr32(dev, 0x419b14, 0x000000e6); nv_wr32(dev, 0x419bd0, 0x00900103); - nv_wr32(dev, 0x419be0, 0x00000001); + nv_wr32(dev, 0x419be0, chipset != 0xc1 ? 0x00000001 : 0x00400001); nv_wr32(dev, 0x419be4, 0x00000000); nv_wr32(dev, 0x419c00, 0x00000002); nv_wr32(dev, 0x419c04, 0x00000006); nv_wr32(dev, 0x419c08, 0x00000002); nv_wr32(dev, 0x419c20, 0x00000000); - nv_wr32(dev, 0x419cbc, 0x28137606); + nv_wr32(dev, 0x419cb0, 0x00060048); //XXX: 0xce 0x00020048 nv_wr32(dev, 0x419ce8, 0x00000000); nv_wr32(dev, 0x419cf4, 0x00000183); - nv_wr32(dev, 0x419d20, 0x02180000); + nv_wr32(dev, 0x419d20, chipset != 0xc1 ? 0x02180000 : 0x12180000); nv_wr32(dev, 0x419d24, 0x00001fff); + if (chipset == 0xc1) + nv_wr32(dev, 0x419d44, 0x02180218); nv_wr32(dev, 0x419e04, 0x00000000); nv_wr32(dev, 0x419e08, 0x00000000); nv_wr32(dev, 0x419e0c, 0x00000000); @@ -1785,11 +1710,11 @@ nvc0_grctx_generate_tp(struct drm_device *dev) nv_wr32(dev, 0x419e8c, 0x00000000); nv_wr32(dev, 0x419e90, 0x00000000); nv_wr32(dev, 0x419e98, 0x00000000); - if (dev_priv->chipset != 0xc0) + if (chipset != 0xc0 && chipset != 0xc8) nv_wr32(dev, 0x419ee0, 0x00011110); nv_wr32(dev, 0x419f50, 0x00000000); nv_wr32(dev, 0x419f54, 0x00000000); - if (dev_priv->chipset != 0xc0) + if (chipset != 0xc0 && chipset != 0xc8) nv_wr32(dev, 0x419f58, 0x00000000); } @@ -1801,6 +1726,7 @@ nvc0_grctx_generate(struct nouveau_channel *chan) struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR]; struct drm_device *dev = chan->dev; int i, gpc, tp, id; + u32 fermi = nvc0_graph_class(dev); u32 r000260, tmp; r000260 = nv_rd32(dev, 0x000260); @@ -1857,10 +1783,11 @@ nvc0_grctx_generate(struct nouveau_channel *chan) nv_wr32(dev, 0x40587c, 0x00000000); if (1) { - const u8 chipset_tp_max[] = { 16, 0, 0, 4, 8 }; + const u8 chipset_tp_max[] = { 16, 4, 0, 4, 8, 0, 0, 0, + 16, 0, 0, 0, 0, 0, 8, 0 }; u8 max = chipset_tp_max[dev_priv->chipset & 0x0f]; u8 tpnr[GPC_MAX]; - u8 data[32]; + u8 data[TP_MAX]; memcpy(tpnr, priv->tp_nr, sizeof(priv->tp_nr)); memset(data, 0x1f, sizeof(data)); @@ -2633,6 +2560,8 @@ nvc0_grctx_generate(struct nouveau_channel *chan) nv_icmd(dev, 0x0000053f, 0xffff0000); nv_icmd(dev, 0x00000585, 0x0000003f); nv_icmd(dev, 0x00000576, 0x00000003); + if (dev_priv->chipset == 0xc1) + nv_icmd(dev, 0x0000057b, 0x00000059); nv_icmd(dev, 0x00000586, 0x00000040); nv_icmd(dev, 0x00000582, 0x00000080); nv_icmd(dev, 0x00000583, 0x00000080); @@ -2865,6 +2794,10 @@ nvc0_grctx_generate(struct nouveau_channel *chan) nv_wr32(dev, 0x404154, 0x00000400); nvc0_grctx_generate_9097(dev); + if (fermi >= 0x9197) + nvc0_grctx_generate_9197(dev); + if (fermi >= 0x9297) + nvc0_grctx_generate_9297(dev); nvc0_grctx_generate_902d(dev); nvc0_grctx_generate_9039(dev); nvc0_grctx_generate_90c0(dev); diff --git a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc new file mode 100644 index 0000000..0ec2add --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc @@ -0,0 +1,474 @@ +/* fuc microcode for nvc0 PGRAPH/GPC + * + * Copyright 2011 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ + +/* To build: + * m4 nvc0_grgpc.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grgpc.fuc.h + */ + +/* TODO + * - bracket certain functions with scratch writes, useful for debugging + * - watchdog timer around ctx operations + */ + +.section nvc0_grgpc_data +include(`nvc0_graph.fuc') +gpc_id: .b32 0 +gpc_mmio_list_head: .b32 0 +gpc_mmio_list_tail: .b32 0 + +tpc_count: .b32 0 +tpc_mask: .b32 0 +tpc_mmio_list_head: .b32 0 +tpc_mmio_list_tail: .b32 0 + +cmd_queue: queue_init + +// chipset descriptions +chipsets: +.b8 0xc0 0 0 0 +.b16 nvc0_gpc_mmio_head +.b16 nvc0_gpc_mmio_tail +.b16 nvc0_tpc_mmio_head +.b16 nvc0_tpc_mmio_tail +.b8 0xc1 0 0 0 +.b16 nvc0_gpc_mmio_head +.b16 nvc1_gpc_mmio_tail +.b16 nvc0_tpc_mmio_head +.b16 nvc1_tpc_mmio_tail +.b8 0xc3 0 0 0 +.b16 nvc0_gpc_mmio_head +.b16 nvc0_gpc_mmio_tail +.b16 nvc0_tpc_mmio_head +.b16 nvc3_tpc_mmio_tail +.b8 0xc4 0 0 0 +.b16 nvc0_gpc_mmio_head +.b16 nvc0_gpc_mmio_tail +.b16 nvc0_tpc_mmio_head +.b16 nvc3_tpc_mmio_tail +.b8 0xc8 0 0 0 +.b16 nvc0_gpc_mmio_head +.b16 nvc0_gpc_mmio_tail +.b16 nvc0_tpc_mmio_head +.b16 nvc0_tpc_mmio_tail +.b8 0xce 0 0 0 +.b16 nvc0_gpc_mmio_head +.b16 nvc0_gpc_mmio_tail +.b16 nvc0_tpc_mmio_head +.b16 nvc3_tpc_mmio_tail +.b8 0 0 0 0 + +// GPC mmio lists +nvc0_gpc_mmio_head: +mmctx_data(0x000380, 1) +mmctx_data(0x000400, 6) +mmctx_data(0x000450, 9) +mmctx_data(0x000600, 1) +mmctx_data(0x000684, 1) +mmctx_data(0x000700, 5) +mmctx_data(0x000800, 1) +mmctx_data(0x000808, 3) +mmctx_data(0x000828, 1) +mmctx_data(0x000830, 1) +mmctx_data(0x0008d8, 1) +mmctx_data(0x0008e0, 1) +mmctx_data(0x0008e8, 6) +mmctx_data(0x00091c, 1) +mmctx_data(0x000924, 3) +mmctx_data(0x000b00, 1) +mmctx_data(0x000b08, 6) +mmctx_data(0x000bb8, 1) +mmctx_data(0x000c08, 1) +mmctx_data(0x000c10, 8) +mmctx_data(0x000c80, 1) +mmctx_data(0x000c8c, 1) +mmctx_data(0x001000, 3) +mmctx_data(0x001014, 1) +nvc0_gpc_mmio_tail: +mmctx_data(0x000c6c, 1); +nvc1_gpc_mmio_tail: + +// TPC mmio lists +nvc0_tpc_mmio_head: +mmctx_data(0x000018, 1) +mmctx_data(0x00003c, 1) +mmctx_data(0x000048, 1) +mmctx_data(0x000064, 1) +mmctx_data(0x000088, 1) +mmctx_data(0x000200, 6) +mmctx_data(0x00021c, 2) +mmctx_data(0x000300, 6) +mmctx_data(0x0003d0, 1) +mmctx_data(0x0003e0, 2) +mmctx_data(0x000400, 3) +mmctx_data(0x000420, 1) +mmctx_data(0x0004b0, 1) +mmctx_data(0x0004e8, 1) +mmctx_data(0x0004f4, 1) +mmctx_data(0x000520, 2) +mmctx_data(0x000604, 4) +mmctx_data(0x000644, 20) +mmctx_data(0x000698, 1) +mmctx_data(0x000750, 2) +nvc0_tpc_mmio_tail: +mmctx_data(0x000758, 1) +mmctx_data(0x0002c4, 1) +mmctx_data(0x0004bc, 1) +mmctx_data(0x0006e0, 1) +nvc3_tpc_mmio_tail: +mmctx_data(0x000544, 1) +nvc1_tpc_mmio_tail: + + +.section nvc0_grgpc_code +bra init +define(`include_code') +include(`nvc0_graph.fuc') + +// reports an exception to the host +// +// In: $r15 error code (see nvc0_graph.fuc) +// +error: + push $r14 + mov $r14 -0x67ec // 0x9814 + sethi $r14 0x400000 + call nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code + add b32 $r14 0x41c + mov $r15 1 + call nv_wr32 // HUB_CTXCTL_INTR_UP_SET + pop $r14 + ret + +// GPC fuc initialisation, executed by triggering ucode start, will +// fall through to main loop after completion. +// +// Input: +// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) +// CC_SCRATCH[1]: context base +// +// Output: +// CC_SCRATCH[0]: +// 31:31: set to signal completion +// CC_SCRATCH[1]: +// 31:0: GPC context size +// +init: + clear b32 $r0 + mov $sp $r0 + + // enable fifo access + mov $r1 0x1200 + mov $r2 2 + iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE + + // setup i0 handler, and route all interrupts to it + mov $r1 ih + mov $iv0 $r1 + mov $r1 0x400 + iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH + + // enable fifo interrupt + mov $r2 4 + iowr I[$r1 + 0x000] $r2 // INTR_EN_SET + + // enable interrupts + bset $flags ie0 + + // figure out which GPC we are, and how many TPCs we have + mov $r1 0x608 + shl b32 $r1 6 + iord $r2 I[$r1 + 0x000] // UNITS + mov $r3 1 + and $r2 0x1f + shl b32 $r3 $r2 + sub b32 $r3 1 + st b32 D[$r0 + tpc_count] $r2 + st b32 D[$r0 + tpc_mask] $r3 + add b32 $r1 0x400 + iord $r2 I[$r1 + 0x000] // MYINDEX + st b32 D[$r0 + gpc_id] $r2 + + // find context data for this chipset + mov $r2 0x800 + shl b32 $r2 6 + iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] + mov $r1 chipsets - 12 + init_find_chipset: + add b32 $r1 12 + ld b32 $r3 D[$r1 + 0x00] + cmpu b32 $r3 $r2 + bra e init_context + cmpu b32 $r3 0 + bra ne init_find_chipset + // unknown chipset + ret + + // initialise context base, and size tracking + init_context: + mov $r2 0x800 + shl b32 $r2 6 + iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base + clear b32 $r3 // track GPC context size here + + // set mmctx base addresses now so we don't have to do it later, + // they don't currently ever change + mov $r4 0x700 + shl b32 $r4 6 + shr b32 $r5 $r2 8 + iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE + iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE + + // calculate GPC mmio context size, store the chipset-specific + // mmio list pointers somewhere we can get at them later without + // re-parsing the chipset list + clear b32 $r14 + clear b32 $r15 + ld b16 $r14 D[$r1 + 4] + ld b16 $r15 D[$r1 + 6] + st b16 D[$r0 + gpc_mmio_list_head] $r14 + st b16 D[$r0 + gpc_mmio_list_tail] $r15 + call mmctx_size + add b32 $r2 $r15 + add b32 $r3 $r15 + + // calculate per-TPC mmio context size, store the list pointers + ld b16 $r14 D[$r1 + 8] + ld b16 $r15 D[$r1 + 10] + st b16 D[$r0 + tpc_mmio_list_head] $r14 + st b16 D[$r0 + tpc_mmio_list_tail] $r15 + call mmctx_size + ld b32 $r14 D[$r0 + tpc_count] + mulu $r14 $r15 + add b32 $r2 $r14 + add b32 $r3 $r14 + + // round up base/size to 256 byte boundary (for strand SWBASE) + add b32 $r4 0x1300 + shr b32 $r3 2 + iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!? + shr b32 $r2 8 + shr b32 $r3 6 + add b32 $r2 1 + add b32 $r3 1 + shl b32 $r2 8 + shl b32 $r3 8 + + // calculate size of strand context data + mov b32 $r15 $r2 + call strand_ctx_init + add b32 $r3 $r15 + + // save context size, and tell HUB we're done + mov $r1 0x800 + shl b32 $r1 6 + iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size + add b32 $r1 0x800 + clear b32 $r2 + bset $r2 31 + iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 + +// Main program loop, very simple, sleeps until woken up by the interrupt +// handler, pulls a command from the queue and executes its handler +// +main: + bset $flags $p0 + sleep $p0 + mov $r13 cmd_queue + call queue_get + bra $p1 main + + // 0x0000-0x0003 are all context transfers + cmpu b32 $r14 0x04 + bra nc main_not_ctx_xfer + // fetch $flags and mask off $p1/$p2 + mov $r1 $flags + mov $r2 0x0006 + not b32 $r2 + and $r1 $r2 + // set $p1/$p2 according to transfer type + shl b32 $r14 1 + or $r1 $r14 + mov $flags $r1 + // transfer context data + call ctx_xfer + bra main + + main_not_ctx_xfer: + shl b32 $r15 $r14 16 + or $r15 E_BAD_COMMAND + call error + bra main + +// interrupt handler +ih: + push $r8 + mov $r8 $flags + push $r8 + push $r9 + push $r10 + push $r11 + push $r13 + push $r14 + push $r15 + + // incoming fifo command? + iord $r10 I[$r0 + 0x200] // INTR + and $r11 $r10 0x00000004 + bra e ih_no_fifo + // queue incoming fifo command for later processing + mov $r11 0x1900 + mov $r13 cmd_queue + iord $r14 I[$r11 + 0x100] // FIFO_CMD + iord $r15 I[$r11 + 0x000] // FIFO_DATA + call queue_put + add b32 $r11 0x400 + mov $r14 1 + iowr I[$r11 + 0x000] $r14 // FIFO_ACK + + // ack, and wake up main() + ih_no_fifo: + iowr I[$r0 + 0x100] $r10 // INTR_ACK + + pop $r15 + pop $r14 + pop $r13 + pop $r11 + pop $r10 + pop $r9 + pop $r8 + mov $flags $r8 + pop $r8 + bclr $flags $p0 + iret + +// Set this GPC's bit in HUB_BAR, used to signal completion of various +// activities to the HUB fuc +// +hub_barrier_done: + mov $r15 1 + ld b32 $r14 D[$r0 + gpc_id] + shl b32 $r15 $r14 + mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET + sethi $r14 0x400000 + call nv_wr32 + ret + +// Disables various things, waits a bit, and re-enables them.. +// +// Not sure how exactly this helps, perhaps "ENABLE" is not such a +// good description for the bits we turn off? Anyways, without this, +// funny things happen. +// +ctx_redswitch: + mov $r14 0x614 + shl b32 $r14 6 + mov $r15 0x020 + iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER + mov $r15 8 + ctx_redswitch_delay: + sub b32 $r15 1 + bra ne ctx_redswitch_delay + mov $r15 0xa20 + iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER + ret + +// Transfer GPC context data between GPU and storage area +// +// In: $r15 context base address +// $p1 clear on save, set on load +// $p2 set if opposite direction done/will be done, so: +// on save it means: "a load will follow this save" +// on load it means: "a save preceeded this load" +// +ctx_xfer: + // set context base address + mov $r1 0xa04 + shl b32 $r1 6 + iowr I[$r1 + 0x000] $r15// MEM_BASE + bra not $p1 ctx_xfer_not_load + call ctx_redswitch + ctx_xfer_not_load: + + // strands + mov $r1 0x4afc + sethi $r1 0x20000 + mov $r2 0xc + iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c + call strand_wait + mov $r2 0x47fc + sethi $r2 0x20000 + iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 + xbit $r2 $flags $p1 + add b32 $r2 3 + iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) + + // mmio context + xbit $r10 $flags $p1 // direction + or $r10 2 // first + mov $r11 0x0000 + sethi $r11 0x500000 + ld b32 $r12 D[$r0 + gpc_id] + shl b32 $r12 15 + add b32 $r11 $r12 // base = NV_PGRAPH_GPCn + ld b32 $r12 D[$r0 + gpc_mmio_list_head] + ld b32 $r13 D[$r0 + gpc_mmio_list_tail] + mov $r14 0 // not multi + call mmctx_xfer + + // per-TPC mmio context + xbit $r10 $flags $p1 // direction + or $r10 4 // last + mov $r11 0x4000 + sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0 + ld b32 $r12 D[$r0 + gpc_id] + shl b32 $r12 15 + add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0 + ld b32 $r12 D[$r0 + tpc_mmio_list_head] + ld b32 $r13 D[$r0 + tpc_mmio_list_tail] + ld b32 $r15 D[$r0 + tpc_mask] + mov $r14 0x800 // stride = 0x800 + call mmctx_xfer + + // wait for strands to finish + call strand_wait + + // if load, or a save without a load following, do some + // unknown stuff that's done after finishing a block of + // strand commands + bra $p1 ctx_xfer_post + bra not $p2 ctx_xfer_done + ctx_xfer_post: + mov $r1 0x4afc + sethi $r1 0x20000 + mov $r2 0xd + iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d + call strand_wait + + // mark completion in HUB's barrier + ctx_xfer_done: + call hub_barrier_done + ret + +.align 256 diff --git a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc.h b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc.h new file mode 100644 index 0000000..1896c89 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc.h @@ -0,0 +1,483 @@ +uint32_t nvc0_grgpc_data[] = { + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x000000c0, + 0x011000b0, + 0x01640114, + 0x000000c1, + 0x011400b0, + 0x01780114, + 0x000000c3, + 0x011000b0, + 0x01740114, + 0x000000c4, + 0x011000b0, + 0x01740114, + 0x000000c8, + 0x011000b0, + 0x01640114, + 0x000000ce, + 0x011000b0, + 0x01740114, + 0x00000000, + 0x00000380, + 0x14000400, + 0x20000450, + 0x00000600, + 0x00000684, + 0x10000700, + 0x00000800, + 0x08000808, + 0x00000828, + 0x00000830, + 0x000008d8, + 0x000008e0, + 0x140008e8, + 0x0000091c, + 0x08000924, + 0x00000b00, + 0x14000b08, + 0x00000bb8, + 0x00000c08, + 0x1c000c10, + 0x00000c80, + 0x00000c8c, + 0x08001000, + 0x00001014, + 0x00000c6c, + 0x00000018, + 0x0000003c, + 0x00000048, + 0x00000064, + 0x00000088, + 0x14000200, + 0x0400021c, + 0x14000300, + 0x000003d0, + 0x040003e0, + 0x08000400, + 0x00000420, + 0x000004b0, + 0x000004e8, + 0x000004f4, + 0x04000520, + 0x0c000604, + 0x4c000644, + 0x00000698, + 0x04000750, + 0x00000758, + 0x000002c4, + 0x000004bc, + 0x000006e0, + 0x00000544, +}; + +uint32_t nvc0_grgpc_code[] = { + 0x03060ef5, + 0x9800d898, + 0x86f001d9, + 0x0489b808, + 0xf00c1bf4, + 0x21f502f7, + 0x00f802ec, + 0xb60798c4, + 0x8dbb0384, + 0x0880b600, + 0x80008e80, + 0x90b6018f, + 0x0f94f001, + 0xf801d980, + 0x0131f400, + 0x9800d898, + 0x89b801d9, + 0x210bf404, + 0xb60789c4, + 0x9dbb0394, + 0x0890b600, + 0x98009e98, + 0x80b6019f, + 0x0f84f001, + 0xf400d880, + 0x00f80132, + 0x0728b7f1, + 0xb906b4b6, + 0xc9f002ec, + 0x00bcd01f, + 0xc800bccf, + 0x1bf41fcc, + 0x06a7f0fa, + 0x010321f5, + 0xf840bfcf, + 0x28b7f100, + 0x06b4b607, + 0xb980bfd0, + 0xc9f002ec, + 0x1ec9f01f, + 0xcf00bcd0, + 0xccc800bc, + 0xfa1bf41f, + 0x87f100f8, + 0x84b60430, + 0x1ff9f006, + 0xf8008fd0, + 0x3087f100, + 0x0684b604, + 0xf80080d0, + 0x3c87f100, + 0x0684b608, + 0x99f094bd, + 0x0089d000, + 0x081887f1, + 0xd00684b6, + 0x87f1008a, + 0x84b60400, + 0x0088cf06, + 0xf4888aff, + 0x87f1f31b, + 0x84b6085c, + 0xf094bd06, + 0x89d00099, + 0xf100f800, + 0xb6083c87, + 0x94bd0684, + 0xd00099f0, + 0x87f10089, + 0x84b60818, + 0x008ad006, + 0x040087f1, + 0xcf0684b6, + 0x8aff0088, + 0xf30bf488, + 0x085c87f1, + 0xbd0684b6, + 0x0099f094, + 0xf80089d0, + 0x9894bd00, + 0x85b600e8, + 0x0180b61a, + 0xbb0284b6, + 0xe0b60098, + 0x04efb804, + 0xb9eb1bf4, + 0x00f8029f, + 0x083c87f1, + 0xbd0684b6, + 0x0199f094, + 0xf10089d0, + 0xb6071087, + 0x94bd0684, + 0xf405bbfd, + 0x8bd0090b, + 0x0099f000, + 0xf405eefd, + 0x8ed00c0b, + 0xc08fd080, + 0xb70199f0, + 0xc8010080, + 0xb4b600ab, + 0x0cb9f010, + 0xb601aec8, + 0xbefd11e4, + 0x008bd005, + 0xf0008ecf, + 0x0bf41fe4, + 0x00ce98fa, + 0xd005e9fd, + 0xc0b6c08e, + 0x04cdb804, + 0xc8e81bf4, + 0x1bf402ab, + 0x008bcf18, + 0xb01fb4f0, + 0x1bf410b4, + 0x02a7f0f7, + 0xf4c921f4, + 0xabc81b0e, + 0x10b4b600, + 0xf00cb9f0, + 0x8bd012b9, + 0x008bcf00, + 0xf412bbc8, + 0x87f1fa1b, + 0x84b6085c, + 0xf094bd06, + 0x89d00199, + 0xf900f800, + 0x02a7f0a0, + 0xfcc921f4, + 0xf100f8a0, + 0xf04afc87, + 0x97f00283, + 0x0089d00c, + 0x020721f5, + 0x87f100f8, + 0x83f04afc, + 0x0d97f002, + 0xf50089d0, + 0xf8020721, + 0xfca7f100, + 0x02a3f04f, + 0x0500aba2, + 0xd00fc7f0, + 0xc7f000ac, + 0x00bcd00b, + 0x020721f5, + 0xf000aed0, + 0xbcd00ac7, + 0x0721f500, + 0xf100f802, + 0xb6083c87, + 0x94bd0684, + 0xd00399f0, + 0x21f50089, + 0xe7f00213, + 0x3921f503, + 0xfca7f102, + 0x02a3f046, + 0x0400aba0, + 0xf040a0d0, + 0xbcd001c7, + 0x0721f500, + 0x010c9202, + 0xf000acd0, + 0xbcd002c7, + 0x0721f500, + 0x2621f502, + 0x8087f102, + 0x0684b608, + 0xb70089cf, + 0x95220080, + 0x8ed008fe, + 0x408ed000, + 0xb6808acf, + 0xa0b606a5, + 0x00eabb01, + 0xb60480b6, + 0x1bf40192, + 0x08e4b6e8, + 0xf1f2efbc, + 0xb6085c87, + 0x94bd0684, + 0xd00399f0, + 0x00f80089, + 0xe7f1e0f9, + 0xe3f09814, + 0x8d21f440, + 0x041ce0b7, + 0xf401f7f0, + 0xe0fc8d21, + 0x04bd00f8, + 0xf10004fe, + 0xf0120017, + 0x12d00227, + 0x3e17f100, + 0x0010fe04, + 0x040017f1, + 0xf0c010d0, + 0x12d00427, + 0x1031f400, + 0x060817f1, + 0xcf0614b6, + 0x37f00012, + 0x1f24f001, + 0xb60432bb, + 0x02800132, + 0x04038003, + 0x040010b7, + 0x800012cf, + 0x27f10002, + 0x24b60800, + 0x0022cf06, + 0xb65817f0, + 0x13980c10, + 0x0432b800, + 0xb00b0bf4, + 0x1bf40034, + 0xf100f8f1, + 0xb6080027, + 0x22cf0624, + 0xf134bd40, + 0xb6070047, + 0x25950644, + 0x0045d008, + 0xbd4045d0, + 0x58f4bde4, + 0x1f58021e, + 0x020e4003, + 0xf5040f40, + 0xbb013d21, + 0x3fbb002f, + 0x041e5800, + 0x40051f58, + 0x0f400a0e, + 0x3d21f50c, + 0x030e9801, + 0xbb00effd, + 0x3ebb002e, + 0x0040b700, + 0x0235b613, + 0xb60043d0, + 0x35b60825, + 0x0120b606, + 0xb60130b6, + 0x34b60824, + 0x022fb908, + 0x026321f5, + 0xf1003fbb, + 0xb6080017, + 0x13d00614, + 0x0010b740, + 0xf024bd08, + 0x12d01f29, + 0x0031f400, + 0xf00028f4, + 0x21f41cd7, + 0xf401f439, + 0xf404e4b0, + 0x81fe1e18, + 0x0627f001, + 0x12fd20bd, + 0x01e4b604, + 0xfe051efd, + 0x21f50018, + 0x0ef404c3, + 0x10ef94d3, + 0xf501f5f0, + 0xf402ec21, + 0x80f9c60e, + 0xf90188fe, + 0xf990f980, + 0xf9b0f9a0, + 0xf9e0f9d0, + 0x800acff0, + 0xf404abc4, + 0xb7f11d0b, + 0xd7f01900, + 0x40becf1c, + 0xf400bfcf, + 0xb0b70421, + 0xe7f00400, + 0x00bed001, + 0xfc400ad0, + 0xfce0fcf0, + 0xfcb0fcd0, + 0xfc90fca0, + 0x0088fe80, + 0x32f480fc, + 0xf001f800, + 0x0e9801f7, + 0x04febb00, + 0x9418e7f1, + 0xf440e3f0, + 0x00f88d21, + 0x0614e7f1, + 0xf006e4b6, + 0xefd020f7, + 0x08f7f000, + 0xf401f2b6, + 0xf7f1fd1b, + 0xefd00a20, + 0xf100f800, + 0xb60a0417, + 0x1fd00614, + 0x0711f400, + 0x04a421f5, + 0x4afc17f1, + 0xf00213f0, + 0x12d00c27, + 0x0721f500, + 0xfc27f102, + 0x0223f047, + 0xf00020d0, + 0x20b6012c, + 0x0012d003, + 0xf001acf0, + 0xb7f002a5, + 0x50b3f000, + 0xb6000c98, + 0xbcbb0fc4, + 0x010c9800, + 0xf0020d98, + 0x21f500e7, + 0xacf0015c, + 0x04a5f001, + 0x4000b7f1, + 0x9850b3f0, + 0xc4b6000c, + 0x00bcbb0f, + 0x98050c98, + 0x0f98060d, + 0x00e7f104, + 0x5c21f508, + 0x0721f501, + 0x0601f402, + 0xf11412f4, + 0xf04afc17, + 0x27f00213, + 0x0012d00d, + 0x020721f5, + 0x048f21f5, + 0x000000f8, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, +}; diff --git a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc new file mode 100644 index 0000000..a1a5991 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc @@ -0,0 +1,808 @@ +/* fuc microcode for nvc0 PGRAPH/HUB + * + * Copyright 2011 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ + +/* To build: + * m4 nvc0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grhub.fuc.h + */ + +.section nvc0_grhub_data +include(`nvc0_graph.fuc') +gpc_count: .b32 0 +rop_count: .b32 0 +cmd_queue: queue_init +hub_mmio_list_head: .b32 0 +hub_mmio_list_tail: .b32 0 + +ctx_current: .b32 0 + +chipsets: +.b8 0xc0 0 0 0 +.b16 nvc0_hub_mmio_head +.b16 nvc0_hub_mmio_tail +.b8 0xc1 0 0 0 +.b16 nvc0_hub_mmio_head +.b16 nvc1_hub_mmio_tail +.b8 0xc3 0 0 0 +.b16 nvc0_hub_mmio_head +.b16 nvc0_hub_mmio_tail +.b8 0xc4 0 0 0 +.b16 nvc0_hub_mmio_head +.b16 nvc0_hub_mmio_tail +.b8 0xc8 0 0 0 +.b16 nvc0_hub_mmio_head +.b16 nvc0_hub_mmio_tail +.b8 0xce 0 0 0 +.b16 nvc0_hub_mmio_head +.b16 nvc0_hub_mmio_tail +.b8 0 0 0 0 + +nvc0_hub_mmio_head: +mmctx_data(0x17e91c, 2) +mmctx_data(0x400204, 2) +mmctx_data(0x404004, 11) +mmctx_data(0x404044, 1) +mmctx_data(0x404094, 14) +mmctx_data(0x4040d0, 7) +mmctx_data(0x4040f8, 1) +mmctx_data(0x404130, 3) +mmctx_data(0x404150, 3) +mmctx_data(0x404164, 2) +mmctx_data(0x404174, 3) +mmctx_data(0x404200, 8) +mmctx_data(0x404404, 14) +mmctx_data(0x404460, 4) +mmctx_data(0x404480, 1) +mmctx_data(0x404498, 1) +mmctx_data(0x404604, 4) +mmctx_data(0x404618, 32) +mmctx_data(0x404698, 21) +mmctx_data(0x4046f0, 2) +mmctx_data(0x404700, 22) +mmctx_data(0x405800, 1) +mmctx_data(0x405830, 3) +mmctx_data(0x405854, 1) +mmctx_data(0x405870, 4) +mmctx_data(0x405a00, 2) +mmctx_data(0x405a18, 1) +mmctx_data(0x406020, 1) +mmctx_data(0x406028, 4) +mmctx_data(0x4064a8, 2) +mmctx_data(0x4064b4, 2) +mmctx_data(0x407804, 1) +mmctx_data(0x40780c, 6) +mmctx_data(0x4078bc, 1) +mmctx_data(0x408000, 7) +mmctx_data(0x408064, 1) +mmctx_data(0x408800, 3) +mmctx_data(0x408900, 4) +mmctx_data(0x408980, 1) +nvc0_hub_mmio_tail: +mmctx_data(0x4064c0, 2) +nvc1_hub_mmio_tail: + +.align 256 +chan_data: +chan_mmio_count: .b32 0 +chan_mmio_address: .b32 0 + +.align 256 +xfer_data: .b32 0 + +.section nvc0_grhub_code +bra init +define(`include_code') +include(`nvc0_graph.fuc') + +// reports an exception to the host +// +// In: $r15 error code (see nvc0_graph.fuc) +// +error: + push $r14 + mov $r14 0x814 + shl b32 $r14 6 + iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code + mov $r14 0xc1c + shl b32 $r14 6 + mov $r15 1 + iowr I[$r14 + 0x000] $r15 // INTR_UP_SET + pop $r14 + ret + +// HUB fuc initialisation, executed by triggering ucode start, will +// fall through to main loop after completion. +// +// Input: +// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) +// +// Output: +// CC_SCRATCH[0]: +// 31:31: set to signal completion +// CC_SCRATCH[1]: +// 31:0: total PGRAPH context size +// +init: + clear b32 $r0 + mov $sp $r0 + mov $xdbase $r0 + + // enable fifo access + mov $r1 0x1200 + mov $r2 2 + iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE + + // setup i0 handler, and route all interrupts to it + mov $r1 ih + mov $iv0 $r1 + mov $r1 0x400 + iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH + + // route HUB_CHANNEL_SWITCH to fuc interrupt 8 + mov $r3 0x404 + shl b32 $r3 6 + mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8 + iowr I[$r3 + 0x000] $r2 + + // not sure what these are, route them because NVIDIA does, and + // the IRQ handler will signal the host if we ever get one.. we + // may find out if/why we need to handle these if so.. + // + mov $r2 0x2004 + iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9 + mov $r2 0x200b + iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10 + mov $r2 0x200c + iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15 + + // enable all INTR_UP interrupts + mov $r2 0xc24 + shl b32 $r2 6 + not b32 $r3 $r0 + iowr I[$r2] $r3 + + // enable fifo, ctxsw, 9, 10, 15 interrupts + mov $r2 -0x78fc // 0x8704 + sethi $r2 0 + iowr I[$r1 + 0x000] $r2 // INTR_EN_SET + + // fifo level triggered, rest edge + sub b32 $r1 0x100 + mov $r2 4 + iowr I[$r1] $r2 + + // enable interrupts + bset $flags ie0 + + // fetch enabled GPC/ROP counts + mov $r14 -0x69fc // 0x409604 + sethi $r14 0x400000 + call nv_rd32 + extr $r1 $r15 16:20 + st b32 D[$r0 + rop_count] $r1 + and $r15 0x1f + st b32 D[$r0 + gpc_count] $r15 + + // set BAR_REQMASK to GPC mask + mov $r1 1 + shl b32 $r1 $r15 + sub b32 $r1 1 + mov $r2 0x40c + shl b32 $r2 6 + iowr I[$r2 + 0x000] $r1 + iowr I[$r2 + 0x100] $r1 + + // find context data for this chipset + mov $r2 0x800 + shl b32 $r2 6 + iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] + mov $r15 chipsets - 8 + init_find_chipset: + add b32 $r15 8 + ld b32 $r3 D[$r15 + 0x00] + cmpu b32 $r3 $r2 + bra e init_context + cmpu b32 $r3 0 + bra ne init_find_chipset + // unknown chipset + ret + + // context size calculation, reserve first 256 bytes for use by fuc + init_context: + mov $r1 256 + + // calculate size of mmio context data + ld b16 $r14 D[$r15 + 4] + ld b16 $r15 D[$r15 + 6] + sethi $r14 0 + st b32 D[$r0 + hub_mmio_list_head] $r14 + st b32 D[$r0 + hub_mmio_list_tail] $r15 + call mmctx_size + + // set mmctx base addresses now so we don't have to do it later, + // they don't (currently) ever change + mov $r3 0x700 + shl b32 $r3 6 + shr b32 $r4 $r1 8 + iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE + iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE + add b32 $r3 0x1300 + add b32 $r1 $r15 + shr b32 $r15 2 + iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!? + + // strands, base offset needs to be aligned to 256 bytes + shr b32 $r1 8 + add b32 $r1 1 + shl b32 $r1 8 + mov b32 $r15 $r1 + call strand_ctx_init + add b32 $r1 $r15 + + // initialise each GPC in sequence by passing in the offset of its + // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which + // has previously been uploaded by the host) running. + // + // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31 + // when it has completed, and return the size of its context data + // in GPCn_CC_SCRATCH[1] + // + ld b32 $r3 D[$r0 + gpc_count] + mov $r4 0x2000 + sethi $r4 0x500000 + init_gpc: + // setup, and start GPC ucode running + add b32 $r14 $r4 0x804 + mov b32 $r15 $r1 + call nv_wr32 // CC_SCRATCH[1] = ctx offset + add b32 $r14 $r4 0x800 + mov b32 $r15 $r2 + call nv_wr32 // CC_SCRATCH[0] = chipset + add b32 $r14 $r4 0x10c + clear b32 $r15 + call nv_wr32 + add b32 $r14 $r4 0x104 + call nv_wr32 // ENTRY + add b32 $r14 $r4 0x100 + mov $r15 2 // CTRL_START_TRIGGER + call nv_wr32 // CTRL + + // wait for it to complete, and adjust context size + add b32 $r14 $r4 0x800 + init_gpc_wait: + call nv_rd32 + xbit $r15 $r15 31 + bra e init_gpc_wait + add b32 $r14 $r4 0x804 + call nv_rd32 + add b32 $r1 $r15 + + // next! + add b32 $r4 0x8000 + sub b32 $r3 1 + bra ne init_gpc + + // save context size, and tell host we're ready + mov $r2 0x800 + shl b32 $r2 6 + iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size + add b32 $r2 0x800 + clear b32 $r1 + bset $r1 31 + iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000 + +// Main program loop, very simple, sleeps until woken up by the interrupt +// handler, pulls a command from the queue and executes its handler +// +main: + // sleep until we have something to do + bset $flags $p0 + sleep $p0 + mov $r13 cmd_queue + call queue_get + bra $p1 main + + // context switch, requested by GPU? + cmpu b32 $r14 0x4001 + bra ne main_not_ctx_switch + trace_set(T_AUTO) + mov $r1 0xb00 + shl b32 $r1 6 + iord $r2 I[$r1 + 0x100] // CHAN_NEXT + iord $r1 I[$r1 + 0x000] // CHAN_CUR + + xbit $r3 $r1 31 + bra e chsw_no_prev + xbit $r3 $r2 31 + bra e chsw_prev_no_next + push $r2 + mov b32 $r2 $r1 + trace_set(T_SAVE) + bclr $flags $p1 + bset $flags $p2 + call ctx_xfer + trace_clr(T_SAVE); + pop $r2 + trace_set(T_LOAD); + bset $flags $p1 + call ctx_xfer + trace_clr(T_LOAD); + bra chsw_done + chsw_prev_no_next: + push $r2 + mov b32 $r2 $r1 + bclr $flags $p1 + bclr $flags $p2 + call ctx_xfer + pop $r2 + mov $r1 0xb00 + shl b32 $r1 6 + iowr I[$r1] $r2 + bra chsw_done + chsw_no_prev: + xbit $r3 $r2 31 + bra e chsw_done + bset $flags $p1 + bclr $flags $p2 + call ctx_xfer + + // ack the context switch request + chsw_done: + mov $r1 0xb0c + shl b32 $r1 6 + mov $r2 1 + iowr I[$r1 + 0x000] $r2 // 0x409b0c + trace_clr(T_AUTO) + bra main + + // request to set current channel? (*not* a context switch) + main_not_ctx_switch: + cmpu b32 $r14 0x0001 + bra ne main_not_ctx_chan + mov b32 $r2 $r15 + call ctx_chan + bra main_done + + // request to store current channel context? + main_not_ctx_chan: + cmpu b32 $r14 0x0002 + bra ne main_not_ctx_save + trace_set(T_SAVE) + bclr $flags $p1 + bclr $flags $p2 + call ctx_xfer + trace_clr(T_SAVE) + bra main_done + + main_not_ctx_save: + shl b32 $r15 $r14 16 + or $r15 E_BAD_COMMAND + call error + bra main + + main_done: + mov $r1 0x820 + shl b32 $r1 6 + clear b32 $r2 + bset $r2 31 + iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 + bra main + +// interrupt handler +ih: + push $r8 + mov $r8 $flags + push $r8 + push $r9 + push $r10 + push $r11 + push $r13 + push $r14 + push $r15 + + // incoming fifo command? + iord $r10 I[$r0 + 0x200] // INTR + and $r11 $r10 0x00000004 + bra e ih_no_fifo + // queue incoming fifo command for later processing + mov $r11 0x1900 + mov $r13 cmd_queue + iord $r14 I[$r11 + 0x100] // FIFO_CMD + iord $r15 I[$r11 + 0x000] // FIFO_DATA + call queue_put + add b32 $r11 0x400 + mov $r14 1 + iowr I[$r11 + 0x000] $r14 // FIFO_ACK + + // context switch request? + ih_no_fifo: + and $r11 $r10 0x00000100 + bra e ih_no_ctxsw + // enqueue a context switch for later processing + mov $r13 cmd_queue + mov $r14 0x4001 + call queue_put + + // anything we didn't handle, bring it to the host's attention + ih_no_ctxsw: + mov $r11 0x104 + not b32 $r11 + and $r11 $r10 $r11 + bra e ih_no_other + mov $r10 0xc1c + shl b32 $r10 6 + iowr I[$r10] $r11 // INTR_UP_SET + + // ack, and wake up main() + ih_no_other: + iowr I[$r0 + 0x100] $r10 // INTR_ACK + + pop $r15 + pop $r14 + pop $r13 + pop $r11 + pop $r10 + pop $r9 + pop $r8 + mov $flags $r8 + pop $r8 + bclr $flags $p0 + iret + +// Not real sure, but, MEM_CMD 7 will hang forever if this isn't done +ctx_4160s: + mov $r14 0x4160 + sethi $r14 0x400000 + mov $r15 1 + call nv_wr32 + ctx_4160s_wait: + call nv_rd32 + xbit $r15 $r15 4 + bra e ctx_4160s_wait + ret + +// Without clearing again at end of xfer, some things cause PGRAPH +// to hang with STATUS=0x00000007 until it's cleared.. fbcon can +// still function with it set however... +ctx_4160c: + mov $r14 0x4160 + sethi $r14 0x400000 + clear b32 $r15 + call nv_wr32 + ret + +// Again, not real sure +// +// In: $r15 value to set 0x404170 to +// +ctx_4170s: + mov $r14 0x4170 + sethi $r14 0x400000 + or $r15 0x10 + call nv_wr32 + ret + +// Waits for a ctx_4170s() call to complete +// +ctx_4170w: + mov $r14 0x4170 + sethi $r14 0x400000 + call nv_rd32 + and $r15 0x10 + bra ne ctx_4170w + ret + +// Disables various things, waits a bit, and re-enables them.. +// +// Not sure how exactly this helps, perhaps "ENABLE" is not such a +// good description for the bits we turn off? Anyways, without this, +// funny things happen. +// +ctx_redswitch: + mov $r14 0x614 + shl b32 $r14 6 + mov $r15 0x270 + iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL + mov $r15 8 + ctx_redswitch_delay: + sub b32 $r15 1 + bra ne ctx_redswitch_delay + mov $r15 0x770 + iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL + ret + +// Not a clue what this is for, except that unless the value is 0x10, the +// strand context is saved (and presumably restored) incorrectly.. +// +// In: $r15 value to set to (0x00/0x10 are used) +// +ctx_86c: + mov $r14 0x86c + shl b32 $r14 6 + iowr I[$r14] $r15 // HUB(0x86c) = val + mov $r14 -0x75ec + sethi $r14 0x400000 + call nv_wr32 // ROP(0xa14) = val + mov $r14 -0x5794 + sethi $r14 0x410000 + call nv_wr32 // GPC(0x86c) = val + ret + +// ctx_load - load's a channel's ctxctl data, and selects its vm +// +// In: $r2 channel address +// +ctx_load: + trace_set(T_CHAN) + + // switch to channel, somewhat magic in parts.. + mov $r10 12 // DONE_UNK12 + call wait_donez + mov $r1 0xa24 + shl b32 $r1 6 + iowr I[$r1 + 0x000] $r0 // 0x409a24 + mov $r3 0xb00 + shl b32 $r3 6 + iowr I[$r3 + 0x100] $r2 // CHAN_NEXT + mov $r1 0xa0c + shl b32 $r1 6 + mov $r4 7 + iowr I[$r1 + 0x000] $r2 // MEM_CHAN + iowr I[$r1 + 0x100] $r4 // MEM_CMD + ctx_chan_wait_0: + iord $r4 I[$r1 + 0x100] + and $r4 0x1f + bra ne ctx_chan_wait_0 + iowr I[$r3 + 0x000] $r2 // CHAN_CUR + + // load channel header, fetch PGRAPH context pointer + mov $xtargets $r0 + bclr $r2 31 + shl b32 $r2 4 + add b32 $r2 2 + + trace_set(T_LCHAN) + mov $r1 0xa04 + shl b32 $r1 6 + iowr I[$r1 + 0x000] $r2 // MEM_BASE + mov $r1 0xa20 + shl b32 $r1 6 + mov $r2 0x0002 + sethi $r2 0x80000000 + iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram + mov $r1 0x10 // chan + 0x0210 + mov $r2 xfer_data + sethi $r2 0x00020000 // 16 bytes + xdld $r1 $r2 + xdwait + trace_clr(T_LCHAN) + + // update current context + ld b32 $r1 D[$r0 + xfer_data + 4] + shl b32 $r1 24 + ld b32 $r2 D[$r0 + xfer_data + 0] + shr b32 $r2 8 + or $r1 $r2 + st b32 D[$r0 + ctx_current] $r1 + + // set transfer base to start of context, and fetch context header + trace_set(T_LCTXH) + mov $r2 0xa04 + shl b32 $r2 6 + iowr I[$r2 + 0x000] $r1 // MEM_BASE + mov $r2 1 + mov $r1 0xa20 + shl b32 $r1 6 + iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm + mov $r1 chan_data + sethi $r1 0x00060000 // 256 bytes + xdld $r0 $r1 + xdwait + trace_clr(T_LCTXH) + + trace_clr(T_CHAN) + ret + +// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as +// the active channel for ctxctl, but not actually transfer +// any context data. intended for use only during initial +// context construction. +// +// In: $r2 channel address +// +ctx_chan: + call ctx_4160s + call ctx_load + mov $r10 12 // DONE_UNK12 + call wait_donez + mov $r1 0xa10 + shl b32 $r1 6 + mov $r2 5 + iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???) + ctx_chan_wait: + iord $r2 I[$r1 + 0x000] + or $r2 $r2 + bra ne ctx_chan_wait + call ctx_4160c + ret + +// Execute per-context state overrides list +// +// Only executed on the first load of a channel. Might want to look into +// removing this and having the host directly modify the channel's context +// to change this state... The nouveau DRM already builds this list as +// it's definitely needed for NVIDIA's, so we may as well use it for now +// +// Input: $r1 mmio list length +// +ctx_mmio_exec: + // set transfer base to be the mmio list + ld b32 $r3 D[$r0 + chan_mmio_address] + mov $r2 0xa04 + shl b32 $r2 6 + iowr I[$r2 + 0x000] $r3 // MEM_BASE + + clear b32 $r3 + ctx_mmio_loop: + // fetch next 256 bytes of mmio list if necessary + and $r4 $r3 0xff + bra ne ctx_mmio_pull + mov $r5 xfer_data + sethi $r5 0x00060000 // 256 bytes + xdld $r3 $r5 + xdwait + + // execute a single list entry + ctx_mmio_pull: + ld b32 $r14 D[$r4 + xfer_data + 0x00] + ld b32 $r15 D[$r4 + xfer_data + 0x04] + call nv_wr32 + + // next! + add b32 $r3 8 + sub b32 $r1 1 + bra ne ctx_mmio_loop + + // set transfer base back to the current context + ctx_mmio_done: + ld b32 $r3 D[$r0 + ctx_current] + iowr I[$r2 + 0x000] $r3 // MEM_BASE + + // disable the mmio list now, we don't need/want to execute it again + st b32 D[$r0 + chan_mmio_count] $r0 + mov $r1 chan_data + sethi $r1 0x00060000 // 256 bytes + xdst $r0 $r1 + xdwait + ret + +// Transfer HUB context data between GPU and storage area +// +// In: $r2 channel address +// $p1 clear on save, set on load +// $p2 set if opposite direction done/will be done, so: +// on save it means: "a load will follow this save" +// on load it means: "a save preceeded this load" +// +ctx_xfer: + bra not $p1 ctx_xfer_pre + bra $p2 ctx_xfer_pre_load + ctx_xfer_pre: + mov $r15 0x10 + call ctx_86c + call ctx_4160s + bra not $p1 ctx_xfer_exec + + ctx_xfer_pre_load: + mov $r15 2 + call ctx_4170s + call ctx_4170w + call ctx_redswitch + clear b32 $r15 + call ctx_4170s + call ctx_load + + // fetch context pointer, and initiate xfer on all GPCs + ctx_xfer_exec: + ld b32 $r1 D[$r0 + ctx_current] + mov $r2 0x414 + shl b32 $r2 6 + iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset + mov $r14 -0x5b00 + sethi $r14 0x410000 + mov b32 $r15 $r1 + call nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer + add b32 $r14 4 + xbit $r15 $flags $p1 + xbit $r2 $flags $p2 + shl b32 $r2 1 + or $r15 $r2 + call nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type) + + // strands + mov $r1 0x4afc + sethi $r1 0x20000 + mov $r2 0xc + iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c + call strand_wait + mov $r2 0x47fc + sethi $r2 0x20000 + iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 + xbit $r2 $flags $p1 + add b32 $r2 3 + iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) + + // mmio context + xbit $r10 $flags $p1 // direction + or $r10 6 // first, last + mov $r11 0 // base = 0 + ld b32 $r12 D[$r0 + hub_mmio_list_head] + ld b32 $r13 D[$r0 + hub_mmio_list_tail] + mov $r14 0 // not multi + call mmctx_xfer + + // wait for GPCs to all complete + mov $r10 8 // DONE_BAR + call wait_doneo + + // wait for strand xfer to complete + call strand_wait + + // post-op + bra $p1 ctx_xfer_post + mov $r10 12 // DONE_UNK12 + call wait_donez + mov $r1 0xa10 + shl b32 $r1 6 + mov $r2 5 + iowr I[$r1] $r2 // MEM_CMD + ctx_xfer_post_save_wait: + iord $r2 I[$r1] + or $r2 $r2 + bra ne ctx_xfer_post_save_wait + + bra $p2 ctx_xfer_done + ctx_xfer_post: + mov $r15 2 + call ctx_4170s + clear b32 $r15 + call ctx_86c + call strand_post + call ctx_4170w + clear b32 $r15 + call ctx_4170s + + bra not $p1 ctx_xfer_no_post_mmio + ld b32 $r1 D[$r0 + chan_mmio_count] + or $r1 $r1 + bra e ctx_xfer_no_post_mmio + call ctx_mmio_exec + + ctx_xfer_no_post_mmio: + call ctx_4160c + + ctx_xfer_done: + ret + +.align 256 diff --git a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc.h b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc.h new file mode 100644 index 0000000..b3b541b --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc.h @@ -0,0 +1,838 @@ +uint32_t nvc0_grhub_data[] = { + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x000000c0, + 0x012c0090, + 0x000000c1, + 0x01300090, + 0x000000c3, + 0x012c0090, + 0x000000c4, + 0x012c0090, + 0x000000c8, + 0x012c0090, + 0x000000ce, + 0x012c0090, + 0x00000000, + 0x0417e91c, + 0x04400204, + 0x28404004, + 0x00404044, + 0x34404094, + 0x184040d0, + 0x004040f8, + 0x08404130, + 0x08404150, + 0x04404164, + 0x08404174, + 0x1c404200, + 0x34404404, + 0x0c404460, + 0x00404480, + 0x00404498, + 0x0c404604, + 0x7c404618, + 0x50404698, + 0x044046f0, + 0x54404700, + 0x00405800, + 0x08405830, + 0x00405854, + 0x0c405870, + 0x04405a00, + 0x00405a18, + 0x00406020, + 0x0c406028, + 0x044064a8, + 0x044064b4, + 0x00407804, + 0x1440780c, + 0x004078bc, + 0x18408000, + 0x00408064, + 0x08408800, + 0x0c408900, + 0x00408980, + 0x044064c0, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, +}; + +uint32_t nvc0_grhub_code[] = { + 0x03090ef5, + 0x9800d898, + 0x86f001d9, + 0x0489b808, + 0xf00c1bf4, + 0x21f502f7, + 0x00f802ec, + 0xb60798c4, + 0x8dbb0384, + 0x0880b600, + 0x80008e80, + 0x90b6018f, + 0x0f94f001, + 0xf801d980, + 0x0131f400, + 0x9800d898, + 0x89b801d9, + 0x210bf404, + 0xb60789c4, + 0x9dbb0394, + 0x0890b600, + 0x98009e98, + 0x80b6019f, + 0x0f84f001, + 0xf400d880, + 0x00f80132, + 0x0728b7f1, + 0xb906b4b6, + 0xc9f002ec, + 0x00bcd01f, + 0xc800bccf, + 0x1bf41fcc, + 0x06a7f0fa, + 0x010321f5, + 0xf840bfcf, + 0x28b7f100, + 0x06b4b607, + 0xb980bfd0, + 0xc9f002ec, + 0x1ec9f01f, + 0xcf00bcd0, + 0xccc800bc, + 0xfa1bf41f, + 0x87f100f8, + 0x84b60430, + 0x1ff9f006, + 0xf8008fd0, + 0x3087f100, + 0x0684b604, + 0xf80080d0, + 0x3c87f100, + 0x0684b608, + 0x99f094bd, + 0x0089d000, + 0x081887f1, + 0xd00684b6, + 0x87f1008a, + 0x84b60400, + 0x0088cf06, + 0xf4888aff, + 0x87f1f31b, + 0x84b6085c, + 0xf094bd06, + 0x89d00099, + 0xf100f800, + 0xb6083c87, + 0x94bd0684, + 0xd00099f0, + 0x87f10089, + 0x84b60818, + 0x008ad006, + 0x040087f1, + 0xcf0684b6, + 0x8aff0088, + 0xf30bf488, + 0x085c87f1, + 0xbd0684b6, + 0x0099f094, + 0xf80089d0, + 0x9894bd00, + 0x85b600e8, + 0x0180b61a, + 0xbb0284b6, + 0xe0b60098, + 0x04efb804, + 0xb9eb1bf4, + 0x00f8029f, + 0x083c87f1, + 0xbd0684b6, + 0x0199f094, + 0xf10089d0, + 0xb6071087, + 0x94bd0684, + 0xf405bbfd, + 0x8bd0090b, + 0x0099f000, + 0xf405eefd, + 0x8ed00c0b, + 0xc08fd080, + 0xb70199f0, + 0xc8010080, + 0xb4b600ab, + 0x0cb9f010, + 0xb601aec8, + 0xbefd11e4, + 0x008bd005, + 0xf0008ecf, + 0x0bf41fe4, + 0x00ce98fa, + 0xd005e9fd, + 0xc0b6c08e, + 0x04cdb804, + 0xc8e81bf4, + 0x1bf402ab, + 0x008bcf18, + 0xb01fb4f0, + 0x1bf410b4, + 0x02a7f0f7, + 0xf4c921f4, + 0xabc81b0e, + 0x10b4b600, + 0xf00cb9f0, + 0x8bd012b9, + 0x008bcf00, + 0xf412bbc8, + 0x87f1fa1b, + 0x84b6085c, + 0xf094bd06, + 0x89d00199, + 0xf900f800, + 0x02a7f0a0, + 0xfcc921f4, + 0xf100f8a0, + 0xf04afc87, + 0x97f00283, + 0x0089d00c, + 0x020721f5, + 0x87f100f8, + 0x83f04afc, + 0x0d97f002, + 0xf50089d0, + 0xf8020721, + 0xfca7f100, + 0x02a3f04f, + 0x0500aba2, + 0xd00fc7f0, + 0xc7f000ac, + 0x00bcd00b, + 0x020721f5, + 0xf000aed0, + 0xbcd00ac7, + 0x0721f500, + 0xf100f802, + 0xb6083c87, + 0x94bd0684, + 0xd00399f0, + 0x21f50089, + 0xe7f00213, + 0x3921f503, + 0xfca7f102, + 0x02a3f046, + 0x0400aba0, + 0xf040a0d0, + 0xbcd001c7, + 0x0721f500, + 0x010c9202, + 0xf000acd0, + 0xbcd002c7, + 0x0721f500, + 0x2621f502, + 0x8087f102, + 0x0684b608, + 0xb70089cf, + 0x95220080, + 0x8ed008fe, + 0x408ed000, + 0xb6808acf, + 0xa0b606a5, + 0x00eabb01, + 0xb60480b6, + 0x1bf40192, + 0x08e4b6e8, + 0xf1f2efbc, + 0xb6085c87, + 0x94bd0684, + 0xd00399f0, + 0x00f80089, + 0xe7f1e0f9, + 0xe4b60814, + 0x00efd006, + 0x0c1ce7f1, + 0xf006e4b6, + 0xefd001f7, + 0xf8e0fc00, + 0xfe04bd00, + 0x07fe0004, + 0x0017f100, + 0x0227f012, + 0xf10012d0, + 0xfe05b917, + 0x17f10010, + 0x10d00400, + 0x0437f1c0, + 0x0634b604, + 0x200327f1, + 0xf10032d0, + 0xd0200427, + 0x27f10132, + 0x32d0200b, + 0x0c27f102, + 0x0732d020, + 0x0c2427f1, + 0xb90624b6, + 0x23d00003, + 0x0427f100, + 0x0023f087, + 0xb70012d0, + 0xf0010012, + 0x12d00427, + 0x1031f400, + 0x9604e7f1, + 0xf440e3f0, + 0xf1c76821, + 0x01018090, + 0x801ff4f0, + 0x17f0000f, + 0x041fbb01, + 0xf10112b6, + 0xb6040c27, + 0x21d00624, + 0x4021d000, + 0x080027f1, + 0xcf0624b6, + 0xf7f00022, + 0x08f0b654, + 0xb800f398, + 0x0bf40432, + 0x0034b00b, + 0xf8f11bf4, + 0x0017f100, + 0x02fe5801, + 0xf003ff58, + 0x0e8000e3, + 0x150f8014, + 0x013d21f5, + 0x070037f1, + 0x950634b6, + 0x34d00814, + 0x4034d000, + 0x130030b7, + 0xb6001fbb, + 0x3fd002f5, + 0x0815b600, + 0xb60110b6, + 0x1fb90814, + 0x6321f502, + 0x001fbb02, + 0xf1000398, + 0xf0200047, + 0x4ea05043, + 0x1fb90804, + 0x8d21f402, + 0x08004ea0, + 0xf4022fb9, + 0x4ea08d21, + 0xf4bd010c, + 0xa08d21f4, + 0xf401044e, + 0x4ea08d21, + 0xf7f00100, + 0x8d21f402, + 0x08004ea0, + 0xc86821f4, + 0x0bf41fff, + 0x044ea0fa, + 0x6821f408, + 0xb7001fbb, + 0xb6800040, + 0x1bf40132, + 0x0027f1b4, + 0x0624b608, + 0xb74021d0, + 0xbd080020, + 0x1f19f014, + 0xf40021d0, + 0x28f40031, + 0x08d7f000, + 0xf43921f4, + 0xe4b1f401, + 0x1bf54001, + 0x87f100d1, + 0x84b6083c, + 0xf094bd06, + 0x89d00499, + 0x0017f100, + 0x0614b60b, + 0xcf4012cf, + 0x13c80011, + 0x7e0bf41f, + 0xf41f23c8, + 0x20f95a0b, + 0xf10212b9, + 0xb6083c87, + 0x94bd0684, + 0xd00799f0, + 0x32f40089, + 0x0231f401, + 0x082921f5, + 0x085c87f1, + 0xbd0684b6, + 0x0799f094, + 0xfc0089d0, + 0x3c87f120, + 0x0684b608, + 0x99f094bd, + 0x0089d006, + 0xf50131f4, + 0xf1082921, + 0xb6085c87, + 0x94bd0684, + 0xd00699f0, + 0x0ef40089, + 0xb920f931, + 0x32f40212, + 0x0232f401, + 0x082921f5, + 0x17f120fc, + 0x14b60b00, + 0x0012d006, + 0xc8130ef4, + 0x0bf41f23, + 0x0131f40d, + 0xf50232f4, + 0xf1082921, + 0xb60b0c17, + 0x27f00614, + 0x0012d001, + 0x085c87f1, + 0xbd0684b6, + 0x0499f094, + 0xf50089d0, + 0xb0ff200e, + 0x1bf401e4, + 0x02f2b90d, + 0x07b521f5, + 0xb0420ef4, + 0x1bf402e4, + 0x3c87f12e, + 0x0684b608, + 0x99f094bd, + 0x0089d007, + 0xf40132f4, + 0x21f50232, + 0x87f10829, + 0x84b6085c, + 0xf094bd06, + 0x89d00799, + 0x110ef400, + 0xf010ef94, + 0x21f501f5, + 0x0ef502ec, + 0x17f1fed1, + 0x14b60820, + 0xf024bd06, + 0x12d01f29, + 0xbe0ef500, + 0xfe80f9fe, + 0x80f90188, + 0xa0f990f9, + 0xd0f9b0f9, + 0xf0f9e0f9, + 0xc4800acf, + 0x0bf404ab, + 0x00b7f11d, + 0x08d7f019, + 0xcf40becf, + 0x21f400bf, + 0x00b0b704, + 0x01e7f004, + 0xe400bed0, + 0xf40100ab, + 0xd7f00d0b, + 0x01e7f108, + 0x0421f440, + 0x0104b7f1, + 0xabffb0bd, + 0x0d0bf4b4, + 0x0c1ca7f1, + 0xd006a4b6, + 0x0ad000ab, + 0xfcf0fc40, + 0xfcd0fce0, + 0xfca0fcb0, + 0xfe80fc90, + 0x80fc0088, + 0xf80032f4, + 0x60e7f101, + 0x40e3f041, + 0xf401f7f0, + 0x21f48d21, + 0x04ffc868, + 0xf8fa0bf4, + 0x60e7f100, + 0x40e3f041, + 0x21f4f4bd, + 0xf100f88d, + 0xf04170e7, + 0xf5f040e3, + 0x8d21f410, + 0xe7f100f8, + 0xe3f04170, + 0x6821f440, + 0xf410f4f0, + 0x00f8f31b, + 0x0614e7f1, + 0xf106e4b6, + 0xd00270f7, + 0xf7f000ef, + 0x01f2b608, + 0xf1fd1bf4, + 0xd00770f7, + 0x00f800ef, + 0x086ce7f1, + 0xd006e4b6, + 0xe7f100ef, + 0xe3f08a14, + 0x8d21f440, + 0xa86ce7f1, + 0xf441e3f0, + 0x00f88d21, + 0x083c87f1, + 0xbd0684b6, + 0x0599f094, + 0xf00089d0, + 0x21f40ca7, + 0x2417f1c9, + 0x0614b60a, + 0xf10010d0, + 0xb60b0037, + 0x32d00634, + 0x0c17f140, + 0x0614b60a, + 0xd00747f0, + 0x14d00012, + 0x4014cf40, + 0xf41f44f0, + 0x32d0fa1b, + 0x000bfe00, + 0xb61f2af0, + 0x20b60424, + 0x3c87f102, + 0x0684b608, + 0x99f094bd, + 0x0089d008, + 0x0a0417f1, + 0xd00614b6, + 0x17f10012, + 0x14b60a20, + 0x0227f006, + 0x800023f1, + 0xf00012d0, + 0x27f11017, + 0x23f00300, + 0x0512fa02, + 0x87f103f8, + 0x84b6085c, + 0xf094bd06, + 0x89d00899, + 0xc1019800, + 0x981814b6, + 0x25b6c002, + 0x0512fd08, + 0xf1160180, + 0xb6083c87, + 0x94bd0684, + 0xd00999f0, + 0x27f10089, + 0x24b60a04, + 0x0021d006, + 0xf10127f0, + 0xb60a2017, + 0x12d00614, + 0x0017f100, + 0x0613f002, + 0xf80501fa, + 0x5c87f103, + 0x0684b608, + 0x99f094bd, + 0x0089d009, + 0x085c87f1, + 0xbd0684b6, + 0x0599f094, + 0xf80089d0, + 0x3121f500, + 0xb821f506, + 0x0ca7f006, + 0xf1c921f4, + 0xb60a1017, + 0x27f00614, + 0x0012d005, + 0xfd0012cf, + 0x1bf40522, + 0x4921f5fa, + 0x9800f806, + 0x27f18103, + 0x24b60a04, + 0x0023d006, + 0x34c434bd, + 0x0f1bf4ff, + 0x030057f1, + 0xfa0653f0, + 0x03f80535, + 0x98c04e98, + 0x21f4c14f, + 0x0830b68d, + 0xf40112b6, + 0x0398df1b, + 0x0023d016, + 0xf1800080, + 0xf0020017, + 0x01fa0613, + 0xf803f806, + 0x0611f400, + 0xf01102f4, + 0x21f510f7, + 0x21f50698, + 0x11f40631, + 0x02f7f01c, + 0x065721f5, + 0x066621f5, + 0x067821f5, + 0x21f5f4bd, + 0x21f50657, + 0x019806b8, + 0x1427f116, + 0x0624b604, + 0xf10020d0, + 0xf0a500e7, + 0x1fb941e3, + 0x8d21f402, + 0xf004e0b6, + 0x2cf001fc, + 0x0124b602, + 0xf405f2fd, + 0x17f18d21, + 0x13f04afc, + 0x0c27f002, + 0xf50012d0, + 0xf1020721, + 0xf047fc27, + 0x20d00223, + 0x012cf000, + 0xd00320b6, + 0xacf00012, + 0x06a5f001, + 0x9800b7f0, + 0x0d98140c, + 0x00e7f015, + 0x015c21f5, + 0xf508a7f0, + 0xf5010321, + 0xf4020721, + 0xa7f02201, + 0xc921f40c, + 0x0a1017f1, + 0xf00614b6, + 0x12d00527, + 0x0012cf00, + 0xf40522fd, + 0x02f4fa1b, + 0x02f7f032, + 0x065721f5, + 0x21f5f4bd, + 0x21f50698, + 0x21f50226, + 0xf4bd0666, + 0x065721f5, + 0x981011f4, + 0x11fd8001, + 0x070bf405, + 0x07df21f5, + 0x064921f5, + 0x000000f8, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, +}; diff --git a/drivers/gpu/drm/nouveau/nvc0_instmem.c b/drivers/gpu/drm/nouveau/nvc0_instmem.c index 82357d2..b701c43 100644 --- a/drivers/gpu/drm/nouveau/nvc0_instmem.c +++ b/drivers/gpu/drm/nouveau/nvc0_instmem.c @@ -32,7 +32,6 @@ struct nvc0_instmem_priv { struct nouveau_channel *bar1; struct nouveau_gpuobj *bar3_pgd; struct nouveau_channel *bar3; - struct nouveau_gpuobj *chan_pgd; }; int @@ -181,17 +180,11 @@ nvc0_instmem_init(struct drm_device *dev) goto error; /* channel vm */ - ret = nouveau_vm_new(dev, 0, (1ULL << 40), 0x0008000000ULL, &vm); + ret = nouveau_vm_new(dev, 0, (1ULL << 40), 0x0008000000ULL, + &dev_priv->chan_vm); if (ret) goto error; - ret = nouveau_gpuobj_new(dev, NULL, 0x8000, 4096, 0, &priv->chan_pgd); - if (ret) - goto error; - - nouveau_vm_ref(vm, &dev_priv->chan_vm, priv->chan_pgd); - nouveau_vm_ref(NULL, &vm, NULL); - nvc0_instmem_resume(dev); return 0; error: @@ -211,8 +204,7 @@ nvc0_instmem_takedown(struct drm_device *dev) nv_wr32(dev, 0x1704, 0x00000000); nv_wr32(dev, 0x1714, 0x00000000); - nouveau_vm_ref(NULL, &dev_priv->chan_vm, priv->chan_pgd); - nouveau_gpuobj_ref(NULL, &priv->chan_pgd); + nouveau_vm_ref(NULL, &dev_priv->chan_vm, NULL); nvc0_channel_del(&priv->bar1); nouveau_vm_ref(NULL, &dev_priv->bar1_vm, priv->bar1_pgd); diff --git a/drivers/gpu/drm/nouveau/nvc0_vm.c b/drivers/gpu/drm/nouveau/nvc0_vm.c index a179e6c..9e35294 100644 --- a/drivers/gpu/drm/nouveau/nvc0_vm.c +++ b/drivers/gpu/drm/nouveau/nvc0_vm.c @@ -105,7 +105,11 @@ nvc0_vm_flush(struct nouveau_vm *vm) struct drm_device *dev = vm->dev; struct nouveau_vm_pgd *vpgd; unsigned long flags; - u32 engine = (dev_priv->chan_vm == vm) ? 1 : 5; + u32 engine; + + engine = 1; + if (vm == dev_priv->bar1_vm || vm == dev_priv->bar3_vm) + engine |= 4; pinstmem->flush(vm->dev); diff --git a/drivers/gpu/drm/nouveau/nvc0_vram.c b/drivers/gpu/drm/nouveau/nvc0_vram.c index 67c6ec6..e45a24d 100644 --- a/drivers/gpu/drm/nouveau/nvc0_vram.c +++ b/drivers/gpu/drm/nouveau/nvc0_vram.c @@ -61,9 +61,7 @@ nvc0_vram_new(struct drm_device *dev, u64 size, u32 align, u32 ncmin, u32 type, struct nouveau_mem **pmem) { struct drm_nouveau_private *dev_priv = dev->dev_private; - struct ttm_bo_device *bdev = &dev_priv->ttm.bdev; - struct ttm_mem_type_manager *man = &bdev->man[TTM_PL_VRAM]; - struct nouveau_mm *mm = man->priv; + struct nouveau_mm *mm = dev_priv->engine.vram.mm; struct nouveau_mm_node *r; struct nouveau_mem *mem; int ret; @@ -105,9 +103,15 @@ int nvc0_vram_init(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_vram_engine *vram = &dev_priv->engine.vram; + const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */ + const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */ + u32 length; dev_priv->vram_size = nv_rd32(dev, 0x10f20c) << 20; dev_priv->vram_size *= nv_rd32(dev, 0x121c74); - dev_priv->vram_rblock_size = 4096; - return 0; + + length = (dev_priv->vram_size >> 12) - rsvd_head - rsvd_tail; + + return nouveau_mm_init(&vram->mm, rsvd_head, length, 1); } diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 9541995..c742944 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -764,7 +764,7 @@ static void atombios_crtc_set_dcpll(struct drm_crtc *crtc, } static void atombios_crtc_program_pll(struct drm_crtc *crtc, - int crtc_id, + u32 crtc_id, int pll_id, u32 encoder_mode, u32 encoder_id, @@ -851,8 +851,7 @@ static void atombios_crtc_program_pll(struct drm_crtc *crtc, args.v5.ucPpll = pll_id; break; case 6: - args.v6.ulCrtcPclkFreq.ucCRTC = crtc_id; - args.v6.ulCrtcPclkFreq.ulPixelClock = cpu_to_le32(clock / 10); + args.v6.ulDispEngClkFreq = cpu_to_le32(crtc_id << 24 | clock / 10); args.v6.ucRefDiv = ref_div; args.v6.usFbDiv = cpu_to_le16(fb_div); args.v6.ulFbDivDecFrac = cpu_to_le32(frac_fb_div * 100000); diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 8c0f9e3..645b84b 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -627,6 +627,7 @@ struct radeon_dp_link_train_info { u8 train_set[4]; u8 link_status[DP_LINK_STATUS_SIZE]; u8 tries; + bool use_dpencoder; }; static void radeon_dp_update_vs_emph(struct radeon_dp_link_train_info *dp_info) @@ -646,7 +647,7 @@ static void radeon_dp_set_tp(struct radeon_dp_link_train_info *dp_info, int tp) int rtp = 0; /* set training pattern on the source */ - if (ASIC_IS_DCE4(dp_info->rdev)) { + if (ASIC_IS_DCE4(dp_info->rdev) || !dp_info->use_dpencoder) { switch (tp) { case DP_TRAINING_PATTERN_1: rtp = ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1; @@ -706,7 +707,7 @@ static int radeon_dp_link_train_init(struct radeon_dp_link_train_info *dp_info) radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LINK_BW_SET, tmp); /* start training on the source */ - if (ASIC_IS_DCE4(dp_info->rdev)) + if (ASIC_IS_DCE4(dp_info->rdev) || !dp_info->use_dpencoder) atombios_dig_encoder_setup(dp_info->encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_START, 0); else @@ -731,7 +732,7 @@ static int radeon_dp_link_train_finish(struct radeon_dp_link_train_info *dp_info DP_TRAINING_PATTERN_DISABLE); /* disable the training pattern on the source */ - if (ASIC_IS_DCE4(dp_info->rdev)) + if (ASIC_IS_DCE4(dp_info->rdev) || !dp_info->use_dpencoder) atombios_dig_encoder_setup(dp_info->encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE, 0); else @@ -869,7 +870,8 @@ void radeon_dp_link_train(struct drm_encoder *encoder, struct radeon_connector *radeon_connector; struct radeon_connector_atom_dig *dig_connector; struct radeon_dp_link_train_info dp_info; - u8 tmp; + int index; + u8 tmp, frev, crev; if (!radeon_encoder->enc_priv) return; @@ -884,6 +886,18 @@ void radeon_dp_link_train(struct drm_encoder *encoder, (dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_eDP)) return; + /* DPEncoderService newer than 1.1 can't program properly the + * training pattern. When facing such version use the + * DIGXEncoderControl (X== 1 | 2) + */ + dp_info.use_dpencoder = true; + index = GetIndexIntoMasterTable(COMMAND, DPEncoderService); + if (atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev)) { + if (crev > 1) { + dp_info.use_dpencoder = false; + } + } + dp_info.enc_id = 0; if (dig->dig_encoder) dp_info.enc_id |= ATOM_DP_CONFIG_DIG2_ENCODER; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 15bd047..14dce9f 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1382,9 +1382,6 @@ int evergreen_cp_resume(struct radeon_device *rdev) /* set the wb address wether it's enabled or not */ WREG32(CP_RB_RPTR_ADDR, -#ifdef __BIG_ENDIAN - RB_RPTR_SWAP(2) | -#endif ((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC)); WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); @@ -2047,6 +2044,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev) rdev->config.evergreen.tile_config |= ((gb_addr_config & 0x30000000) >> 28) << 12; + rdev->config.evergreen.backend_map = gb_backend_map; WREG32(GB_BACKEND_MAP, gb_backend_map); WREG32(GB_ADDR_CONFIG, gb_addr_config); WREG32(DMIF_ADDR_CONFIG, gb_addr_config); @@ -2761,6 +2759,9 @@ int evergreen_irq_process(struct radeon_device *rdev) return IRQ_NONE; } restart_ih: + /* Order reading of wptr vs. reading of IH ring data */ + rmb(); + /* display interrupts */ evergreen_irq_ack(rdev); diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 23d3641..189e865 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -856,7 +856,6 @@ static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u3 case SQ_PGM_START_PS: case SQ_PGM_START_HS: case SQ_PGM_START_LS: - case GDS_ADDR_BASE: case SQ_CONST_MEM_BASE: case SQ_ALU_CONST_CACHE_GS_0: case SQ_ALU_CONST_CACHE_GS_1: @@ -946,6 +945,34 @@ static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u3 } ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); break; + case SX_MEMORY_EXPORT_BASE: + if (p->rdev->family >= CHIP_CAYMAN) { + dev_warn(p->dev, "bad SET_CONFIG_REG " + "0x%04X\n", reg); + return -EINVAL; + } + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + dev_warn(p->dev, "bad SET_CONFIG_REG " + "0x%04X\n", reg); + return -EINVAL; + } + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + break; + case CAYMAN_SX_SCATTER_EXPORT_BASE: + if (p->rdev->family < CHIP_CAYMAN) { + dev_warn(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + dev_warn(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); + return -EINVAL; + } + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + break; default: dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; @@ -1153,6 +1180,34 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return r; } break; + case PACKET3_DISPATCH_DIRECT: + if (pkt->count != 3) { + DRM_ERROR("bad DISPATCH_DIRECT\n"); + return -EINVAL; + } + r = evergreen_cs_track_check(p); + if (r) { + dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + return r; + } + break; + case PACKET3_DISPATCH_INDIRECT: + if (pkt->count != 1) { + DRM_ERROR("bad DISPATCH_INDIRECT\n"); + return -EINVAL; + } + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad DISPATCH_INDIRECT\n"); + return -EINVAL; + } + ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); + r = evergreen_cs_track_check(p); + if (r) { + dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + return r; + } + break; case PACKET3_WAIT_REG_MEM: if (pkt->count != 5) { DRM_ERROR("bad WAIT_REG_MEM\n"); diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index b7b2714..7363d9d 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -351,6 +351,7 @@ #define COLOR_BUFFER_SIZE(x) ((x) << 0) #define POSITION_BUFFER_SIZE(x) ((x) << 8) #define SMX_BUFFER_SIZE(x) ((x) << 16) +#define SX_MEMORY_EXPORT_BASE 0x9010 #define SX_MISC 0x28350 #define CB_PERF_CTR0_SEL_0 0x9A20 @@ -1122,6 +1123,7 @@ #define CAYMAN_PA_SC_AA_CONFIG 0x28BE0 #define CAYMAN_MSAA_NUM_SAMPLES_SHIFT 0 #define CAYMAN_MSAA_NUM_SAMPLES_MASK 0x7 +#define CAYMAN_SX_SCATTER_EXPORT_BASE 0x28358 /* cayman packet3 addition */ #define CAYMAN_PACKET3_DEALLOC_STATE 0x14 diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 559dbd4..44c4750 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -833,6 +833,7 @@ static void cayman_gpu_init(struct radeon_device *rdev) rdev->config.cayman.tile_config |= ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; + rdev->config.cayman.backend_map = gb_backend_map; WREG32(GB_BACKEND_MAP, gb_backend_map); WREG32(GB_ADDR_CONFIG, gb_addr_config); WREG32(DMIF_ADDR_CONFIG, gb_addr_config); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index bc54b26..aa5571b 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1662,6 +1662,7 @@ void r600_gpu_init(struct radeon_device *rdev) R6XX_MAX_BACKENDS_MASK) >> 16)), (cc_rb_backend_disable >> 16)); rdev->config.r600.tile_config = tiling_config; + rdev->config.r600.backend_map = backend_map; tiling_config |= BACKEND_MAP(backend_map); WREG32(GB_TILING_CONFIG, tiling_config); WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); @@ -2212,9 +2213,6 @@ int r600_cp_resume(struct radeon_device *rdev) /* set the wb address whether it's enabled or not */ WREG32(CP_RB_RPTR_ADDR, -#ifdef __BIG_ENDIAN - RB_RPTR_SWAP(2) | -#endif ((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC)); WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); @@ -2994,10 +2992,6 @@ int r600_irq_init(struct radeon_device *rdev) /* RPTR_REARM only works if msi's are enabled */ if (rdev->msi_enabled) ih_cntl |= RPTR_REARM; - -#ifdef __BIG_ENDIAN - ih_cntl |= IH_MC_SWAP(IH_MC_SWAP_32BIT); -#endif WREG32(IH_CNTL, ih_cntl); /* force the active interrupt state to all disabled */ @@ -3308,6 +3302,10 @@ int r600_irq_process(struct radeon_device *rdev) if (!rdev->ih.enabled || rdev->shutdown) return IRQ_NONE; + /* No MSIs, need a dummy read to flush PCI DMAs */ + if (!rdev->msi_enabled) + RREG32(IH_RB_WPTR); + wptr = r600_get_ih_wptr(rdev); rptr = rdev->ih.rptr; DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); @@ -3320,6 +3318,9 @@ int r600_irq_process(struct radeon_device *rdev) } restart_ih: + /* Order reading of wptr vs. reading of IH ring data */ + rmb(); + /* display interrupts */ r600_irq_ack(rdev); diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index c3ab959..45fd592 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -1802,8 +1802,8 @@ static void r600_cp_init_ring_buffer(struct drm_device *dev, /* Set ring buffer size */ #ifdef __BIG_ENDIAN RADEON_WRITE(R600_CP_RB_CNTL, - RADEON_BUF_SWAP_32BIT | - RADEON_RB_NO_UPDATE | + R600_BUF_SWAP_32BIT | + R600_RB_NO_UPDATE | (dev_priv->ring.rptr_update_l2qw << 8) | dev_priv->ring.size_l2qw); #else @@ -1820,15 +1820,15 @@ static void r600_cp_init_ring_buffer(struct drm_device *dev, #ifdef __BIG_ENDIAN RADEON_WRITE(R600_CP_RB_CNTL, - RADEON_BUF_SWAP_32BIT | - RADEON_RB_NO_UPDATE | - RADEON_RB_RPTR_WR_ENA | + R600_BUF_SWAP_32BIT | + R600_RB_NO_UPDATE | + R600_RB_RPTR_WR_ENA | (dev_priv->ring.rptr_update_l2qw << 8) | dev_priv->ring.size_l2qw); #else RADEON_WRITE(R600_CP_RB_CNTL, - RADEON_RB_NO_UPDATE | - RADEON_RB_RPTR_WR_ENA | + R600_RB_NO_UPDATE | + R600_RB_RPTR_WR_ENA | (dev_priv->ring.rptr_update_l2qw << 8) | dev_priv->ring.size_l2qw); #endif @@ -1851,13 +1851,8 @@ static void r600_cp_init_ring_buffer(struct drm_device *dev, - ((unsigned long) dev->sg->virtual) + dev_priv->gart_vm_start; } - RADEON_WRITE(R600_CP_RB_RPTR_ADDR, -#ifdef __BIG_ENDIAN - (2 << 0) | -#endif - (rptr_addr & 0xfffffffc)); - RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI, - upper_32_bits(rptr_addr)); + RADEON_WRITE(R600_CP_RB_RPTR_ADDR, (rptr_addr & 0xfffffffc)); + RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI, upper_32_bits(rptr_addr)); #ifdef __BIG_ENDIAN RADEON_WRITE(R600_CP_RB_CNTL, diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 909bda8..db8ef19 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -1200,6 +1200,15 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx } ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); break; + case SX_MEMORY_EXPORT_BASE: + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + dev_warn(p->dev, "bad SET_CONFIG_REG " + "0x%04X\n", reg); + return -EINVAL; + } + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + break; default: dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ef0e0e0..ef37a9b 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1003,6 +1003,7 @@ struct r600_asic { unsigned tiling_npipes; unsigned tiling_group_size; unsigned tile_config; + unsigned backend_map; struct r100_gpu_lockup lockup; }; @@ -1028,6 +1029,7 @@ struct rv770_asic { unsigned tiling_npipes; unsigned tiling_group_size; unsigned tile_config; + unsigned backend_map; struct r100_gpu_lockup lockup; }; @@ -1054,6 +1056,7 @@ struct evergreen_asic { unsigned tiling_npipes; unsigned tiling_group_size; unsigned tile_config; + unsigned backend_map; struct r100_gpu_lockup lockup; }; @@ -1174,7 +1177,7 @@ struct radeon_device { /* Register mmio */ resource_size_t rmmio_base; resource_size_t rmmio_size; - void *rmmio; + void __iomem *rmmio; radeon_rreg_t mc_rreg; radeon_wreg_t mc_wreg; radeon_rreg_t pll_rreg; @@ -1251,20 +1254,20 @@ int radeon_gpu_wait_for_idle(struct radeon_device *rdev); static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) { if (reg < rdev->rmmio_size) - return readl(((void __iomem *)rdev->rmmio) + reg); + return readl((rdev->rmmio) + reg); else { - writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); - return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + writel(reg, (rdev->rmmio) + RADEON_MM_INDEX); + return readl((rdev->rmmio) + RADEON_MM_DATA); } } static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) { if (reg < rdev->rmmio_size) - writel(v, ((void __iomem *)rdev->rmmio) + reg); + writel(v, (rdev->rmmio) + reg); else { - writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); - writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + writel(reg, (rdev->rmmio) + RADEON_MM_INDEX); + writel(v, (rdev->rmmio) + RADEON_MM_DATA); } } @@ -1296,10 +1299,10 @@ static inline void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v) /* * Registers read & write functions. */ -#define RREG8(reg) readb(((void __iomem *)rdev->rmmio) + (reg)) -#define WREG8(reg, v) writeb(v, ((void __iomem *)rdev->rmmio) + (reg)) -#define RREG16(reg) readw(((void __iomem *)rdev->rmmio) + (reg)) -#define WREG16(reg, v) writew(v, ((void __iomem *)rdev->rmmio) + (reg)) +#define RREG8(reg) readb((rdev->rmmio) + (reg)) +#define WREG8(reg, v) writeb(v, (rdev->rmmio) + (reg)) +#define RREG16(reg) readw((rdev->rmmio) + (reg)) +#define WREG16(reg, v) writew(v, (rdev->rmmio) + (reg)) #define RREG32(reg) r100_mm_rreg(rdev, (reg)) #define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg))) #define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index b244962..df8218b 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -625,7 +625,7 @@ static struct radeon_asic r600_asic = { .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &r600_cs_parse, .copy_blit = &r600_copy_blit, - .copy_dma = &r600_copy_blit, + .copy_dma = NULL, .copy = &r600_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, @@ -672,7 +672,7 @@ static struct radeon_asic rs780_asic = { .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &r600_cs_parse, .copy_blit = &r600_copy_blit, - .copy_dma = &r600_copy_blit, + .copy_dma = NULL, .copy = &r600_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, @@ -719,7 +719,7 @@ static struct radeon_asic rv770_asic = { .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &r600_cs_parse, .copy_blit = &r600_copy_blit, - .copy_dma = &r600_copy_blit, + .copy_dma = NULL, .copy = &r600_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, @@ -766,7 +766,7 @@ static struct radeon_asic evergreen_asic = { .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, .copy_blit = &evergreen_copy_blit, - .copy_dma = &evergreen_copy_blit, + .copy_dma = NULL, .copy = &evergreen_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, @@ -813,7 +813,7 @@ static struct radeon_asic sumo_asic = { .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, .copy_blit = &evergreen_copy_blit, - .copy_dma = &evergreen_copy_blit, + .copy_dma = NULL, .copy = &evergreen_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, @@ -860,7 +860,7 @@ static struct radeon_asic btc_asic = { .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, .copy_blit = &evergreen_copy_blit, - .copy_dma = &evergreen_copy_blit, + .copy_dma = NULL, .copy = &evergreen_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, @@ -907,7 +907,7 @@ static struct radeon_asic cayman_asic = { .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, .copy_blit = &evergreen_copy_blit, - .copy_dma = &evergreen_copy_blit, + .copy_dma = NULL, .copy = &evergreen_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c index 2d48e7a..dcd0863e 100644 --- a/drivers/gpu/drm/radeon/radeon_clocks.c +++ b/drivers/gpu/drm/radeon/radeon_clocks.c @@ -96,7 +96,7 @@ uint32_t radeon_legacy_get_memory_clock(struct radeon_device *rdev) * Read XTAL (ref clock), SCLK and MCLK from Open Firmware device * tree. Hopefully, ATI OF driver is kind enough to fill these */ -static bool __devinit radeon_read_clocks_OF(struct drm_device *dev) +static bool radeon_read_clocks_OF(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; struct device_node *dp = rdev->pdev->dev.of_node; @@ -166,7 +166,7 @@ static bool __devinit radeon_read_clocks_OF(struct drm_device *dev) return true; } #else -static bool __devinit radeon_read_clocks_OF(struct drm_device *dev) +static bool radeon_read_clocks_OF(struct drm_device *dev) { return false; } diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index e459467..a74217c 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -779,7 +779,8 @@ void radeon_combios_i2c_init(struct radeon_device *rdev) } } } - } else if (rdev->family >= CHIP_R200) { + } else if ((rdev->family == CHIP_R200) || + (rdev->family >= CHIP_R300)) { /* 0x68 */ i2c = combios_setup_i2c_bus(rdev, DDC_MONID, 0, 0); rdev->i2c_bus[3] = radeon_i2c_create(dev, &i2c, "MONID"); diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c index 7586779..045ec59 100644 --- a/drivers/gpu/drm/radeon/radeon_cp.c +++ b/drivers/gpu/drm/radeon/radeon_cp.c @@ -2115,7 +2115,7 @@ int radeon_driver_load(struct drm_device *dev, unsigned long flags) if (drm_pci_device_is_agp(dev)) dev_priv->flags |= RADEON_IS_AGP; - else if (drm_pci_device_is_pcie(dev)) + else if (pci_is_pcie(dev->pdev)) dev_priv->flags |= RADEON_IS_PCIE; else dev_priv->flags |= RADEON_IS_PCI; diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 292f73f..28f4655 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -282,7 +282,7 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) spin_lock_irqsave(&rdev->ddev->event_lock, flags); work = radeon_crtc->unpin_work; if (work == NULL || - !radeon_fence_signaled(work->fence)) { + (work->fence && !radeon_fence_signaled(work->fence))) { spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); return; } @@ -348,7 +348,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, struct radeon_framebuffer *new_radeon_fb; struct drm_gem_object *obj; struct radeon_bo *rbo; - struct radeon_fence *fence; struct radeon_unpin_work *work; unsigned long flags; u32 tiling_flags, pitch_pixels; @@ -359,16 +358,9 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, if (work == NULL) return -ENOMEM; - r = radeon_fence_create(rdev, &fence); - if (unlikely(r != 0)) { - kfree(work); - DRM_ERROR("flip queue: failed to create fence.\n"); - return -ENOMEM; - } work->event = event; work->rdev = rdev; work->crtc_id = radeon_crtc->crtc_id; - work->fence = radeon_fence_ref(fence); old_radeon_fb = to_radeon_framebuffer(crtc->fb); new_radeon_fb = to_radeon_framebuffer(fb); /* schedule unpin of the old buffer */ @@ -377,6 +369,10 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, drm_gem_object_reference(obj); rbo = gem_to_radeon_bo(obj); work->old_rbo = rbo; + obj = new_radeon_fb->obj; + rbo = gem_to_radeon_bo(obj); + if (rbo->tbo.sync_obj) + work->fence = radeon_fence_ref(rbo->tbo.sync_obj); INIT_WORK(&work->work, radeon_unpin_work_func); /* We borrow the event spin lock for protecting unpin_work */ @@ -391,9 +387,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, spin_unlock_irqrestore(&dev->event_lock, flags); /* pin the new buffer */ - obj = new_radeon_fb->obj; - rbo = gem_to_radeon_bo(obj); - DRM_DEBUG_DRIVER("flip-ioctl() cur_fbo = %p, cur_bbo = %p\n", work->old_rbo, rbo); @@ -461,37 +454,18 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, goto pflip_cleanup1; } - /* 32 ought to cover us */ - r = radeon_ring_lock(rdev, 32); - if (r) { - DRM_ERROR("failed to lock the ring before flip\n"); - goto pflip_cleanup2; - } - - /* emit the fence */ - radeon_fence_emit(rdev, fence); /* set the proper interrupt */ radeon_pre_page_flip(rdev, radeon_crtc->crtc_id); - /* fire the ring */ - radeon_ring_unlock_commit(rdev); return 0; -pflip_cleanup2: - drm_vblank_put(dev, radeon_crtc->crtc_id); - pflip_cleanup1: - r = radeon_bo_reserve(rbo, false); - if (unlikely(r != 0)) { + if (unlikely(radeon_bo_reserve(rbo, false) != 0)) { DRM_ERROR("failed to reserve new rbo in error path\n"); goto pflip_cleanup; } - r = radeon_bo_unpin(rbo); - if (unlikely(r != 0)) { - radeon_bo_unreserve(rbo); - r = -EINVAL; + if (unlikely(radeon_bo_unpin(rbo) != 0)) { DRM_ERROR("failed to unpin new rbo in error path\n"); - goto pflip_cleanup; } radeon_bo_unreserve(rbo); @@ -501,7 +475,7 @@ pflip_cleanup: unlock_free: drm_gem_object_unreference_unlocked(old_radeon_fb->obj); spin_unlock_irqrestore(&dev->event_lock, flags); - radeon_fence_unref(&fence); + radeon_fence_unref(&work->fence); kfree(work); return r; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 73dfbe8..85f033f 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -50,10 +50,11 @@ * 2.7.0 - fixups for r600 2D tiling support. (no external ABI change), add eg dyn gpr regs * 2.8.0 - pageflip support, r500 US_FORMAT regs. r500 ARGB2101010 colorbuf, r300->r500 CMASK, clock crystal query * 2.9.0 - r600 tiling (s3tc,rgtc) working, SET_PREDICATION packet 3 on r600 + eg, backend query - * 2.10.0 - fusion 2D tiling + * 2.10.0 - fusion 2D tiling, initial compute support for the CS checker + * 2.11.0 - backend map */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 10 +#define KMS_DRIVER_MINOR 11 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index bd58af6..be2c122 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -60,7 +60,7 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) /* update BUS flag */ if (drm_pci_device_is_agp(dev)) { flags |= RADEON_IS_AGP; - } else if (drm_pci_device_is_pcie(dev)) { + } else if (pci_is_pcie(dev->pdev)) { flags |= RADEON_IS_PCIE; } else { flags |= RADEON_IS_PCI; @@ -237,6 +237,19 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) case RADEON_INFO_FUSION_GART_WORKING: value = 1; break; + case RADEON_INFO_BACKEND_MAP: + if (rdev->family >= CHIP_CAYMAN) + value = rdev->config.cayman.backend_map; + else if (rdev->family >= CHIP_CEDAR) + value = rdev->config.evergreen.backend_map; + else if (rdev->family >= CHIP_RV770) + value = rdev->config.rv770.backend_map; + else if (rdev->family >= CHIP_R600) + value = rdev->config.r600.backend_map; + else { + return -EINVAL; + } + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index aaa19dc..6fabe89 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -594,6 +594,9 @@ int radeon_pm_init(struct radeon_device *rdev) if (rdev->pm.default_vddc) radeon_atom_set_voltage(rdev, rdev->pm.default_vddc, SET_VOLTAGE_TYPE_ASIC_VDDC); + if (rdev->pm.default_vddci) + radeon_atom_set_voltage(rdev, rdev->pm.default_vddci, + SET_VOLTAGE_TYPE_ASIC_VDDCI); if (rdev->pm.default_sclk) radeon_set_engine_clock(rdev, rdev->pm.default_sclk); if (rdev->pm.default_mclk) diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h index bc44a3d..b4ce864 100644 --- a/drivers/gpu/drm/radeon/radeon_reg.h +++ b/drivers/gpu/drm/radeon/radeon_reg.h @@ -3295,7 +3295,7 @@ # define RADEON_RB_BUFSZ_MASK (0x3f << 0) # define RADEON_RB_BLKSZ_SHIFT 8 # define RADEON_RB_BLKSZ_MASK (0x3f << 8) -# define RADEON_BUF_SWAP_32BIT (1 << 17) +# define RADEON_BUF_SWAP_32BIT (2 << 16) # define RADEON_MAX_FETCH_SHIFT 18 # define RADEON_MAX_FETCH_MASK (0x3 << 18) # define RADEON_RB_NO_UPDATE (1 << 27) diff --git a/drivers/gpu/drm/radeon/reg_srcs/cayman b/drivers/gpu/drm/radeon/reg_srcs/cayman index 0aa8e85..2316977 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/cayman +++ b/drivers/gpu/drm/radeon/reg_srcs/cayman @@ -208,6 +208,7 @@ cayman 0x9400 0x0002834C PA_SC_VPORT_ZMAX_15 0x00028350 SX_MISC 0x00028354 SX_SURFACE_SYNC +0x0002835C SX_SCATTER_EXPORT_SIZE 0x00028380 SQ_VTX_SEMANTIC_0 0x00028384 SQ_VTX_SEMANTIC_1 0x00028388 SQ_VTX_SEMANTIC_2 @@ -432,6 +433,7 @@ cayman 0x9400 0x00028700 SPI_STACK_MGMT 0x00028704 SPI_WAVE_MGMT_1 0x00028708 SPI_WAVE_MGMT_2 +0x00028720 GDS_ADDR_BASE 0x00028724 GDS_ADDR_SIZE 0x00028780 CB_BLEND0_CONTROL 0x00028784 CB_BLEND1_CONTROL diff --git a/drivers/gpu/drm/radeon/reg_srcs/evergreen b/drivers/gpu/drm/radeon/reg_srcs/evergreen index 0e28cae..161737a2 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/evergreen +++ b/drivers/gpu/drm/radeon/reg_srcs/evergreen @@ -44,6 +44,7 @@ evergreen 0x9400 0x00008E28 SQ_STATIC_THREAD_MGMT_3 0x00008E2C SQ_LDS_RESOURCE_MGMT 0x00008E48 SQ_EX_ALLOC_TABLE_SLOTS +0x00009014 SX_MEMORY_EXPORT_SIZE 0x00009100 SPI_CONFIG_CNTL 0x0000913C SPI_CONFIG_CNTL_1 0x00009508 TA_CNTL_AUX @@ -442,7 +443,9 @@ evergreen 0x9400 0x000286EC SPI_COMPUTE_NUM_THREAD_X 0x000286F0 SPI_COMPUTE_NUM_THREAD_Y 0x000286F4 SPI_COMPUTE_NUM_THREAD_Z +0x00028720 GDS_ADDR_BASE 0x00028724 GDS_ADDR_SIZE +0x00028728 GDS_ORDERED_WAVE_PER_SE 0x00028780 CB_BLEND0_CONTROL 0x00028784 CB_BLEND1_CONTROL 0x00028788 CB_BLEND2_CONTROL diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600 index ea49752..0380c5c 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/r600 +++ b/drivers/gpu/drm/radeon/reg_srcs/r600 @@ -429,6 +429,7 @@ r600 0x9400 0x00028438 SX_ALPHA_REF 0x00028410 SX_ALPHA_TEST_CONTROL 0x00028350 SX_MISC +0x00009014 SX_MEMORY_EXPORT_SIZE 0x00009604 TC_INVALIDATE 0x00009400 TD_FILTER4 0x00009404 TD_FILTER4_1 diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 1f5850e..4b5d0e6 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -530,7 +530,7 @@ int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr) addr = addr & 0xFFFFFFFFFFFFF000ULL; addr |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED; addr |= R600_PTE_READABLE | R600_PTE_WRITEABLE; - writeq(addr, ((void __iomem *)ptr) + (i * 8)); + writeq(addr, ptr + (i * 8)); return 0; } diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 4de5189..4720d00 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -778,6 +778,7 @@ static void rv770_gpu_init(struct radeon_device *rdev) (cc_rb_backend_disable >> 16)); rdev->config.rv770.tile_config = gb_tiling_config; + rdev->config.rv770.backend_map = backend_map; gb_tiling_config |= BACKEND_MAP(backend_map); WREG32(GB_TILING_CONFIG, gb_tiling_config); diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index d948575..170e751 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -355,7 +355,7 @@ restart: if (nr_free) goto restart; - /* Not allowed to fall tough or break because + /* Not allowed to fall through or break because * following context is inside spinlock while we are * outside here. */ @@ -556,7 +556,7 @@ out: } /** - * Fill the given pool if there isn't enough pages and requested number of + * Fill the given pool if there aren't enough pages and the requested number of * pages is small. */ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, @@ -576,8 +576,8 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, pool->fill_lock = true; - /* If allocation request is small and there is not enough - * pages in pool we fill the pool first */ + /* If allocation request is small and there are not enough + * pages in a pool we fill the pool up first. */ if (count < _manager->options.small && count > pool->npages) { struct list_head new_pages; @@ -614,9 +614,9 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool, } /** - * Cut count nubmer of pages from the pool and put them to return list + * Cut 'count' number of pages from the pool and put them on the return list. * - * @return count of pages still to allocate to fill the request. + * @return count of pages still required to fulfill the request. */ static unsigned ttm_page_pool_get_pages(struct ttm_page_pool *pool, struct list_head *pages, int ttm_flags, @@ -637,7 +637,7 @@ static unsigned ttm_page_pool_get_pages(struct ttm_page_pool *pool, goto out; } /* find the last pages to include for requested number of pages. Split - * pool to begin and halves to reduce search space. */ + * pool to begin and halve it to reduce search space. */ if (count <= pool->npages/2) { i = 0; list_for_each(p, &pool->list) { @@ -651,7 +651,7 @@ static unsigned ttm_page_pool_get_pages(struct ttm_page_pool *pool, break; } } - /* Cut count number of pages from pool */ + /* Cut 'count' number of pages from the pool */ list_cut_position(pages, &pool->list, p); pool->npages -= count; count = 0; |