diff options
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cik_regs.h | 177 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_events.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 21 |
8 files changed, 53 insertions, 185 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h index 01ff332..183be5b 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h @@ -23,33 +23,11 @@ #ifndef CIK_REGS_H #define CIK_REGS_H -#define IH_VMID_0_LUT 0x3D40u - -#define BIF_DOORBELL_CNTL 0x530Cu - -#define SRBM_GFX_CNTL 0xE44 -#define PIPEID(x) ((x) << 0) -#define MEID(x) ((x) << 2) -#define VMID(x) ((x) << 4) -#define QUEUEID(x) ((x) << 8) - -#define SQ_CONFIG 0x8C00 - -#define SH_MEM_BASES 0x8C28 /* if PTR32, these are the bases for scratch and lds */ #define PRIVATE_BASE(x) ((x) << 0) /* scratch */ #define SHARED_BASE(x) ((x) << 16) /* LDS */ -#define SH_MEM_APE1_BASE 0x8C2C -/* if PTR32, this is the base location of GPUVM */ -#define SH_MEM_APE1_LIMIT 0x8C30 -/* if PTR32, this is the upper limit of GPUVM */ -#define SH_MEM_CONFIG 0x8C34 #define PTR32 (1 << 0) -#define PRIVATE_ATC (1 << 1) #define ALIGNMENT_MODE(x) ((x) << 2) -#define SH_MEM_ALIGNMENT_MODE_DWORD 0 -#define SH_MEM_ALIGNMENT_MODE_DWORD_STRICT 1 -#define SH_MEM_ALIGNMENT_MODE_STRICT 2 #define SH_MEM_ALIGNMENT_MODE_UNALIGNED 3 #define DEFAULT_MTYPE(x) ((x) << 4) #define APE1_MTYPE(x) ((x) << 7) @@ -58,137 +36,34 @@ #define MTYPE_CACHED 0 #define MTYPE_NONCACHED 3 - -#define SH_STATIC_MEM_CONFIG 0x9604u - -#define TC_CFG_L1_LOAD_POLICY0 0xAC68 -#define TC_CFG_L1_LOAD_POLICY1 0xAC6C -#define TC_CFG_L1_STORE_POLICY 0xAC70 -#define TC_CFG_L2_LOAD_POLICY0 0xAC74 -#define TC_CFG_L2_LOAD_POLICY1 0xAC78 -#define TC_CFG_L2_STORE_POLICY0 0xAC7C -#define TC_CFG_L2_STORE_POLICY1 0xAC80 -#define TC_CFG_L2_ATOMIC_POLICY 0xAC84 -#define TC_CFG_L1_VOLATILE 0xAC88 -#define TC_CFG_L2_VOLATILE 0xAC8C - -#define CP_PQ_WPTR_POLL_CNTL 0xC20C -#define WPTR_POLL_EN (1 << 31) - -#define CPC_INT_CNTL 0xC2D0 -#define CP_ME1_PIPE0_INT_CNTL 0xC214 -#define CP_ME1_PIPE1_INT_CNTL 0xC218 -#define CP_ME1_PIPE2_INT_CNTL 0xC21C -#define CP_ME1_PIPE3_INT_CNTL 0xC220 -#define CP_ME2_PIPE0_INT_CNTL 0xC224 -#define CP_ME2_PIPE1_INT_CNTL 0xC228 -#define CP_ME2_PIPE2_INT_CNTL 0xC22C -#define CP_ME2_PIPE3_INT_CNTL 0xC230 -#define DEQUEUE_REQUEST_INT_ENABLE (1 << 13) -#define WRM_POLL_TIMEOUT_INT_ENABLE (1 << 17) -#define PRIV_REG_INT_ENABLE (1 << 23) -#define TIME_STAMP_INT_ENABLE (1 << 26) -#define GENERIC2_INT_ENABLE (1 << 29) -#define GENERIC1_INT_ENABLE (1 << 30) -#define GENERIC0_INT_ENABLE (1 << 31) -#define CP_ME1_PIPE0_INT_STATUS 0xC214 -#define CP_ME1_PIPE1_INT_STATUS 0xC218 -#define CP_ME1_PIPE2_INT_STATUS 0xC21C -#define CP_ME1_PIPE3_INT_STATUS 0xC220 -#define CP_ME2_PIPE0_INT_STATUS 0xC224 -#define CP_ME2_PIPE1_INT_STATUS 0xC228 -#define CP_ME2_PIPE2_INT_STATUS 0xC22C -#define CP_ME2_PIPE3_INT_STATUS 0xC230 -#define DEQUEUE_REQUEST_INT_STATUS (1 << 13) -#define WRM_POLL_TIMEOUT_INT_STATUS (1 << 17) -#define PRIV_REG_INT_STATUS (1 << 23) -#define TIME_STAMP_INT_STATUS (1 << 26) -#define GENERIC2_INT_STATUS (1 << 29) -#define GENERIC1_INT_STATUS (1 << 30) -#define GENERIC0_INT_STATUS (1 << 31) - -#define CP_HPD_EOP_BASE_ADDR 0xC904 -#define CP_HPD_EOP_BASE_ADDR_HI 0xC908 -#define CP_HPD_EOP_VMID 0xC90C -#define CP_HPD_EOP_CONTROL 0xC910 -#define EOP_SIZE(x) ((x) << 0) -#define EOP_SIZE_MASK (0x3f << 0) -#define CP_MQD_BASE_ADDR 0xC914 -#define CP_MQD_BASE_ADDR_HI 0xC918 -#define CP_HQD_ACTIVE 0xC91C -#define CP_HQD_VMID 0xC920 - -#define CP_HQD_PERSISTENT_STATE 0xC924u #define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8) #define PRELOAD_REQ (1 << 0) -#define CP_HQD_PIPE_PRIORITY 0xC928u -#define CP_HQD_QUEUE_PRIORITY 0xC92Cu -#define CP_HQD_QUANTUM 0xC930u +#define MQD_CONTROL_PRIV_STATE_EN (1U << 8) + +#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20) + +#define IB_ATC_EN (1U << 23) + #define QUANTUM_EN 1U #define QUANTUM_SCALE_1MS (1U << 4) #define QUANTUM_DURATION(x) ((x) << 8) -#define CP_HQD_PQ_BASE 0xC934 -#define CP_HQD_PQ_BASE_HI 0xC938 -#define CP_HQD_PQ_RPTR 0xC93C -#define CP_HQD_PQ_RPTR_REPORT_ADDR 0xC940 -#define CP_HQD_PQ_RPTR_REPORT_ADDR_HI 0xC944 -#define CP_HQD_PQ_WPTR_POLL_ADDR 0xC948 -#define CP_HQD_PQ_WPTR_POLL_ADDR_HI 0xC94C -#define CP_HQD_PQ_DOORBELL_CONTROL 0xC950 -#define DOORBELL_OFFSET(x) ((x) << 2) -#define DOORBELL_OFFSET_MASK (0x1fffff << 2) -#define DOORBELL_SOURCE (1 << 28) -#define DOORBELL_SCHD_HIT (1 << 29) -#define DOORBELL_EN (1 << 30) -#define DOORBELL_HIT (1 << 31) -#define CP_HQD_PQ_WPTR 0xC954 -#define CP_HQD_PQ_CONTROL 0xC958 -#define QUEUE_SIZE(x) ((x) << 0) -#define QUEUE_SIZE_MASK (0x3f << 0) #define RPTR_BLOCK_SIZE(x) ((x) << 8) -#define RPTR_BLOCK_SIZE_MASK (0x3f << 8) #define MIN_AVAIL_SIZE(x) ((x) << 20) -#define PQ_ATC_EN (1 << 23) -#define PQ_VOLATILE (1 << 26) -#define NO_UPDATE_RPTR (1 << 27) -#define UNORD_DISPATCH (1 << 28) -#define ROQ_PQ_IB_FLIP (1 << 29) -#define PRIV_STATE (1 << 30) -#define KMD_QUEUE (1 << 31) - #define DEFAULT_RPTR_BLOCK_SIZE RPTR_BLOCK_SIZE(5) #define DEFAULT_MIN_AVAIL_SIZE MIN_AVAIL_SIZE(3) -#define CP_HQD_IB_BASE_ADDR 0xC95Cu -#define CP_HQD_IB_BASE_ADDR_HI 0xC960u -#define CP_HQD_IB_RPTR 0xC964u -#define CP_HQD_IB_CONTROL 0xC968u -#define IB_ATC_EN (1U << 23) -#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20) - -#define AQL_ENABLE 1 - -#define CP_HQD_DEQUEUE_REQUEST 0xC974 -#define DEQUEUE_REQUEST_DRAIN 1 -#define DEQUEUE_REQUEST_RESET 2 -#define DEQUEUE_INT (1U << 8) +#define PQ_ATC_EN (1 << 23) +#define NO_UPDATE_RPTR (1 << 27) -#define CP_HQD_SEMA_CMD 0xC97Cu -#define CP_HQD_MSG_TYPE 0xC980u -#define CP_HQD_ATOMIC0_PREOP_LO 0xC984u -#define CP_HQD_ATOMIC0_PREOP_HI 0xC988u -#define CP_HQD_ATOMIC1_PREOP_LO 0xC98Cu -#define CP_HQD_ATOMIC1_PREOP_HI 0xC990u -#define CP_HQD_HQ_SCHEDULER0 0xC994u -#define CP_HQD_HQ_SCHEDULER1 0xC998u +#define DOORBELL_OFFSET(x) ((x) << 2) +#define DOORBELL_EN (1 << 30) +#define PRIV_STATE (1 << 30) +#define KMD_QUEUE (1 << 31) -#define CP_MQD_CONTROL 0xC99C -#define MQD_VMID(x) ((x) << 0) -#define MQD_VMID_MASK (0xf << 0) -#define MQD_CONTROL_PRIV_STATE_EN (1U << 8) +#define AQL_ENABLE 1 #define SDMA_RB_VMID(x) (x << 24) #define SDMA_RB_ENABLE (1 << 0) @@ -202,33 +77,7 @@ #define SDMA_VA_SHARED_BASE(x) (x << 8) #define GRBM_GFX_INDEX 0x30800 -#define INSTANCE_INDEX(x) ((x) << 0) -#define SH_INDEX(x) ((x) << 8) -#define SE_INDEX(x) ((x) << 16) -#define SH_BROADCAST_WRITES (1 << 29) -#define INSTANCE_BROADCAST_WRITES (1 << 30) -#define SE_BROADCAST_WRITES (1 << 31) - -#define SQC_CACHES 0x30d20 -#define SQC_POLICY 0x8C38u -#define SQC_VOLATILE 0x8C3Cu -#define CP_PERFMON_CNTL 0x36020 - -#define ATC_VMID0_PASID_MAPPING 0x339Cu -#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u #define ATC_VMID_PASID_MAPPING_VALID (1U << 31) -#define ATC_VM_APERTURE0_CNTL 0x3310u -#define ATS_ACCESS_MODE_NEVER 0 -#define ATS_ACCESS_MODE_ALWAYS 1 - -#define ATC_VM_APERTURE0_CNTL2 0x3318u -#define ATC_VM_APERTURE0_HIGH_ADDR 0x3308u -#define ATC_VM_APERTURE0_LOW_ADDR 0x3300u -#define ATC_VM_APERTURE1_CNTL 0x3314u -#define ATC_VM_APERTURE1_CNTL2 0x331Cu -#define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu -#define ATC_VM_APERTURE1_LOW_ADDR 0x3304u - #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 96153f2..c34c393 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -445,7 +445,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword /= sizeof(uint32_t); packets_vec[0].bitfields2.reg_offset = - aw_reg_add_dword - CONFIG_REG_BASE; + aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[0].reg_data[0] = cntl.u32All; @@ -458,7 +458,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword /= sizeof(uint32_t); packets_vec[1].bitfields2.reg_offset = - aw_reg_add_dword - CONFIG_REG_BASE; + aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[1].reg_data[0] = addrHi.u32All; aw_reg_add_dword = @@ -470,7 +470,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword /= sizeof(uint32_t); packets_vec[2].bitfields2.reg_offset = - aw_reg_add_dword - CONFIG_REG_BASE; + aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[2].reg_data[0] = addrLo.u32All; /* enable watch flag if address is not zero*/ @@ -488,7 +488,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, aw_reg_add_dword /= sizeof(uint32_t); packets_vec[3].bitfields2.reg_offset = - aw_reg_add_dword - CONFIG_REG_BASE; + aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[3].reg_data[0] = cntl.u32All; status = dbgdev_diq_submit_ib( @@ -690,7 +690,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packets_vec[1].header.opcode = IT_SET_CONFIG_REG; packets_vec[1].header.type = PM4_TYPE_3; packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - - CONFIG_REG_BASE; + AMD_CONFIG_REG_BASE; packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; packets_vec[1].bitfields2.insert_vmid = 1; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h index 4b0dd5a..03424c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h @@ -48,9 +48,9 @@ enum { /* CONFIG reg space definition */ enum { - CONFIG_REG_BASE = 0x2000, /* in dwords */ - CONFIG_REG_END = 0x2B00, - CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE + AMD_CONFIG_REG_BASE = 0x2000, /* in dwords */ + AMD_CONFIG_REG_END = 0x2B00, + AMD_CONFIG_REG_SIZE = AMD_CONFIG_REG_END - AMD_CONFIG_REG_BASE }; /* SH reg space definition */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 1d1e2e9..75312c8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -44,7 +44,10 @@ static const struct kfd_device_info kaveri_device_info = { static const struct kfd_device_info carrizo_device_info = { .asic_family = CHIP_CARRIZO, .max_pasid_bits = 16, + /* max num of queues for CZ.TODO should be a dynamic value */ + .max_no_of_hqd = 24, .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 547b0a5..4bb7f42 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -946,7 +946,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, { int retval; enum kfd_preempt_type_filter preempt_type; - struct kfd_process *p; + struct kfd_process_device *pdd; BUG_ON(!dqm); @@ -981,8 +981,9 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); if (retval != 0) { - p = kfd_get_process(current); - p->reset_wavefronts = true; + pdd = kfd_get_process_device_data(dqm->dev, + kfd_get_process(current)); + pdd->reset_wavefronts = true; goto out; } pm_release_ib(&dqm->packets); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 3cb37d2..b6e28dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -313,6 +313,10 @@ static int create_signal_event(struct file *devkfd, p->signal_event_count, ev->event_id, ev->user_signal_address); + pr_debug("signal event number %zu created with id %d, address %p\n", + p->signal_event_count, ev->event_id, + ev->user_signal_address); + return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index cb79046..d0d5f4b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -463,6 +463,11 @@ struct kfd_process_device { /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ bool bound; + + /* This flag tells if we should reset all + * wavefronts on process termination + */ + bool reset_wavefronts; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -519,11 +524,6 @@ struct kfd_process { event_pages */ u32 next_nonsignal_event_id; size_t signal_event_count; - /* - * This flag tells if we should reset all wavefronts on - * process termination - */ - bool reset_wavefronts; }; /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 56b904f..8a1f999 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -173,7 +173,7 @@ static void kfd_process_wq_release(struct work_struct *work) pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", pdd->dev->id, p->pasid); - if (p->reset_wavefronts) + if (pdd->reset_wavefronts) dbgdev_wave_reset_wavefronts(pdd->dev, p); amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); @@ -222,6 +222,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct kfd_process *p; + struct kfd_process_device *pdd = NULL; /* * The kfd_process structure can not be free because the @@ -240,6 +241,15 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, /* In case our notifier is called before IOMMU notifier */ pqm_uninit(&p->pqm); + /* Iterate over all process device data structure and check + * if we should reset all wavefronts */ + list_for_each_entry(pdd, &p->per_device_data, per_device_list) + if (pdd->reset_wavefronts) { + pr_warn("amdkfd: Resetting all wave fronts\n"); + dbgdev_wave_reset_wavefronts(pdd->dev, p); + pdd->reset_wavefronts = false; + } + mutex_unlock(&p->mutex); /* @@ -305,8 +315,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (kfd_init_apertures(process) != 0) goto err_init_apretures; - process->reset_wavefronts = false; - return process; err_init_apretures: @@ -348,6 +356,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, INIT_LIST_HEAD(&pdd->qpd.queues_list); INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); pdd->qpd.dqm = dev->dqm; + pdd->reset_wavefronts = false; list_add(&pdd->per_device_list, &p->per_device_data); } @@ -409,10 +418,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) kfd_dbgmgr_destroy(dev->dbgmgr); pqm_uninit(&p->pqm); - if (p->reset_wavefronts) - dbgdev_wave_reset_wavefronts(dev, p); pdd = kfd_get_process_device_data(dev, p); + if (pdd->reset_wavefronts) { + dbgdev_wave_reset_wavefronts(pdd->dev, p); + pdd->reset_wavefronts = false; + } /* * Just mark pdd as unbound, because we still need it to call |