From 1549fcd15cab5e59beb7203583e0a70349dda7c1 Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Thu, 23 Apr 2015 17:58:05 +0800 Subject: drm/amdkfd: Remove unessary void pointer cast kmalloc() returns a void pointer - no need to cast it in drivers/gpu/drm/amd/amdkfd/kfd_process.c::kfd_process_destroy_delayed() Signed-off-by: Firo Yang Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 945d622..4d7bc95 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -203,8 +203,7 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu) mmdrop(p->mm); - work = (struct kfd_process_release_work *) - kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC); + work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC); if (work) { INIT_WORK((struct work_struct *) work, kfd_process_wq_release); -- cgit v0.10.2 From 8856d8e048aa12ee056e24046e22fea7c77817a6 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 4 May 2015 15:53:15 +0300 Subject: drm/amdkfd: reformat some debug prints Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 17e56dc..e621eba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -142,14 +142,13 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - pr_debug("kfd: mapping doorbell page in kfd_doorbell_mmap\n" - " target user address == 0x%08llX\n" - " physical address == 0x%08llX\n" - " vm_flags == 0x%04lX\n" - " size == 0x%04lX\n", - (unsigned long long) vma->vm_start, address, vma->vm_flags, - doorbell_process_allocation()); - + pr_debug("mapping doorbell page:\n"); + pr_debug(" target user address == 0x%08llX\n", + (unsigned long long) vma->vm_start); + pr_debug(" physical address == 0x%08llX\n", address); + pr_debug(" vm_flags == 0x%04lX\n", vma->vm_flags); + pr_debug(" size == 0x%04lX\n", + doorbell_process_allocation()); return io_remap_pfn_range(vma, vma->vm_start, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index c7d298e..8fa8941 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -215,8 +215,9 @@ static int acquire_packet_buffer(struct kernel_queue *kq, queue_address = (unsigned int *)kq->pq_kernel_addr; queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t); - pr_debug("amdkfd: In func %s\nrptr: %d\nwptr: %d\nqueue_address 0x%p\n", - __func__, rptr, wptr, queue_address); + pr_debug("rptr: %d\n", rptr); + pr_debug("wptr: %d\n", wptr); + pr_debug("queue_address 0x%p\n", queue_address); available_size = (rptr - 1 - wptr + queue_size_dwords) % queue_size_dwords; -- cgit v0.10.2 From d42af779fb2cf23bbe218c91b44f4979f18bc910 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 5 May 2015 11:51:31 +0300 Subject: drm/amdkfd: Use new struct for asic specific ops This patch creates a new structure for asic specific operations, instead of using the existing structure of operations. This is done to make the code flow more logic, readable and maintainable. The change is done only to the device queue manager module at this point. Signed-off-by: Oded Gabbay Reviewed-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 488f51d..650ae1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -120,6 +120,18 @@ struct device_queue_manager_ops { uint64_t alternate_aperture_size); }; +struct device_queue_manager_asic_ops { + int (*register_process)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); + int (*initialize)(struct device_queue_manager *dqm); + bool (*set_cache_memory_policy)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); +}; + /** * struct device_queue_manager * @@ -134,7 +146,7 @@ struct device_queue_manager_ops { struct device_queue_manager { struct device_queue_manager_ops ops; - struct device_queue_manager_ops ops_asic_specific; + struct device_queue_manager_asic_ops ops_asic_specific; struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; struct packet_manager packets; @@ -157,8 +169,8 @@ struct device_queue_manager { bool active_runlist; }; -void device_queue_manager_init_cik(struct device_queue_manager_ops *ops); -void device_queue_manager_init_vi(struct device_queue_manager_ops *ops); +void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops); +void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); int init_pipelines(struct device_queue_manager *dqm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 5469efe..292d13f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -34,7 +34,7 @@ static int register_process_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static int initialize_cpsch_cik(struct device_queue_manager *dqm); -void device_queue_manager_init_cik(struct device_queue_manager_ops *ops) +void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops) { ops->set_cache_memory_policy = set_cache_memory_policy_cik; ops->register_process = register_process_cik; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 20553dc..8b00ccf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -33,7 +33,7 @@ static int register_process_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static int initialize_cpsch_vi(struct device_queue_manager *dqm); -void device_queue_manager_init_vi(struct device_queue_manager_ops *ops) +void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops) { pr_warn("amdkfd: VI DQM is not currently supported\n"); -- cgit v0.10.2 From 3e3f6e1a90a890e0cea4ec6d6f98e1fa94255de8 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 5 May 2015 11:51:39 +0300 Subject: drm/amdkfd: make the sdma vm init to be asic specific Signed-off-by: Oded Gabbay Reviewed-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 69af73f..1eb1022 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -614,19 +614,6 @@ static void deallocate_sdma_queue(struct device_queue_manager *dqm, set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap); } -static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd) -{ - uint32_t value = SDMA_ATC; - - if (q->process->is_32bit_user_mode) - value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd)); - else - value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64( - qpd_to_pdd(qpd))); - q->properties.sdma_vm_addr = value; -} - static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) @@ -649,7 +636,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id); pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id); - init_sdma_vm(dqm, q, qpd); + dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval != 0) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 650ae1c..57278e2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -130,6 +130,9 @@ struct device_queue_manager_asic_ops { enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); + void (*init_sdma_vm)(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); }; /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 292d13f..9ce8a20 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -33,12 +33,15 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, static int register_process_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static int initialize_cpsch_cik(struct device_queue_manager *dqm); +static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd); void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops) { ops->set_cache_memory_policy = set_cache_memory_policy_cik; ops->register_process = register_process_cik; ops->initialize = initialize_cpsch_cik; + ops->init_sdma_vm = init_sdma_vm; } static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) @@ -129,6 +132,19 @@ static int register_process_cik(struct device_queue_manager *dqm, return 0; } +static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + uint32_t value = SDMA_ATC; + + if (q->process->is_32bit_user_mode) + value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd)); + else + value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64( + qpd_to_pdd(qpd))); + q->properties.sdma_vm_addr = value; +} + static int initialize_cpsch_cik(struct device_queue_manager *dqm) { return init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 8b00ccf..4c15212 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -32,6 +32,8 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, static int register_process_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static int initialize_cpsch_vi(struct device_queue_manager *dqm); +static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd); void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops) { @@ -40,6 +42,7 @@ void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops) ops->set_cache_memory_policy = set_cache_memory_policy_vi; ops->register_process = register_process_vi; ops->initialize = initialize_cpsch_vi; + ops->init_sdma_vm = init_sdma_vm; } static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, @@ -58,6 +61,11 @@ static int register_process_vi(struct device_queue_manager *dqm, return -1; } +static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ +} + static int initialize_cpsch_vi(struct device_queue_manager *dqm) { return 0; -- cgit v0.10.2 From 1241e0b48f9337320a65560192f53406832eff2c Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 10 May 2015 12:37:28 +0300 Subject: MAINTAINERS: update amdkfd Oded's email address Leaving AMD soon so need to update my email address to @gmail.com Signed-off-by: Oded Gabbay diff --git a/MAINTAINERS b/MAINTAINERS index 781e099..d68df93 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -631,7 +631,7 @@ F: drivers/iommu/amd_iommu*.[ch] F: include/linux/amd-iommu.h AMD KFD -M: Oded Gabbay +M: Oded Gabbay L: dri-devel@lists.freedesktop.org T: git git://people.freedesktop.org/~gabbayo/linux.git S: Supported -- cgit v0.10.2 From d36b94fcf0c812b04fff483e1b423f7d5042766e Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 5 Mar 2015 15:13:18 +0200 Subject: drm/radeon: Add init interrupt kfd->kgd interface This patch adds a new interface function to the kfd->kgd interface. The function is kgd_init_interrupts() and its function is to initialize a pipe's interrupts. The function currently enables the timestamp interrupt and the bad opcode interrupt. Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index dabd944..4ea21ae 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -144,6 +144,8 @@ struct kfd2kgd_calls { int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); + int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); + int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); diff --git a/drivers/gpu/drm/radeon/cik_reg.h b/drivers/gpu/drm/radeon/cik_reg.h index f667347..0ec5d53 100644 --- a/drivers/gpu/drm/radeon/cik_reg.h +++ b/drivers/gpu/drm/radeon/cik_reg.h @@ -149,6 +149,8 @@ #define KFD_CIK_SDMA_QUEUE_OFFSET 0x200 +#define CPC_INT_CNTL 0xC2D0 + #define CP_HQD_IQ_RPTR 0xC970u #define AQL_ENABLE (1U << 0) #define SDMA0_RLC0_RB_CNTL 0xD400u diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 0089d83..b33ba3b 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -1335,6 +1335,7 @@ # define CNTX_EMPTY_INT_ENABLE (1 << 20) # define PRIV_INSTR_INT_ENABLE (1 << 22) # define PRIV_REG_INT_ENABLE (1 << 23) +# define OPCODE_ERROR_INT_ENABLE (1 << 24) # define TIME_STAMP_INT_ENABLE (1 << 26) # define CP_RINGID2_INT_ENABLE (1 << 29) # define CP_RINGID1_INT_ENABLE (1 << 30) diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 3db2300..813a416 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -66,7 +66,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); - +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); @@ -89,6 +89,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_pipeline = kgd_init_pipeline, + .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_is_occupied = kgd_hqd_is_occupied, @@ -407,6 +408,24 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, return 0; } +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; + pipe = (pipe_id % CIK_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, 0, 0); + + write_register(kgd, CPC_INT_CNTL, + TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE); + + unlock_srbm(kgd); + + return 0; +} + static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) { uint32_t retval; -- cgit v0.10.2 From 2249d55827c9e5d5731d7a8622ecd366d8756bbb Mon Sep 17 00:00:00 2001 From: Andrew Lewycky Date: Thu, 17 Jul 2014 01:37:30 +0300 Subject: drm/amdkfd: Add interrupt handling module This patch adds the interrupt handling module, kfd_interrupt.c, and its related members in different data structures to the amdkfd driver. The amdkfd interrupt module maintains an internal interrupt ring per amdkfd device. The internal interrupt ring contains interrupts that needs further handling. The extra handling is deferred to a later time through a workqueue. There's no acknowledgment for the interrupts we use. The hardware simply queues a new interrupt each time without waiting. The fixed-size internal queue means that it's possible for us to lose interrupts because we have no back-pressure to the hardware. However, only interrupts that are "wanted" by amdkfd, are copied into the amdkfd s/w interrupt ring, in order to minimize the chances for overflow of the ring. Signed-off-by: Andrew Lewycky Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 0f49601..cd09c05 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -12,5 +12,6 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_kernel_queue_vi.o kfd_packet_manager.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ + kfd_interrupt.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index ca7f2d3..13c30a0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -235,6 +235,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_topology_add_device_error; } + if (kfd_interrupt_init(kfd)) { + dev_err(kfd_device, + "Error initializing interrupts for device (%x:%x)\n", + kfd->pdev->vendor, kfd->pdev->device); + goto kfd_interrupt_error; + } + if (!device_iommu_pasid_init(kfd)) { dev_err(kfd_device, "Error initializing iommuv2 for device (%x:%x)\n", @@ -273,6 +280,8 @@ dqm_start_error: device_queue_manager_error: amd_iommu_free_device(kfd->pdev); device_iommu_pasid_error: + kfd_interrupt_exit(kfd); +kfd_interrupt_error: kfd_topology_remove_device(kfd); kfd_topology_add_device_error: kfd_gtt_sa_fini(kfd); @@ -290,6 +299,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) if (kfd->init_complete) { device_queue_manager_uninit(kfd->dqm); amd_iommu_free_device(kfd->pdev); + kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); kfd_gtt_sa_fini(kfd); kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); @@ -333,7 +343,17 @@ int kgd2kfd_resume(struct kfd_dev *kfd) /* This is called directly from KGD at ISR. */ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { - /* Process interrupts / schedule work as necessary */ + if (!kfd->init_complete) + return; + + spin_lock(&kfd->interrupt_lock); + + if (kfd->interrupts_active + && interrupt_is_wanted(kfd, ih_ring_entry) + && enqueue_ih_ring_entry(kfd, ih_ring_entry)) + schedule_work(&kfd->interrupt_work); + + spin_unlock(&kfd->interrupt_lock); } static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 1eb1022..078092c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -522,6 +522,17 @@ int init_pipelines(struct device_queue_manager *dqm, return 0; } +static void init_interrupts(struct device_queue_manager *dqm) +{ + unsigned int i; + + BUG_ON(dqm == NULL); + + for (i = 0 ; i < get_pipes_num(dqm) ; i++) + dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, + i + get_first_pipe(dqm)); +} + static int init_scheduler(struct device_queue_manager *dqm) { int retval; @@ -581,6 +592,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm) static int start_nocpsch(struct device_queue_manager *dqm) { + init_interrupts(dqm); return 0; } @@ -737,6 +749,9 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm->fence_addr = dqm->fence_mem->cpu_ptr; dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; + + init_interrupts(dqm); + list_for_each_entry(node, &dqm->queues, list) if (node->qpd->pqm->process && dqm->dev) kfd_bind_process_to_device(dqm->dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c new file mode 100644 index 0000000..5383dd0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -0,0 +1,181 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * KFD Interrupts. + * + * AMD GPUs deliver interrupts by pushing an interrupt description onto the + * interrupt ring and then sending an interrupt. KGD receives the interrupt + * in ISR and sends us a pointer to each new entry on the interrupt ring. + * + * We generally can't process interrupt-signaled events from ISR, so we call + * out to each interrupt client module (currently only the scheduler) to ask if + * each interrupt is interesting. If they return true, then it requires further + * processing so we copy it to an internal interrupt ring and call each + * interrupt client again from a work-queue. + * + * There's no acknowledgment for the interrupts we use. The hardware simply + * queues a new interrupt each time without waiting. + * + * The fixed-size internal queue means that it's possible for us to lose + * interrupts because we have no back-pressure to the hardware. + */ + +#include +#include +#include "kfd_priv.h" + +#define KFD_INTERRUPT_RING_SIZE 1024 + +static void interrupt_wq(struct work_struct *); + +int kfd_interrupt_init(struct kfd_dev *kfd) +{ + void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, + kfd->device_info->ih_ring_entry_size, + GFP_KERNEL); + if (!interrupt_ring) + return -ENOMEM; + + kfd->interrupt_ring = interrupt_ring; + kfd->interrupt_ring_size = + KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; + atomic_set(&kfd->interrupt_ring_wptr, 0); + atomic_set(&kfd->interrupt_ring_rptr, 0); + + spin_lock_init(&kfd->interrupt_lock); + + INIT_WORK(&kfd->interrupt_work, interrupt_wq); + + kfd->interrupts_active = true; + + /* + * After this function returns, the interrupt will be enabled. This + * barrier ensures that the interrupt running on a different processor + * sees all the above writes. + */ + smp_wmb(); + + return 0; +} + +void kfd_interrupt_exit(struct kfd_dev *kfd) +{ + /* + * Stop the interrupt handler from writing to the ring and scheduling + * workqueue items. The spinlock ensures that any interrupt running + * after we have unlocked sees interrupts_active = false. + */ + unsigned long flags; + + spin_lock_irqsave(&kfd->interrupt_lock, flags); + kfd->interrupts_active = false; + spin_unlock_irqrestore(&kfd->interrupt_lock, flags); + + /* + * Flush_scheduled_work ensures that there are no outstanding + * work-queue items that will access interrupt_ring. New work items + * can't be created because we stopped interrupt handling above. + */ + flush_scheduled_work(); + + kfree(kfd->interrupt_ring); +} + +/* + * This assumes that it can't be called concurrently with itself + * but only with dequeue_ih_ring_entry. + */ +bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) +{ + unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); + unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); + + if ((rptr - wptr) % kfd->interrupt_ring_size == + kfd->device_info->ih_ring_entry_size) { + /* This is very bad, the system is likely to hang. */ + dev_err_ratelimited(kfd_chardev(), + "Interrupt ring overflow, dropping interrupt.\n"); + return false; + } + + memcpy(kfd->interrupt_ring + wptr, ih_ring_entry, + kfd->device_info->ih_ring_entry_size); + + wptr = (wptr + kfd->device_info->ih_ring_entry_size) % + kfd->interrupt_ring_size; + smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */ + atomic_set(&kfd->interrupt_ring_wptr, wptr); + + return true; +} + +/* + * This assumes that it can't be called concurrently with itself + * but only with enqueue_ih_ring_entry. + */ +static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) +{ + /* + * Assume that wait queues have an implicit barrier, i.e. anything that + * happened in the ISR before it queued work is visible. + */ + + unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); + unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); + + if (rptr == wptr) + return false; + + memcpy(ih_ring_entry, kfd->interrupt_ring + rptr, + kfd->device_info->ih_ring_entry_size); + + rptr = (rptr + kfd->device_info->ih_ring_entry_size) % + kfd->interrupt_ring_size; + + /* + * Ensure the rptr write update is not visible until + * memcpy has finished reading. + */ + smp_mb(); + atomic_set(&kfd->interrupt_ring_rptr, rptr); + + return true; +} + +static void interrupt_wq(struct work_struct *work) +{ + struct kfd_dev *dev = container_of(work, struct kfd_dev, + interrupt_work); + + uint32_t ih_ring_entry[DIV_ROUND_UP( + dev->device_info->ih_ring_entry_size, + sizeof(uint32_t))]; + + while (dequeue_ih_ring_entry(dev, ih_ring_entry)) + ; +} + +bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) +{ + return false; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f21fcce..34c7662 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -161,10 +161,23 @@ struct kfd_dev { unsigned int gtt_sa_chunk_size; unsigned int gtt_sa_num_of_chunks; + /* Interrupts */ + void *interrupt_ring; + size_t interrupt_ring_size; + atomic_t interrupt_ring_rptr; + atomic_t interrupt_ring_wptr; + struct work_struct interrupt_work; + spinlock_t interrupt_lock; + /* QCM Device instance */ struct device_queue_manager *dqm; bool init_complete; + /* + * Interrupts of interest to KFD are copied + * from the HW ring into a SW ring. + */ + bool interrupts_active; }; /* KGD2KFD callbacks */ @@ -555,7 +568,11 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); /* Interrupts */ +int kfd_interrupt_init(struct kfd_dev *dev); +void kfd_interrupt_exit(struct kfd_dev *dev); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); +bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); +bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry); /* Power Management */ void kgd2kfd_suspend(struct kfd_dev *kfd); -- cgit v0.10.2 From 29a5d3eb9a7612b26ba098a0db65e54372612d07 Mon Sep 17 00:00:00 2001 From: Andrew Lewycky Date: Sun, 7 Dec 2014 17:05:11 +0200 Subject: drm/amdkfd: add events IOCTL set definitions - AMDKFD_IOC_CREATE_EVENT: Creates a new event of a specified type - AMDKFD_IOC_DESTROY_EVENT: Destroys an existing event - AMDKFD_IOC_SET_EVENT: Signal an existing event - AMDKFD_IOC_RESET_EVENT: Reset an existing event - AMDKFD_IOC_WAIT_EVENTS: Wait on event(s) until they are signaled v2: - Move the limit of the signal events to kfd_ioctl.h so it can be used by userspace v3: - Change all bool fields in struct kfd_memory_exception_failure to uint32_t Signed-off-by: Andrew Lewycky Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 19a4fba..9933b2e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -514,6 +514,36 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, return 0; } +static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, + void *data) +{ + return -ENODEV; +} + +static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p, + void *data) +{ + return -ENODEV; +} + +static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p, + void *data) +{ + return -ENODEV; +} + +static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p, + void *data) +{ + return -ENODEV; +} + +static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, + void *data) +{ + return -ENODEV; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} @@ -539,6 +569,21 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE, kfd_ioctl_update_queue, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT, + kfd_ioctl_create_event, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT, + kfd_ioctl_destroy_event, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT, + kfd_ioctl_set_event, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT, + kfd_ioctl_reset_event, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS, + kfd_ioctl_wait_events, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af94f31..4ca35a8 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -27,7 +27,7 @@ #include #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 0 +#define KFD_IOCTL_MINOR_VERSION 1 struct kfd_ioctl_get_version_args { uint32_t major_version; /* from KFD */ @@ -128,6 +128,83 @@ struct kfd_ioctl_get_process_apertures_args { uint32_t pad; }; +/* Matching HSA_EVENTTYPE */ +#define KFD_IOC_EVENT_SIGNAL 0 +#define KFD_IOC_EVENT_NODECHANGE 1 +#define KFD_IOC_EVENT_DEVICESTATECHANGE 2 +#define KFD_IOC_EVENT_HW_EXCEPTION 3 +#define KFD_IOC_EVENT_SYSTEM_EVENT 4 +#define KFD_IOC_EVENT_DEBUG_EVENT 5 +#define KFD_IOC_EVENT_PROFILE_EVENT 6 +#define KFD_IOC_EVENT_QUEUE_EVENT 7 +#define KFD_IOC_EVENT_MEMORY 8 + +#define KFD_IOC_WAIT_RESULT_COMPLETE 0 +#define KFD_IOC_WAIT_RESULT_TIMEOUT 1 +#define KFD_IOC_WAIT_RESULT_FAIL 2 + +#define KFD_SIGNAL_EVENT_LIMIT 256 + +struct kfd_ioctl_create_event_args { + uint64_t event_page_offset; /* from KFD */ + uint32_t event_trigger_data; /* from KFD - signal events only */ + uint32_t event_type; /* to KFD */ + uint32_t auto_reset; /* to KFD */ + uint32_t node_id; /* to KFD - only valid for certain + event types */ + uint32_t event_id; /* from KFD */ + uint32_t event_slot_index; /* from KFD */ +}; + +struct kfd_ioctl_destroy_event_args { + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_set_event_args { + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_reset_event_args { + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_memory_exception_failure { + uint32_t NotPresent; /* Page not present or supervisor privilege */ + uint32_t ReadOnly; /* Write access to a read-only page */ + uint32_t NoExecute; /* Execute access to a page marked NX */ + uint32_t pad; +}; + +/* memory exception data*/ +struct kfd_hsa_memory_exception_data { + struct kfd_memory_exception_failure failure; + uint64_t va; + uint32_t gpu_id; + uint32_t pad; +}; + +/* Event data*/ +struct kfd_event_data { + union { + struct kfd_hsa_memory_exception_data memory_exception_data; + }; /* From KFD */ + uint64_t kfd_event_data_ext; /* pointer to an extension structure + for future exception types */ + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_wait_events_args { + uint64_t events_ptr; /* to KFD */ + uint32_t num_events; /* to KFD */ + uint32_t wait_for_all; /* to KFD */ + uint32_t timeout; /* to KFD */ + uint32_t wait_result; /* from KFD */ +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -155,7 +232,22 @@ struct kfd_ioctl_get_process_apertures_args { #define AMDKFD_IOC_UPDATE_QUEUE \ AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args) +#define AMDKFD_IOC_CREATE_EVENT \ + AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args) + +#define AMDKFD_IOC_DESTROY_EVENT \ + AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args) + +#define AMDKFD_IOC_SET_EVENT \ + AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args) + +#define AMDKFD_IOC_RESET_EVENT \ + AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args) + +#define AMDKFD_IOC_WAIT_EVENTS \ + AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x08 +#define AMDKFD_COMMAND_END 0x0D #endif -- cgit v0.10.2 From f3a398183f7b9ef78f6b71ee9f7641e046403bcb Mon Sep 17 00:00:00 2001 From: Andrew Lewycky Date: Sun, 10 May 2015 12:15:46 +0300 Subject: drm/amdkfd: Add the events module This patch adds the events module (kfd_events.c) and the interrupt handle module for Kaveri (cik_event_interrupt.c). The patch updates the interrupt_is_wanted(), so that it now calls the interrupt isr function specific for the device that received the interrupt. That function(implemented in cik_event_interrupt.c) returns whether this interrupt is of interest to us or not. The patch also updates the interrupt_wq(), so that it now calls the device's specific wq function, which checks the interrupt source and tries to signal relevant events. v2: Increase limit of signal events to 4096 per process Remove bitfields from struct cik_ih_ring_entry Rename radeon_kfd_event_mmap to kfd_event_mmap Add debug prints to allocate_free_slot and allocate_signal_page Make allocate_event_notification_slot return a correct value Add warning prints to create_signal_event Remove error print from IOCTL path Reformatted debug prints in kfd_event_mmap Map correct size (as received from mmap) in kfd_event_mmap v3: Reduce limit of signal events back to 256 per process Fix allocation of kernel memory for signal events Signed-off-by: Andrew Lewycky Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index cd09c05..652d254 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -12,6 +12,6 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_kernel_queue_vi.o kfd_packet_manager.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ - kfd_interrupt.o + kfd_interrupt.o kfd_events.o cik_event_interrupt.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c new file mode 100644 index 0000000..629510a --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -0,0 +1,63 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "kfd_priv.h" +#include "kfd_events.h" +#include "cik_int.h" + +static bool cik_event_interrupt_isr(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) +{ + unsigned int pasid; + const struct cik_ih_ring_entry *ihre = + (const struct cik_ih_ring_entry *)ih_ring_entry; + + pasid = (ihre->ring_id & 0xffff0000) >> 16; + + /* Do not process in ISR, just request it to be forwarded to WQ. */ + return (pasid != 0) && + (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || + ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG); +} + +static void cik_event_interrupt_wq(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) +{ + unsigned int pasid; + const struct cik_ih_ring_entry *ihre = + (const struct cik_ih_ring_entry *)ih_ring_entry; + + pasid = (ihre->ring_id & 0xffff0000) >> 16; + + if (pasid == 0) + return; + + if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) + kfd_signal_event_interrupt(pasid, 0, 0); + else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) + kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8); +} + +const struct kfd_event_interrupt_class event_interrupt_class_cik = { + .interrupt_isr = cik_event_interrupt_isr, + .interrupt_wq = cik_event_interrupt_wq, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h new file mode 100644 index 0000000..bbef9e2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h @@ -0,0 +1,40 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef HSA_RADEON_CIK_INT_H_INCLUDED +#define HSA_RADEON_CIK_INT_H_INCLUDED + +#include + +struct cik_ih_ring_entry { + uint32_t source_id; + uint32_t data; + uint32_t ring_id; + uint32_t reserved; +}; + +#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 +#define CIK_INTSRC_CP_END_OF_PIPE 0xB5 +#define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF + +#endif + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 9933b2e..6dd8948 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -289,8 +289,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, args->queue_id = queue_id; + /* Return gpu_id as doorbell offset for mmap usage */ - args->doorbell_offset = args->gpu_id << PAGE_SHIFT; + args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id); + args->doorbell_offset <<= PAGE_SHIFT; mutex_unlock(&p->mutex); @@ -684,5 +686,15 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) if (IS_ERR(process)) return PTR_ERR(process); - return kfd_doorbell_mmap(process, vma); + if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) == + KFD_MMAP_DOORBELL_MASK) { + vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK; + return kfd_doorbell_mmap(process, vma); + } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) == + KFD_MMAP_EVENTS_MASK) { + vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK; + return kfd_event_mmap(process, vma); + } + + return -EFAULT; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 13c30a0..4c03169 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -34,6 +34,7 @@ static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, .max_pasid_bits = 16, .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, .mqd_size_aligned = MQD_SIZE_ALIGNED }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c new file mode 100644 index 0000000..23ffa96 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -0,0 +1,799 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "kfd_priv.h" +#include "kfd_events.h" + +/* + * A task can only be on a single wait_queue at a time, but we need to support + * waiting on multiple events (any/all). + * Instead of each event simply having a wait_queue with sleeping tasks, it + * has a singly-linked list of tasks. + * A thread that wants to sleep creates an array of these, one for each event + * and adds one to each event's waiter chain. + */ +struct kfd_event_waiter { + struct list_head waiters; + struct task_struct *sleeping_task; + + /* Transitions to true when the event this belongs to is signaled. */ + bool activated; +}; + +/* + * Over-complicated pooled allocator for event notification slots. + * + * Each signal event needs a 64-bit signal slot where the signaler will write + * a 1 before sending an interrupt.l (This is needed because some interrupts + * do not contain enough spare data bits to identify an event.) + * We get whole pages from vmalloc and map them to the process VA. + * Individual signal events are then allocated a slot in a page. + */ + +struct signal_page { + struct list_head event_pages; /* kfd_process.signal_event_pages */ + uint64_t *kernel_address; + uint64_t __user *user_address; + uint32_t page_index; /* Index into the mmap aperture. */ + unsigned int free_slots; + unsigned long used_slot_bitmap[0]; +}; + +#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT +#define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE) +#define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1) +#define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \ + SLOT_BITMAP_SIZE * sizeof(long)) + +/* + * For signal events, the event ID is used as the interrupt user data. + * For SQ s_sendmsg interrupts, this is limited to 8 bits. + */ + +#define INTERRUPT_DATA_BITS 8 +#define SIGNAL_EVENT_ID_SLOT_SHIFT 0 + +static uint64_t *page_slots(struct signal_page *page) +{ + return page->kernel_address; +} + +static bool allocate_free_slot(struct kfd_process *process, + struct signal_page **out_page, + unsigned int *out_slot_index) +{ + struct signal_page *page; + + list_for_each_entry(page, &process->signal_event_pages, event_pages) { + if (page->free_slots > 0) { + unsigned int slot = + find_first_zero_bit(page->used_slot_bitmap, + SLOTS_PER_PAGE); + + __set_bit(slot, page->used_slot_bitmap); + page->free_slots--; + + page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; + + *out_page = page; + *out_slot_index = slot; + + pr_debug("allocated event signal slot in page %p, slot %d\n", + page, slot); + + return true; + } + } + + pr_debug("No free event signal slots were found for process %p\n", + process); + + return false; +} + +#define list_tail_entry(head, type, member) \ + list_entry((head)->prev, type, member) + +static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p) +{ + void *backing_store; + struct signal_page *page; + + page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL); + if (!page) + goto fail_alloc_signal_page; + + page->free_slots = SLOTS_PER_PAGE; + + backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); + if (!backing_store) + goto fail_alloc_signal_store; + + /* prevent user-mode info leaks */ + memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, + KFD_SIGNAL_EVENT_LIMIT * 8); + + page->kernel_address = backing_store; + + if (list_empty(&p->signal_event_pages)) + page->page_index = 0; + else + page->page_index = list_tail_entry(&p->signal_event_pages, + struct signal_page, + event_pages)->page_index + 1; + + pr_debug("allocated new event signal page at %p, for process %p\n", + page, p); + pr_debug("page index is %d\n", page->page_index); + + list_add(&page->event_pages, &p->signal_event_pages); + + return true; + +fail_alloc_signal_store: + kfree(page); +fail_alloc_signal_page: + return false; +} + +static bool allocate_event_notification_slot(struct file *devkfd, + struct kfd_process *p, + struct signal_page **page, + unsigned int *signal_slot_index) +{ + bool ret; + + ret = allocate_free_slot(p, page, signal_slot_index); + if (ret == false) { + ret = allocate_signal_page(devkfd, p); + if (ret == true) + ret = allocate_free_slot(p, page, signal_slot_index); + } + + return ret; +} + +/* Assumes that the process's event_mutex is locked. */ +static void release_event_notification_slot(struct signal_page *page, + size_t slot_index) +{ + __clear_bit(slot_index, page->used_slot_bitmap); + page->free_slots++; + + /* We don't free signal pages, they are retained by the process + * and reused until it exits. */ +} + +static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, + unsigned int page_index) +{ + struct signal_page *page; + + /* + * This is safe because we don't delete signal pages until the + * process exits. + */ + list_for_each_entry(page, &p->signal_event_pages, event_pages) + if (page->page_index == page_index) + return page; + + return NULL; +} + +/* + * Assumes that p->event_mutex is held and of course that p is not going + * away (current or locked). + */ +static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) +{ + struct kfd_event *ev; + + hash_for_each_possible(p->events, ev, events, id) + if (ev->event_id == id) + return ev; + + return NULL; +} + +static u32 make_signal_event_id(struct signal_page *page, + unsigned int signal_slot_index) +{ + return page->page_index | + (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT); +} + +/* + * Produce a kfd event id for a nonsignal event. + * These are arbitrary numbers, so we do a sequential search through + * the hash table for an unused number. + */ +static u32 make_nonsignal_event_id(struct kfd_process *p) +{ + u32 id; + + for (id = p->next_nonsignal_event_id; + id < KFD_LAST_NONSIGNAL_EVENT_ID && + lookup_event_by_id(p, id) != NULL; + id++) + ; + + if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { + + /* + * What if id == LAST_NONSIGNAL_EVENT_ID - 1? + * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so + * the first loop fails immediately and we proceed with the + * wraparound loop below. + */ + p->next_nonsignal_event_id = id + 1; + + return id; + } + + for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; + id < KFD_LAST_NONSIGNAL_EVENT_ID && + lookup_event_by_id(p, id) != NULL; + id++) + ; + + + if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { + p->next_nonsignal_event_id = id + 1; + return id; + } + + p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; + return 0; +} + +static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p, + struct signal_page *page, + unsigned int signal_slot) +{ + return lookup_event_by_id(p, make_signal_event_id(page, signal_slot)); +} + +static int create_signal_event(struct file *devkfd, + struct kfd_process *p, + struct kfd_event *ev) +{ + if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { + pr_warn("amdkfd: Signal event wasn't created because limit was reached\n"); + return -ENOMEM; + } + + if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page, + &ev->signal_slot_index)) { + pr_warn("amdkfd: Signal event wasn't created because out of kernel memory\n"); + return -ENOMEM; + } + + p->signal_event_count++; + + ev->user_signal_address = + &ev->signal_page->user_address[ev->signal_slot_index]; + + ev->event_id = make_signal_event_id(ev->signal_page, + ev->signal_slot_index); + + pr_debug("signal event number %zu created with id %d, address %p\n", + p->signal_event_count, ev->event_id, + ev->user_signal_address); + + return 0; +} + +/* + * No non-signal events are supported yet. + * We create them as events that never signal. + * Set event calls from user-mode are failed. + */ +static int create_other_event(struct kfd_process *p, struct kfd_event *ev) +{ + ev->event_id = make_nonsignal_event_id(p); + if (ev->event_id == 0) + return -ENOMEM; + + return 0; +} + +void kfd_event_init_process(struct kfd_process *p) +{ + mutex_init(&p->event_mutex); + hash_init(p->events); + INIT_LIST_HEAD(&p->signal_event_pages); + p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; + p->signal_event_count = 0; +} + +static void destroy_event(struct kfd_process *p, struct kfd_event *ev) +{ + if (ev->signal_page != NULL) { + release_event_notification_slot(ev->signal_page, + ev->signal_slot_index); + p->signal_event_count--; + } + + /* + * Abandon the list of waiters. Individual waiting threads will + * clean up their own data. + */ + list_del(&ev->waiters); + + hash_del(&ev->events); + kfree(ev); +} + +static void destroy_events(struct kfd_process *p) +{ + struct kfd_event *ev; + struct hlist_node *tmp; + unsigned int hash_bkt; + + hash_for_each_safe(p->events, hash_bkt, tmp, ev, events) + destroy_event(p, ev); +} + +/* + * We assume that the process is being destroyed and there is no need to + * unmap the pages or keep bookkeeping data in order. + */ +static void shutdown_signal_pages(struct kfd_process *p) +{ + struct signal_page *page, *tmp; + + list_for_each_entry_safe(page, tmp, &p->signal_event_pages, + event_pages) { + free_pages((unsigned long)page->kernel_address, + get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); + kfree(page); + } +} + +void kfd_event_free_process(struct kfd_process *p) +{ + destroy_events(p); + shutdown_signal_pages(p); +} + +static bool event_can_be_gpu_signaled(const struct kfd_event *ev) +{ + return ev->type == KFD_EVENT_TYPE_SIGNAL || + ev->type == KFD_EVENT_TYPE_DEBUG; +} + +static bool event_can_be_cpu_signaled(const struct kfd_event *ev) +{ + return ev->type == KFD_EVENT_TYPE_SIGNAL; +} + +int kfd_event_create(struct file *devkfd, struct kfd_process *p, + uint32_t event_type, bool auto_reset, uint32_t node_id, + uint32_t *event_id, uint32_t *event_trigger_data, + uint64_t *event_page_offset, uint32_t *event_slot_index) +{ + int ret = 0; + struct kfd_event *ev = kzalloc(sizeof(*ev), GFP_KERNEL); + + if (!ev) + return -ENOMEM; + + ev->type = event_type; + ev->auto_reset = auto_reset; + ev->signaled = false; + + INIT_LIST_HEAD(&ev->waiters); + + *event_page_offset = 0; + + mutex_lock(&p->event_mutex); + + switch (event_type) { + case KFD_EVENT_TYPE_SIGNAL: + case KFD_EVENT_TYPE_DEBUG: + ret = create_signal_event(devkfd, p, ev); + if (!ret) { + *event_page_offset = (ev->signal_page->page_index | + KFD_MMAP_EVENTS_MASK); + *event_page_offset <<= PAGE_SHIFT; + *event_slot_index = ev->signal_slot_index; + } + break; + default: + ret = create_other_event(p, ev); + break; + } + + if (!ret) { + hash_add(p->events, &ev->events, ev->event_id); + + *event_id = ev->event_id; + *event_trigger_data = ev->event_id; + } else { + kfree(ev); + } + + mutex_unlock(&p->event_mutex); + + return ret; +} + +/* Assumes that p is current. */ +int kfd_event_destroy(struct kfd_process *p, uint32_t event_id) +{ + struct kfd_event *ev; + int ret = 0; + + mutex_lock(&p->event_mutex); + + ev = lookup_event_by_id(p, event_id); + + if (ev) + destroy_event(p, ev); + else + ret = -EINVAL; + + mutex_unlock(&p->event_mutex); + return ret; +} + +static void set_event(struct kfd_event *ev) +{ + struct kfd_event_waiter *waiter; + struct kfd_event_waiter *next; + + /* Auto reset if the list is non-empty and we're waking someone. */ + ev->signaled = !ev->auto_reset || list_empty(&ev->waiters); + + list_for_each_entry_safe(waiter, next, &ev->waiters, waiters) { + waiter->activated = true; + + /* _init because free_waiters will call list_del */ + list_del_init(&waiter->waiters); + + wake_up_process(waiter->sleeping_task); + } +} + +/* Assumes that p is current. */ +int kfd_set_event(struct kfd_process *p, uint32_t event_id) +{ + int ret = 0; + struct kfd_event *ev; + + mutex_lock(&p->event_mutex); + + ev = lookup_event_by_id(p, event_id); + + if (ev && event_can_be_cpu_signaled(ev)) + set_event(ev); + else + ret = -EINVAL; + + mutex_unlock(&p->event_mutex); + return ret; +} + +static void reset_event(struct kfd_event *ev) +{ + ev->signaled = false; +} + +/* Assumes that p is current. */ +int kfd_reset_event(struct kfd_process *p, uint32_t event_id) +{ + int ret = 0; + struct kfd_event *ev; + + mutex_lock(&p->event_mutex); + + ev = lookup_event_by_id(p, event_id); + + if (ev && event_can_be_cpu_signaled(ev)) + reset_event(ev); + else + ret = -EINVAL; + + mutex_unlock(&p->event_mutex); + return ret; + +} + +static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) +{ + page_slots(ev->signal_page)[ev->signal_slot_index] = + UNSIGNALED_EVENT_SLOT; +} + +static bool is_slot_signaled(struct signal_page *page, unsigned int index) +{ + return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT; +} + +static void set_event_from_interrupt(struct kfd_process *p, + struct kfd_event *ev) +{ + if (ev && event_can_be_gpu_signaled(ev)) { + acknowledge_signal(p, ev); + set_event(ev); + } +} + +void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, + uint32_t valid_id_bits) +{ + struct kfd_event *ev; + + /* + * Because we are called from arbitrary context (workqueue) as opposed + * to process context, kfd_process could attempt to exit while we are + * running so the lookup function returns a locked process. + */ + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + + if (!p) + return; /* Presumably process exited. */ + + mutex_lock(&p->event_mutex); + + if (valid_id_bits >= INTERRUPT_DATA_BITS) { + /* Partial ID is a full ID. */ + ev = lookup_event_by_id(p, partial_id); + set_event_from_interrupt(p, ev); + } else { + /* + * Partial ID is in fact partial. For now we completely + * ignore it, but we could use any bits we did receive to + * search faster. + */ + struct signal_page *page; + unsigned i; + + list_for_each_entry(page, &p->signal_event_pages, event_pages) + for (i = 0; i < SLOTS_PER_PAGE; i++) + if (is_slot_signaled(page, i)) { + ev = lookup_event_by_page_slot(p, + page, i); + set_event_from_interrupt(p, ev); + } + } + + mutex_unlock(&p->event_mutex); + mutex_unlock(&p->mutex); +} + +static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) +{ + struct kfd_event_waiter *event_waiters; + uint32_t i; + + event_waiters = kmalloc_array(num_events, + sizeof(struct kfd_event_waiter), + GFP_KERNEL); + + for (i = 0; (event_waiters) && (i < num_events) ; i++) { + INIT_LIST_HEAD(&event_waiters[i].waiters); + event_waiters[i].sleeping_task = current; + event_waiters[i].activated = false; + } + + return event_waiters; +} + +static int init_event_waiter(struct kfd_process *p, + struct kfd_event_waiter *waiter, + uint32_t event_id) +{ + struct kfd_event *ev = lookup_event_by_id(p, event_id); + + if (!ev) + return -EINVAL; + + waiter->activated = ev->signaled; + ev->signaled = ev->signaled && !ev->auto_reset; + + list_add(&waiter->waiters, &ev->waiters); + + return 0; +} + +static bool test_event_condition(bool all, uint32_t num_events, + struct kfd_event_waiter *event_waiters) +{ + uint32_t i; + uint32_t activated_count = 0; + + for (i = 0; i < num_events; i++) { + if (event_waiters[i].activated) { + if (!all) + return true; + + activated_count++; + } + } + + return activated_count == num_events; +} + +static long user_timeout_to_jiffies(uint32_t user_timeout_ms) +{ + if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE) + return 0; + + if (user_timeout_ms == KFD_EVENT_TIMEOUT_INFINITE) + return MAX_SCHEDULE_TIMEOUT; + + /* + * msecs_to_jiffies interprets all values above 2^31-1 as infinite, + * but we consider them finite. + * This hack is wrong, but nobody is likely to notice. + */ + user_timeout_ms = min_t(uint32_t, user_timeout_ms, 0x7FFFFFFF); + + return msecs_to_jiffies(user_timeout_ms) + 1; +} + +static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) +{ + uint32_t i; + + for (i = 0; i < num_events; i++) + list_del(&waiters[i].waiters); + + kfree(waiters); +} + +int kfd_wait_on_events(struct kfd_process *p, + uint32_t num_events, const uint32_t __user *event_ids, + bool all, uint32_t user_timeout_ms, + enum kfd_event_wait_result *wait_result) +{ + uint32_t i; + int ret = 0; + struct kfd_event_waiter *event_waiters = NULL; + long timeout = user_timeout_to_jiffies(user_timeout_ms); + + mutex_lock(&p->event_mutex); + + event_waiters = alloc_event_waiters(num_events); + if (!event_waiters) { + ret = -ENOMEM; + goto fail; + } + + for (i = 0; i < num_events; i++) { + uint32_t event_id; + + ret = get_user(event_id, &event_ids[i]); + if (ret) + goto fail; + + ret = init_event_waiter(p, &event_waiters[i], event_id); + if (ret) + goto fail; + } + + mutex_unlock(&p->event_mutex); + + while (true) { + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } + + if (signal_pending(current)) { + /* + * This is wrong when a nonzero, non-infinite timeout + * is specified. We need to use + * ERESTARTSYS_RESTARTBLOCK, but struct restart_block + * contains a union with data for each user and it's + * in generic kernel code that I don't want to + * touch yet. + */ + ret = -ERESTARTSYS; + break; + } + + if (test_event_condition(all, num_events, event_waiters)) { + *wait_result = KFD_WAIT_COMPLETE; + break; + } + + if (timeout <= 0) { + *wait_result = KFD_WAIT_TIMEOUT; + break; + } + + timeout = schedule_timeout_interruptible(timeout); + } + __set_current_state(TASK_RUNNING); + + mutex_lock(&p->event_mutex); + free_waiters(num_events, event_waiters); + mutex_unlock(&p->event_mutex); + + return ret; + +fail: + if (event_waiters) + free_waiters(num_events, event_waiters); + + mutex_unlock(&p->event_mutex); + + *wait_result = KFD_WAIT_ERROR; + + return ret; +} + +int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) +{ + + unsigned int page_index; + unsigned long pfn; + struct signal_page *page; + + /* check required size is logical */ + if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) != + get_order(vma->vm_end - vma->vm_start)) { + pr_err("amdkfd: event page mmap requested illegal size\n"); + return -EINVAL; + } + + page_index = vma->vm_pgoff; + + page = lookup_signal_page_by_index(p, page_index); + if (!page) { + /* Probably KFD bug, but mmap is user-accessible. */ + pr_debug("signal page could not be found for page_index %u\n", + page_index); + return -EINVAL; + } + + pfn = __pa(page->kernel_address); + pfn >>= PAGE_SHIFT; + + vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE + | VM_DONTDUMP | VM_PFNMAP; + + pr_debug("mapping signal page\n"); + pr_debug(" start user address == 0x%08lx\n", vma->vm_start); + pr_debug(" end user address == 0x%08lx\n", vma->vm_end); + pr_debug(" pfn == 0x%016lX\n", pfn); + pr_debug(" vm_flags == 0x%08lX\n", vma->vm_flags); + pr_debug(" size == 0x%08lX\n", + vma->vm_end - vma->vm_start); + + page->user_address = (uint64_t __user *)vma->vm_start; + + /* mapping the page to user process */ + return remap_pfn_range(vma, vma->vm_start, pfn, + vma->vm_end - vma->vm_start, vma->vm_page_prot); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h new file mode 100644 index 0000000..d9b5b38 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -0,0 +1,76 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef KFD_EVENTS_H_INCLUDED +#define KFD_EVENTS_H_INCLUDED + +#include +#include +#include +#include +#include "kfd_priv.h" + +#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U +#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK +#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX + +/* + * Written into kfd_signal_slot_t to indicate that the event is not signaled. + * Since the event protocol may need to write the event ID into memory, this + * must not be a valid event ID. + * For the sake of easy memset-ing, this must be a byte pattern. + */ +#define UNSIGNALED_EVENT_SLOT ((uint64_t)-1) + +struct kfd_event_waiter; +struct signal_page; + +struct kfd_event { + /* All events in process, rooted at kfd_process.events. */ + struct hlist_node events; + + u32 event_id; + + bool signaled; + bool auto_reset; + + int type; + + struct list_head waiters; /* List of kfd_event_waiter by waiters. */ + + /* Only for signal events. */ + struct signal_page *signal_page; + unsigned int signal_slot_index; + uint64_t __user *user_signal_address; +}; + +#define KFD_EVENT_TIMEOUT_IMMEDIATE 0 +#define KFD_EVENT_TIMEOUT_INFINITE 0xFFFFFFFFu + +/* Matching HSA_EVENTTYPE */ +#define KFD_EVENT_TYPE_SIGNAL 0 +#define KFD_EVENT_TYPE_DEBUG 5 + +extern void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, + uint32_t valid_id_bits); + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 5383dd0..7f134aa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -172,10 +172,17 @@ static void interrupt_wq(struct work_struct *work) sizeof(uint32_t))]; while (dequeue_ih_ring_entry(dev, ih_ring_entry)) - ; + dev->device_info->event_interrupt_class->interrupt_wq(dev, + ih_ring_entry); } bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) { - return false; + /* integer and bitwise OR so there is no boolean short-circuiting */ + unsigned wanted = 0; + + wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, + ih_ring_entry); + + return wanted != 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 34c7662..0ff9a3d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -35,6 +35,9 @@ #define KFD_SYSFS_FILE_MODE 0444 +#define KFD_MMAP_DOORBELL_MASK 0x8000000000000 +#define KFD_MMAP_EVENTS_MASK 0x4000000000000 + /* * When working with cp scheduler we should assign the HIQ manually or via * the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot @@ -108,8 +111,16 @@ enum asic_family_type { CHIP_CARRIZO }; +struct kfd_event_interrupt_class { + bool (*interrupt_isr)(struct kfd_dev *dev, + const uint32_t *ih_ring_entry); + void (*interrupt_wq)(struct kfd_dev *dev, + const uint32_t *ih_ring_entry); +}; + struct kfd_device_info { unsigned int asic_family; + const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; size_t ih_ring_entry_size; uint8_t num_of_watch_points; @@ -490,6 +501,15 @@ struct kfd_process { /*Is the user space process 32 bit?*/ bool is_32bit_user_mode; + + /* Event-related data */ + struct mutex event_mutex; + /* All events in process hashed by ID, linked on kfd_event.events. */ + DECLARE_HASHTABLE(events, 4); + struct list_head signal_event_pages; /* struct slot_page_header. + event_pages */ + u32 next_nonsignal_event_id; + size_t signal_event_count; }; /** @@ -514,6 +534,7 @@ void kfd_process_create_wq(void); void kfd_process_destroy_wq(void); struct kfd_process *kfd_create_process(const struct task_struct *); struct kfd_process *kfd_get_process(const struct task_struct *); +struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process *p); @@ -659,4 +680,30 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd); phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, struct kfd_process *process); +/* Events */ +extern const struct kfd_event_interrupt_class event_interrupt_class_cik; + +enum kfd_event_wait_result { + KFD_WAIT_COMPLETE, + KFD_WAIT_TIMEOUT, + KFD_WAIT_ERROR +}; + +void kfd_event_init_process(struct kfd_process *p); +void kfd_event_free_process(struct kfd_process *p); +int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); +int kfd_wait_on_events(struct kfd_process *p, + uint32_t num_events, const uint32_t __user *event_ids, + bool all, uint32_t user_timeout_ms, + enum kfd_event_wait_result *wait_result); +void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, + uint32_t valid_id_bits); +int kfd_set_event(struct kfd_process *p, uint32_t event_id); +int kfd_reset_event(struct kfd_process *p, uint32_t event_id); +int kfd_event_create(struct file *devkfd, struct kfd_process *p, + uint32_t event_type, bool auto_reset, uint32_t node_id, + uint32_t *event_id, uint32_t *event_trigger_data, + uint64_t *event_page_offset, uint32_t *event_slot_index); +int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); + #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 4d7bc95..dc910af 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -178,6 +178,8 @@ static void kfd_process_wq_release(struct work_struct *work) kfree(pdd); } + kfd_event_free_process(p); + kfd_pasid_free(p->pasid); mutex_unlock(&p->mutex); @@ -288,6 +290,8 @@ static struct kfd_process *create_process(const struct task_struct *thread) INIT_LIST_HEAD(&process->per_device_data); + kfd_event_init_process(process); + err = pqm_init(&process->pqm, process); if (err != 0) goto err_process_pqm_init; @@ -430,3 +434,23 @@ bool kfd_has_process_device_data(struct kfd_process *p) { return !(list_empty(&p->per_device_data)); } + +/* This returns with process->mutex locked. */ +struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) +{ + struct kfd_process *p; + unsigned int temp; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + if (p->pasid == pasid) { + mutex_lock(&p->mutex); + break; + } + } + + srcu_read_unlock(&kfd_processes_srcu, idx); + + return p; +} -- cgit v0.10.2 From 59d3e8be87a14c6a0d91c683e63d9b31734525ae Mon Sep 17 00:00:00 2001 From: Alexey Skidanov Date: Tue, 14 Apr 2015 18:05:49 +0300 Subject: drm/amdkfd: Add memory exception handling This patch adds Peripheral Page Request (PPR) failure processing and reporting. Bad address or pointer to a system memory block with inappropriate read/write permission cause such PPR failure during a user queue processing. PPR request handling is done by IOMMU driver notifying AMDKFD module on PPR failure. The process triggering a PPR failure will be notified by appropriate event or SIGTERM signal will be sent to it. v3: - Change all bool fields in struct kfd_memory_exception_failure to uint32_t Signed-off-by: Alexey Skidanov Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 4c03169..52cab0f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -182,6 +182,32 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) kfd_unbind_process_from_device(dev, pasid); } +/* + * This function called by IOMMU driver on PPR failure + */ +static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, + unsigned long address, u16 flags) +{ + struct kfd_dev *dev; + + dev_warn(kfd_device, + "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", + PCI_BUS_NUM(pdev->devfn), + PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), + pasid, + address, + flags); + + dev = kfd_device_by_pci_dev(pdev); + BUG_ON(dev == NULL); + + kfd_signal_iommu_event(dev, pasid, address, + flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); + + return AMD_IOMMU_INV_PRI_RSP_INVALID; +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources) { @@ -251,6 +277,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); kfd->dqm = device_queue_manager_init(kfd); if (!kfd->dqm) { @@ -316,6 +343,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) if (kfd->init_complete) { kfd->dqm->ops.stop(kfd->dqm); amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); amd_iommu_free_device(kfd->pdev); } } @@ -335,6 +363,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) return -ENXIO; amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); kfd->dqm->ops.start(kfd->dqm); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 23ffa96..fa13d3e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -30,6 +30,7 @@ #include #include "kfd_priv.h" #include "kfd_events.h" +#include /* * A task can only be on a single wait_queue at a time, but we need to support @@ -45,6 +46,10 @@ struct kfd_event_waiter { /* Transitions to true when the event this belongs to is signaled. */ bool activated; + + /* Event */ + struct kfd_event *event; + uint32_t input_index; }; /* @@ -609,14 +614,17 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) } static int init_event_waiter(struct kfd_process *p, - struct kfd_event_waiter *waiter, - uint32_t event_id) + struct kfd_event_waiter *waiter, + uint32_t event_id, + uint32_t input_index) { struct kfd_event *ev = lookup_event_by_id(p, event_id); if (!ev) return -EINVAL; + waiter->event = ev; + waiter->input_index = input_index; waiter->activated = ev->signaled; ev->signaled = ev->signaled && !ev->auto_reset; @@ -643,6 +651,38 @@ static bool test_event_condition(bool all, uint32_t num_events, return activated_count == num_events; } +/* + * Copy event specific data, if defined. + * Currently only memory exception events have additional data to copy to user + */ +static bool copy_signaled_event_data(uint32_t num_events, + struct kfd_event_waiter *event_waiters, + struct kfd_event_data __user *data) +{ + struct kfd_hsa_memory_exception_data *src; + struct kfd_hsa_memory_exception_data __user *dst; + struct kfd_event_waiter *waiter; + struct kfd_event *event; + uint32_t i; + + for (i = 0; i < num_events; i++) { + waiter = &event_waiters[i]; + event = waiter->event; + if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) { + dst = &data[waiter->input_index].memory_exception_data; + src = &event->memory_exception_data; + if (copy_to_user(dst, src, + sizeof(struct kfd_hsa_memory_exception_data))) + return false; + } + } + + return true; + +} + + + static long user_timeout_to_jiffies(uint32_t user_timeout_ms) { if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE) @@ -672,10 +712,12 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) } int kfd_wait_on_events(struct kfd_process *p, - uint32_t num_events, const uint32_t __user *event_ids, + uint32_t num_events, void __user *data, bool all, uint32_t user_timeout_ms, enum kfd_event_wait_result *wait_result) { + struct kfd_event_data __user *events = + (struct kfd_event_data __user *) data; uint32_t i; int ret = 0; struct kfd_event_waiter *event_waiters = NULL; @@ -690,13 +732,14 @@ int kfd_wait_on_events(struct kfd_process *p, } for (i = 0; i < num_events; i++) { - uint32_t event_id; + struct kfd_event_data event_data; - ret = get_user(event_id, &event_ids[i]); - if (ret) + if (copy_from_user(&event_data, &events[i], + sizeof(struct kfd_event_data))) goto fail; - ret = init_event_waiter(p, &event_waiters[i], event_id); + ret = init_event_waiter(p, &event_waiters[i], + event_data.event_id, i); if (ret) goto fail; } @@ -723,7 +766,11 @@ int kfd_wait_on_events(struct kfd_process *p, } if (test_event_condition(all, num_events, event_waiters)) { - *wait_result = KFD_WAIT_COMPLETE; + if (copy_signaled_event_data(num_events, + event_waiters, events)) + *wait_result = KFD_WAIT_COMPLETE; + else + *wait_result = KFD_WAIT_ERROR; break; } @@ -797,3 +844,95 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) return remap_pfn_range(vma, vma->vm_start, pfn, vma->vm_end - vma->vm_start, vma->vm_page_prot); } + +/* + * Assumes that p->event_mutex is held and of course + * that p is not going away (current or locked). + */ +static void lookup_events_by_type_and_signal(struct kfd_process *p, + int type, void *event_data) +{ + struct kfd_hsa_memory_exception_data *ev_data; + struct kfd_event *ev; + int bkt; + bool send_signal = true; + + ev_data = (struct kfd_hsa_memory_exception_data *) event_data; + + hash_for_each(p->events, bkt, ev, events) + if (ev->type == type) { + send_signal = false; + dev_dbg(kfd_device, + "Event found: id %X type %d", + ev->event_id, ev->type); + set_event(ev); + if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data) + ev->memory_exception_data = *ev_data; + } + + /* Send SIGTERM no event of type "type" has been found*/ + if (send_signal) { + dev_warn(kfd_device, + "Sending SIGTERM to HSA Process with PID %d ", + p->lead_thread->pid); + send_sig(SIGTERM, p->lead_thread, 0); + } +} + +void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, + unsigned long address, bool is_write_requested, + bool is_execute_requested) +{ + struct kfd_hsa_memory_exception_data memory_exception_data; + struct vm_area_struct *vma; + + /* + * Because we are called from arbitrary context (workqueue) as opposed + * to process context, kfd_process could attempt to exit while we are + * running so the lookup function returns a locked process. + */ + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + + if (!p) + return; /* Presumably process exited. */ + + memset(&memory_exception_data, 0, sizeof(memory_exception_data)); + + down_read(&p->mm->mmap_sem); + vma = find_vma(p->mm, address); + + memory_exception_data.gpu_id = dev->id; + memory_exception_data.va = address; + /* Set failure reason */ + memory_exception_data.failure.NotPresent = 1; + memory_exception_data.failure.NoExecute = 0; + memory_exception_data.failure.ReadOnly = 0; + if (vma) { + if (vma->vm_start > address) { + memory_exception_data.failure.NotPresent = 1; + memory_exception_data.failure.NoExecute = 0; + memory_exception_data.failure.ReadOnly = 0; + } else { + memory_exception_data.failure.NotPresent = 0; + if (is_write_requested && !(vma->vm_flags & VM_WRITE)) + memory_exception_data.failure.ReadOnly = 1; + else + memory_exception_data.failure.ReadOnly = 0; + if (is_execute_requested && !(vma->vm_flags & VM_EXEC)) + memory_exception_data.failure.NoExecute = 1; + else + memory_exception_data.failure.NoExecute = 0; + } + } + + up_read(&p->mm->mmap_sem); + + mutex_lock(&p->event_mutex); + + /* Lookup events by type and signal them */ + lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY, + &memory_exception_data); + + mutex_unlock(&p->event_mutex); + mutex_unlock(&p->mutex); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index d9b5b38..691cf85 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -28,6 +28,7 @@ #include #include #include "kfd_priv.h" +#include #define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U #define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK @@ -61,6 +62,11 @@ struct kfd_event { struct signal_page *signal_page; unsigned int signal_slot_index; uint64_t __user *user_signal_address; + + /* type specific data */ + union { + struct kfd_hsa_memory_exception_data memory_exception_data; + }; }; #define KFD_EVENT_TIMEOUT_IMMEDIATE 0 @@ -69,6 +75,7 @@ struct kfd_event { /* Matching HSA_EVENTTYPE */ #define KFD_EVENT_TYPE_SIGNAL 0 #define KFD_EVENT_TYPE_DEBUG 5 +#define KFD_EVENT_TYPE_MEMORY 8 extern void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0ff9a3d..3594503 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -693,11 +693,14 @@ void kfd_event_init_process(struct kfd_process *p); void kfd_event_free_process(struct kfd_process *p); int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); int kfd_wait_on_events(struct kfd_process *p, - uint32_t num_events, const uint32_t __user *event_ids, + uint32_t num_events, void __user *data, bool all, uint32_t user_timeout_ms, enum kfd_event_wait_result *wait_result); void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits); +void kfd_signal_iommu_event(struct kfd_dev *dev, + unsigned int pasid, unsigned long address, + bool is_write_requested, bool is_execute_requested); int kfd_set_event(struct kfd_process *p, uint32_t event_id); int kfd_reset_event(struct kfd_process *p, uint32_t event_id); int kfd_event_create(struct file *devkfd, struct kfd_process *p, -- cgit v0.10.2 From 930c5ff4390221cccf368b305c04351fbcf0dfcf Mon Sep 17 00:00:00 2001 From: Alexey Skidanov Date: Tue, 25 Nov 2014 10:34:31 +0200 Subject: drm/amdkfd: Add bad opcode exception handling Signed-off-by: Alexey Skidanov Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 629510a..211fc48 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -36,7 +36,8 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, /* Do not process in ISR, just request it to be forwarded to WQ. */ return (pasid != 0) && (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || - ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG); + ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || + ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE); } static void cik_event_interrupt_wq(struct kfd_dev *dev, @@ -55,6 +56,8 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, kfd_signal_event_interrupt(pasid, 0, 0); else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8); + else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) + kfd_signal_hw_exception_event(pasid); } const struct kfd_event_interrupt_class event_interrupt_class_cik = { diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h index bbef9e2..79a16d2 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_int.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h @@ -34,6 +34,7 @@ struct cik_ih_ring_entry { #define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 #define CIK_INTSRC_CP_END_OF_PIPE 0xB5 +#define CIK_INTSRC_CP_BAD_OPCODE 0xB7 #define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index fa13d3e..5c3a81e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -936,3 +936,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, mutex_unlock(&p->event_mutex); mutex_unlock(&p->mutex); } + +void kfd_signal_hw_exception_event(unsigned int pasid) +{ + /* + * Because we are called from arbitrary context (workqueue) as opposed + * to process context, kfd_process could attempt to exit while we are + * running so the lookup function returns a locked process. + */ + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + + if (!p) + return; /* Presumably process exited. */ + + mutex_lock(&p->event_mutex); + + /* Lookup events by type and signal them */ + lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL); + + mutex_unlock(&p->event_mutex); + mutex_unlock(&p->mutex); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index 691cf85..28f6838 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -74,6 +74,7 @@ struct kfd_event { /* Matching HSA_EVENTTYPE */ #define KFD_EVENT_TYPE_SIGNAL 0 +#define KFD_EVENT_TYPE_HW_EXCEPTION 3 #define KFD_EVENT_TYPE_DEBUG 5 #define KFD_EVENT_TYPE_MEMORY 8 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 3594503..9383494 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -682,6 +682,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, /* Events */ extern const struct kfd_event_interrupt_class event_interrupt_class_cik; +extern const struct kfd_device_global_init_class device_global_init_class_cik; enum kfd_event_wait_result { KFD_WAIT_COMPLETE, @@ -701,6 +702,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, unsigned long address, bool is_write_requested, bool is_execute_requested); +void kfd_signal_hw_exception_event(unsigned int pasid); int kfd_set_event(struct kfd_process *p, uint32_t event_id); int kfd_reset_event(struct kfd_process *p, uint32_t event_id); int kfd_event_create(struct file *devkfd, struct kfd_process *p, -- cgit v0.10.2 From 81663016dbfd53e29d1b5c5ddbc9b12ae1d66474 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 24 Dec 2014 13:30:52 +0200 Subject: drm/amdkfd: Add module parameter of send_sigterm This patch adds a new kernel module parameter to amdkfd, called send_sigterm. This parameter specifies whether amdkfd should send the SIGTERM signal to an HSA process, when the following conditions occur: 1. The GPU triggers an exception regarding a kernel that was issued by this process. 2. The HSA process isn't waiting on an event that handles this exception. The default behavior is not to send a SIGTERM and suffice with a dmesg error print. Reviewed-by: Ben Goz Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 5c3a81e..3cb37d2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -872,10 +872,16 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, /* Send SIGTERM no event of type "type" has been found*/ if (send_signal) { - dev_warn(kfd_device, - "Sending SIGTERM to HSA Process with PID %d ", + if (send_sigterm) { + dev_warn(kfd_device, + "Sending SIGTERM to HSA Process with PID %d ", + p->lead_thread->pid); + send_sig(SIGTERM, p->lead_thread, 0); + } else { + dev_err(kfd_device, + "HSA Process (PID %d) got unhandled exception", p->lead_thread->pid); - send_sig(SIGTERM, p->lead_thread, 0); + } } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 4e0a68f..e4fc96e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -54,6 +54,11 @@ module_param(max_num_of_queues_per_device, int, 0444); MODULE_PARM_DESC(max_num_of_queues_per_device, "Maximum number of supported queues per device (1 = Minimum, 4096 = default)"); +int send_sigterm; +module_param(send_sigterm, int, 0444); +MODULE_PARM_DESC(send_sigterm, + "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)"); + bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f) { /* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9383494..b6f838f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -74,6 +74,12 @@ extern int max_num_of_queues_per_device; /* Kernel module parameter to specify the scheduling policy */ extern int sched_policy; +/* + * Kernel module parameter to specify whether to send sigterm to HSA process on + * unhandled exception + */ +extern int send_sigterm; + /** * enum kfd_sched_policy * -- cgit v0.10.2 From 8377396b5db166c56a90b400317f26954901c994 Mon Sep 17 00:00:00 2001 From: Andrew Lewycky Date: Tue, 9 Sep 2014 15:22:05 +0300 Subject: drm/amdkfd: Implement events IOCTLs Signed-off-by: Andrew Lewycky Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 6dd8948..b2c6109 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -519,31 +519,57 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, void *data) { - return -ENODEV; + struct kfd_ioctl_create_event_args *args = data; + int err; + + err = kfd_event_create(filp, p, args->event_type, + args->auto_reset != 0, args->node_id, + &args->event_id, &args->event_trigger_data, + &args->event_page_offset, + &args->event_slot_index); + + return err; } static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p, void *data) { - return -ENODEV; + struct kfd_ioctl_destroy_event_args *args = data; + + return kfd_event_destroy(p, args->event_id); } static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p, void *data) { - return -ENODEV; + struct kfd_ioctl_set_event_args *args = data; + + return kfd_set_event(p, args->event_id); } static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p, void *data) { - return -ENODEV; + struct kfd_ioctl_reset_event_args *args = data; + + return kfd_reset_event(p, args->event_id); } static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, void *data) { - return -ENODEV; + struct kfd_ioctl_wait_events_args *args = data; + enum kfd_event_wait_result wait_result; + int err; + + err = kfd_wait_on_events(p, args->num_events, + (void __user *)args->events_ptr, + (args->wait_for_all != 0), + args->timeout, &wait_result); + + args->wait_result = wait_result; + + return err; } #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ -- cgit v0.10.2 From 7591cd2cd59d77c52141d9c2c759c14f981c1a13 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 21 Apr 2015 15:09:48 +0300 Subject: drm/amdkfd: change driver version to 0.7.2 Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index e4fc96e..ca8410e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -29,10 +29,10 @@ #define KFD_DRIVER_AUTHOR "AMD Inc. and others" #define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs" -#define KFD_DRIVER_DATE "20150122" +#define KFD_DRIVER_DATE "20150421" #define KFD_DRIVER_MAJOR 0 #define KFD_DRIVER_MINOR 7 -#define KFD_DRIVER_PATCHLEVEL 1 +#define KFD_DRIVER_PATCHLEVEL 2 static const struct kgd2kfd_calls kgd2kfd = { .exit = kgd2kfd_exit, -- cgit v0.10.2