summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c38
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c46
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c16
7 files changed, 97 insertions, 19 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 52cab0f..f1f86db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -33,6 +33,8 @@
static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
.max_pasid_bits = 16,
+ /* max num of queues for KV.TODO should be a dynamic value */
+ .max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.mqd_size_aligned = MQD_SIZE_ALIGNED
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 4e215bd..a5dc822 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+ bool preempt_static_queues, bool lock);
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
@@ -775,7 +776,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
BUG_ON(!dqm);
- destroy_queues_cpsch(dqm, true);
+ destroy_queues_cpsch(dqm, true, true);
list_for_each_entry(node, &dqm->queues, list) {
pdd = qpd_to_pdd(node->qpd);
@@ -829,7 +830,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
pr_debug("kfd: In %s\n", __func__);
mutex_lock(&dqm->lock);
- destroy_queues_cpsch(dqm, false);
+ /* here we actually preempt the DIQ */
+ destroy_queues_cpsch(dqm, true, false);
list_del(&kq->list);
dqm->queue_count--;
qpd->is_debug = false;
@@ -935,13 +937,15 @@ static int destroy_sdma_queues(struct device_queue_manager *dqm,
unsigned int sdma_engine)
{
return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
- KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+ KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
sdma_engine);
}
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+ bool preempt_static_queues, bool lock)
{
int retval;
+ enum kfd_preempt_type_filter preempt_type;
BUG_ON(!dqm);
@@ -960,8 +964,12 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
destroy_sdma_queues(dqm, 1);
}
+ preempt_type = preempt_static_queues ?
+ KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
+ KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;
+
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
- KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
+ preempt_type, 0, false, 0);
if (retval != 0)
goto out;
@@ -989,7 +997,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
if (lock)
mutex_lock(&dqm->lock);
- retval = destroy_queues_cpsch(dqm, false);
+ retval = destroy_queues_cpsch(dqm, false, false);
if (retval != 0) {
pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
goto out;
@@ -1024,13 +1032,27 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
{
int retval;
struct mqd_manager *mqd;
+ bool preempt_all_queues;
BUG_ON(!dqm || !qpd || !q);
+ preempt_all_queues = false;
+
retval = 0;
/* remove queue from list to prevent rescheduling after preemption */
mutex_lock(&dqm->lock);
+
+ if (qpd->is_debug) {
+ /*
+ * error, currently we do not allow to destroy a queue
+ * of a currently debugged process
+ */
+ retval = -EBUSY;
+ goto failed_try_destroy_debugged_queue;
+
+ }
+
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) {
@@ -1062,6 +1084,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
return 0;
failed:
+failed_try_destroy_debugged_queue:
+
mutex_unlock(&dqm->lock);
return retval;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 57278e2..ec4036a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -88,9 +88,11 @@ struct device_queue_manager_ops {
struct queue *q,
struct qcm_process_device *qpd,
int *allocate_vmid);
+
int (*destroy_queue)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q);
+
int (*update_queue)(struct device_queue_manager *dqm,
struct queue *q);
@@ -100,8 +102,10 @@ struct device_queue_manager_ops {
int (*register_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
+
int (*unregister_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
+
int (*initialize)(struct device_queue_manager *dqm);
int (*start)(struct device_queue_manager *dqm);
int (*stop)(struct device_queue_manager *dqm);
@@ -109,9 +113,11 @@ struct device_queue_manager_ops {
int (*create_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd);
+
void (*destroy_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd);
+
bool (*set_cache_memory_policy)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index e2533d8..99b6d28 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -163,7 +163,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
num_queues = 0;
list_for_each_entry(cur, &qpd->queues_list, list)
num_queues++;
- packet->bitfields10.num_queues = num_queues;
+ packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues;
packet->sh_mem_config = qpd->sh_mem_config;
packet->sh_mem_bases = qpd->sh_mem_bases;
@@ -177,9 +177,10 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
}
static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
- struct queue *q)
+ struct queue *q, bool is_static)
{
struct pm4_map_queues *packet;
+ bool use_static = is_static;
BUG_ON(!pm || !buffer || !q);
@@ -209,6 +210,7 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
case KFD_QUEUE_TYPE_SDMA:
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__sdma0;
+ use_static = false; /* no static queues under SDMA */
break;
default:
BUG();
@@ -218,6 +220,9 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset =
q->properties.doorbell_off;
+ packet->mes_map_queues_ordinals[0].bitfields3.is_static =
+ (use_static == true) ? 1 : 0;
+
packet->mes_map_queues_ordinals[0].mqd_addr_lo =
lower_32_bits(q->gart_mqd_addr);
@@ -271,9 +276,11 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pm_release_ib(pm);
return -ENOMEM;
}
+
retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
if (retval != 0)
return retval;
+
proccesses_mapped++;
inc_wptr(&rl_wptr, sizeof(struct pm4_map_process),
alloc_size_bytes);
@@ -281,23 +288,36 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
list_for_each_entry(kq, &qpd->priv_queue_list, list) {
if (kq->queue->properties.is_active != true)
continue;
+
+ pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
+ kq->queue->queue, qpd->is_debug);
+
retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
- kq->queue);
+ kq->queue, qpd->is_debug);
if (retval != 0)
return retval;
- inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
- alloc_size_bytes);
+
+ inc_wptr(&rl_wptr,
+ sizeof(struct pm4_map_queues),
+ alloc_size_bytes);
}
list_for_each_entry(q, &qpd->queues_list, list) {
if (q->properties.is_active != true)
continue;
- retval = pm_create_map_queue(pm,
- &rl_buffer[rl_wptr], q);
+
+ pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
+ q->queue, qpd->is_debug);
+
+ retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
+ q, qpd->is_debug);
+
if (retval != 0)
return retval;
- inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
- alloc_size_bytes);
+
+ inc_wptr(&rl_wptr,
+ sizeof(struct pm4_map_queues),
+ alloc_size_bytes);
}
}
@@ -488,7 +508,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
packet = (struct pm4_unmap_queues *)buffer;
memset(buffer, 0, sizeof(struct pm4_unmap_queues));
-
+ pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
+ mode, reset, type);
packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES,
sizeof(struct pm4_unmap_queues));
switch (type) {
@@ -529,6 +550,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_all_active_queues;
break;
+ case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES:
+ /* in this case, we do not preempt static queues */
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only;
+ break;
default:
BUG();
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
index 071ad57..5b393f3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -237,7 +237,8 @@ struct pm4_map_queues {
struct {
union {
struct {
- uint32_t reserved5:2;
+ uint32_t is_static:1;
+ uint32_t reserved5:1;
uint32_t doorbell_offset:21;
uint32_t reserved6:3;
uint32_t queue:6;
@@ -328,7 +329,8 @@ enum unmap_queues_action_enum {
enum unmap_queues_queue_sel_enum {
queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
- queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2
+ queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2,
+ queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3
};
enum unmap_queues_engine_sel_enum {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 816c7a1..eda7281 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -128,6 +128,7 @@ struct kfd_device_info {
unsigned int asic_family;
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
+ unsigned int max_no_of_hqd;
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
@@ -231,6 +232,7 @@ struct device *kfd_chardev(void);
enum kfd_preempt_type_filter {
KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES,
+ KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES,
KFD_PREEMPT_TYPE_FILTER_BY_PASID
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 530b82c..85b7fec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -158,6 +158,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct queue *q;
struct process_queue_node *pqn;
struct kernel_queue *kq;
+ int num_queues = 0;
+ struct queue *cur;
BUG_ON(!pqm || !dev || !properties || !qid);
@@ -172,6 +174,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
return -1;
}
+ /*
+ * for debug process, verify that it is within the static queues limit
+ * currently limit is set to half of the total avail HQD slots
+ * If we are just about to create DIQ, the is_debug flag is not set yet
+ * Hence we also check the type as well
+ */
+ if ((pdd->qpd.is_debug) ||
+ (type == KFD_QUEUE_TYPE_DIQ)) {
+ list_for_each_entry(cur, &pdd->qpd.queues_list, list)
+ num_queues++;
+ if (num_queues >= dev->device_info->max_no_of_hqd/2)
+ return (-ENOSPC);
+ }
+
retval = find_available_queue_slot(pqm, qid);
if (retval != 0)
return retval;