diff options
Diffstat (limited to 'drivers/ntb/test/ntb_perf.c')
-rw-r--r-- | drivers/ntb/test/ntb_perf.c | 240 |
1 files changed, 159 insertions, 81 deletions
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 8dfce9c..6a50f20 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -58,6 +58,7 @@ #include <linux/delay.h> #include <linux/sizes.h> #include <linux/ntb.h> +#include <linux/mutex.h> #define DRIVER_NAME "ntb_perf" #define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool" @@ -83,6 +84,10 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION); static struct dentry *perf_debugfs_dir; +static unsigned long max_mw_size; +module_param(max_mw_size, ulong, 0644); +MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); + static unsigned int seg_order = 19; /* 512K */ module_param(seg_order, uint, 0644); MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing"); @@ -117,6 +122,10 @@ struct pthr_ctx { int dma_prep_err; int src_idx; void *srcs[MAX_SRCS]; + wait_queue_head_t *wq; + int status; + u64 copied; + u64 diff_us; }; struct perf_ctx { @@ -124,23 +133,23 @@ struct perf_ctx { spinlock_t db_lock; struct perf_mw mw; bool link_is_up; - struct work_struct link_cleanup; struct delayed_work link_work; + wait_queue_head_t link_wq; struct dentry *debugfs_node_dir; struct dentry *debugfs_run; struct dentry *debugfs_threads; u8 perf_threads; - bool run; + /* mutex ensures only one set of threads run at once */ + struct mutex run_mutex; struct pthr_ctx pthr_ctx[MAX_THREADS]; atomic_t tsync; + atomic_t tdone; }; enum { VERSION = 0, MW_SZ_HIGH, MW_SZ_LOW, - SPAD_MSG, - SPAD_ACK, MAX_SPAD }; @@ -148,10 +157,16 @@ static void perf_link_event(void *ctx) { struct perf_ctx *perf = ctx; - if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) + if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) { schedule_delayed_work(&perf->link_work, 2*HZ); - else - schedule_work(&perf->link_cleanup); + } else { + dev_dbg(&perf->ntb->pdev->dev, "link down\n"); + + if (!perf->link_is_up) + cancel_delayed_work_sync(&perf->link_work); + + perf->link_is_up = false; + } } static void perf_db_event(void *ctx, int vec) @@ -271,6 +286,7 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, char __iomem *tmp = dst; u64 perf, diff_us; ktime_t kstart, kstop, kdiff; + unsigned long last_sleep = jiffies; chunks = div64_u64(win_size, buf_size); total_chunks = div64_u64(total, buf_size); @@ -286,30 +302,40 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, } else tmp += buf_size; - /* Probably should schedule every 4GB to prevent soft hang. */ - if (((copied % SZ_4G) == 0) && !use_dma) { + /* Probably should schedule every 5s to prevent soft hang. */ + if (unlikely((jiffies - last_sleep) > 5 * HZ)) { + last_sleep = jiffies; set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } + + if (unlikely(kthread_should_stop())) + break; } if (use_dma) { - pr_info("%s: All DMA descriptors submitted\n", current->comm); - while (atomic_read(&pctx->dma_sync) != 0) + pr_debug("%s: All DMA descriptors submitted\n", current->comm); + while (atomic_read(&pctx->dma_sync) != 0) { + if (kthread_should_stop()) + break; msleep(20); + } } kstop = ktime_get(); kdiff = ktime_sub(kstop, kstart); diff_us = ktime_to_us(kdiff); - pr_info("%s: copied %llu bytes\n", current->comm, copied); + pr_debug("%s: copied %llu bytes\n", current->comm, copied); - pr_info("%s: lasted %llu usecs\n", current->comm, diff_us); + pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us); perf = div64_u64(copied, diff_us); - pr_info("%s: MBytes/s: %llu\n", current->comm, perf); + pr_debug("%s: MBytes/s: %llu\n", current->comm, perf); + + pctx->copied = copied; + pctx->diff_us = diff_us; return 0; } @@ -331,7 +357,7 @@ static int ntb_perf_thread(void *data) int rc, node, i; struct dma_chan *dma_chan = NULL; - pr_info("kthread %s starting...\n", current->comm); + pr_debug("kthread %s starting...\n", current->comm); node = dev_to_node(&pdev->dev); @@ -389,7 +415,10 @@ static int ntb_perf_thread(void *data) pctx->srcs[i] = NULL; } - return 0; + atomic_inc(&perf->tdone); + wake_up(pctx->wq); + rc = 0; + goto done; err: for (i = 0; i < MAX_SRCS; i++) { @@ -402,6 +431,16 @@ err: pctx->dma_chan = NULL; } +done: + /* Wait until we are told to stop */ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + schedule(); + } + __set_current_state(TASK_RUNNING); + return rc; } @@ -472,6 +511,10 @@ static void perf_link_work(struct work_struct *work) dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); size = perf->mw.phys_size; + + if (max_mw_size && size > max_mw_size) + size = max_mw_size; + ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size)); ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size)); ntb_peer_spad_write(ndev, VERSION, PERF_VERSION); @@ -496,6 +539,7 @@ static void perf_link_work(struct work_struct *work) goto out1; perf->link_is_up = true; + wake_up(&perf->link_wq); return; @@ -508,18 +552,6 @@ out: msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT)); } -static void perf_link_cleanup(struct work_struct *work) -{ - struct perf_ctx *perf = container_of(work, - struct perf_ctx, - link_cleanup); - - dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); - - if (!perf->link_is_up) - cancel_delayed_work_sync(&perf->link_work); -} - static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf) { struct perf_mw *mw; @@ -544,16 +576,44 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, { struct perf_ctx *perf = filp->private_data; char *buf; - ssize_t ret, out_offset; + ssize_t ret, out_off = 0; + struct pthr_ctx *pctx; + int i; + u64 rate; if (!perf) return 0; - buf = kmalloc(64, GFP_KERNEL); + buf = kmalloc(1024, GFP_KERNEL); if (!buf) return -ENOMEM; - out_offset = snprintf(buf, 64, "%d\n", perf->run); - ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); + + if (mutex_is_locked(&perf->run_mutex)) { + out_off = snprintf(buf, 64, "running\n"); + goto read_from_buf; + } + + for (i = 0; i < MAX_THREADS; i++) { + pctx = &perf->pthr_ctx[i]; + + if (pctx->status == -ENODATA) + break; + + if (pctx->status) { + out_off += snprintf(buf + out_off, 1024 - out_off, + "%d: error %d\n", i, + pctx->status); + continue; + } + + rate = div64_u64(pctx->copied, pctx->diff_us); + out_off += snprintf(buf + out_off, 1024 - out_off, + "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", + i, pctx->copied, pctx->diff_us, rate); + } + +read_from_buf: + ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off); kfree(buf); return ret; @@ -564,80 +624,90 @@ static void threads_cleanup(struct perf_ctx *perf) struct pthr_ctx *pctx; int i; - perf->run = false; for (i = 0; i < MAX_THREADS; i++) { pctx = &perf->pthr_ctx[i]; if (pctx->thread) { - kthread_stop(pctx->thread); + pctx->status = kthread_stop(pctx->thread); pctx->thread = NULL; } } } +static void perf_clear_thread_status(struct perf_ctx *perf) +{ + int i; + + for (i = 0; i < MAX_THREADS; i++) + perf->pthr_ctx[i].status = -ENODATA; +} + static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *offp) { struct perf_ctx *perf = filp->private_data; int node, i; + DECLARE_WAIT_QUEUE_HEAD(wq); - if (!perf->link_is_up) - return 0; + if (wait_event_interruptible(perf->link_wq, perf->link_is_up)) + return -ENOLINK; if (perf->perf_threads == 0) - return 0; + return -EINVAL; - if (atomic_read(&perf->tsync) == 0) - perf->run = false; + if (!mutex_trylock(&perf->run_mutex)) + return -EBUSY; - if (perf->run) - threads_cleanup(perf); - else { - perf->run = true; + perf_clear_thread_status(perf); - if (perf->perf_threads > MAX_THREADS) { - perf->perf_threads = MAX_THREADS; - pr_info("Reset total threads to: %u\n", MAX_THREADS); - } + if (perf->perf_threads > MAX_THREADS) { + perf->perf_threads = MAX_THREADS; + pr_info("Reset total threads to: %u\n", MAX_THREADS); + } - /* no greater than 1M */ - if (seg_order > MAX_SEG_ORDER) { - seg_order = MAX_SEG_ORDER; - pr_info("Fix seg_order to %u\n", seg_order); - } + /* no greater than 1M */ + if (seg_order > MAX_SEG_ORDER) { + seg_order = MAX_SEG_ORDER; + pr_info("Fix seg_order to %u\n", seg_order); + } - if (run_order < seg_order) { - run_order = seg_order; - pr_info("Fix run_order to %u\n", run_order); - } + if (run_order < seg_order) { + run_order = seg_order; + pr_info("Fix run_order to %u\n", run_order); + } - node = dev_to_node(&perf->ntb->pdev->dev); - /* launch kernel thread */ - for (i = 0; i < perf->perf_threads; i++) { - struct pthr_ctx *pctx; - - pctx = &perf->pthr_ctx[i]; - atomic_set(&pctx->dma_sync, 0); - pctx->perf = perf; - pctx->thread = - kthread_create_on_node(ntb_perf_thread, - (void *)pctx, - node, "ntb_perf %d", i); - if (IS_ERR(pctx->thread)) { - pctx->thread = NULL; - goto err; - } else - wake_up_process(pctx->thread); - - if (perf->run == false) - return -ENXIO; - } + node = dev_to_node(&perf->ntb->pdev->dev); + atomic_set(&perf->tdone, 0); + /* launch kernel thread */ + for (i = 0; i < perf->perf_threads; i++) { + struct pthr_ctx *pctx; + + pctx = &perf->pthr_ctx[i]; + atomic_set(&pctx->dma_sync, 0); + pctx->perf = perf; + pctx->wq = &wq; + pctx->thread = + kthread_create_on_node(ntb_perf_thread, + (void *)pctx, + node, "ntb_perf %d", i); + if (IS_ERR(pctx->thread)) { + pctx->thread = NULL; + goto err; + } else { + wake_up_process(pctx->thread); + } } + wait_event_interruptible(wq, + atomic_read(&perf->tdone) == perf->perf_threads); + + threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return count; err: threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return -ENXIO; } @@ -688,6 +758,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) int node; int rc = 0; + if (ntb_spad_count(ntb) < MAX_SPAD) { + dev_err(&ntb->dev, "Not enough scratch pad registers for %s", + DRIVER_NAME); + return -EIO; + } + node = dev_to_node(&pdev->dev); perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node); @@ -699,11 +775,11 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) perf->ntb = ntb; perf->perf_threads = 1; atomic_set(&perf->tsync, 0); - perf->run = false; + mutex_init(&perf->run_mutex); spin_lock_init(&perf->db_lock); perf_setup_mw(ntb, perf); + init_waitqueue_head(&perf->link_wq); INIT_DELAYED_WORK(&perf->link_work, perf_link_work); - INIT_WORK(&perf->link_cleanup, perf_link_cleanup); rc = ntb_set_ctx(ntb, perf, &perf_ops); if (rc) @@ -717,11 +793,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) if (rc) goto err_ctx; + perf_clear_thread_status(perf); + return 0; err_ctx: cancel_delayed_work_sync(&perf->link_work); - cancel_work_sync(&perf->link_cleanup); kfree(perf); err_perf: return rc; @@ -734,8 +811,9 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) dev_dbg(&perf->ntb->dev, "%s called\n", __func__); + mutex_lock(&perf->run_mutex); + cancel_delayed_work_sync(&perf->link_work); - cancel_work_sync(&perf->link_cleanup); ntb_clear_ctx(ntb); ntb_link_disable(ntb); |