diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-19 23:31:54 (GMT) |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-19 23:31:54 (GMT) |
commit | de06dbfa7861c9019eedefc0c356ba86e5098f1b (patch) | |
tree | 69986680dc2c500f8c090e8a8c0b17dd7c5c8cbb /arch/arm/mm/dma-mapping.c | |
parent | b31a3bc3dbd2f42b61674d37de7f46022e1f6846 (diff) | |
parent | 1b3bf847977c2e5974012ddd4b25fef50967d785 (diff) | |
download | linux-de06dbfa7861c9019eedefc0c356ba86e5098f1b.tar.xz |
Merge branch 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
Pull ARM updates from Russell King:
"Another mixture of changes this time around:
- Split XIP linker file from main linker file to make it more
maintainable, and various XIP fixes, and clean up a resulting
macro.
- Decompressor cleanups from Masahiro Yamada
- Avoid printing an error for a missing L2 cache
- Remove some duplicated symbols in System.map, and move
vectors/stubs back into kernel VMA
- Various low priority fixes from Arnd
- Updates to allow bus match functions to return negative errno
values, touching some drivers and the driver core. Greg has acked
these changes.
- Virtualisation platform udpates form Jean-Philippe Brucker.
- Security enhancements from Kees Cook
- Rework some Kconfig dependencies and move PSCI idle management code
out of arch/arm into drivers/firmware/psci.c
- ARM DMA mapping updates, touching media, acked by Mauro.
- Fix places in ARM code which should be using virt_to_idmap() so
that Keystone2 can work.
- Fix Marvell Tauros2 to work again with non-DT boots.
- Provide a delay timer for ARM Orion platforms"
* 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: (45 commits)
ARM: 8546/1: dma-mapping: refactor to fix coherent+cma+gfp=0
ARM: 8547/1: dma-mapping: store buffer information
ARM: 8543/1: decompressor: rename suffix_y to compress-y
ARM: 8542/1: decompressor: merge piggy.*.S and simplify Makefile
ARM: 8541/1: decompressor: drop redundant FORCE in Makefile
ARM: 8540/1: decompressor: use clean-files instead of extra-y to clean files
ARM: 8539/1: decompressor: drop more unneeded assignments to "targets"
ARM: 8538/1: decompressor: drop unneeded assignments to "targets"
ARM: 8532/1: uncompress: mark putc as inline
ARM: 8531/1: turn init_new_context into an inline function
ARM: 8530/1: remove VIRT_TO_BUS
ARM: 8537/1: drop unused DEBUG_RODATA from XIP_KERNEL
ARM: 8536/1: mm: hide __start_rodata_section_aligned for non-debug builds
ARM: 8535/1: mm: DEBUG_RODATA makes no sense with XIP_KERNEL
ARM: 8534/1: virt: fix hyp-stub build for pre-ARMv7 CPUs
ARM: make the physical-relative calculation more obvious
ARM: 8512/1: proc-v7.S: Adjust stack address when XIP_KERNEL
ARM: 8411/1: Add default SPARSEMEM settings
ARM: 8503/1: clk_register_clkdev: remove format string interface
ARM: 8529/1: remove 'i' and 'zi' targets
...
Diffstat (limited to 'arch/arm/mm/dma-mapping.c')
-rw-r--r-- | arch/arm/mm/dma-mapping.c | 248 |
1 files changed, 198 insertions, 50 deletions
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 0eca381..deac58d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -42,6 +42,55 @@ #include "dma.h" #include "mm.h" +struct arm_dma_alloc_args { + struct device *dev; + size_t size; + gfp_t gfp; + pgprot_t prot; + const void *caller; + bool want_vaddr; +}; + +struct arm_dma_free_args { + struct device *dev; + size_t size; + void *cpu_addr; + struct page *page; + bool want_vaddr; +}; + +struct arm_dma_allocator { + void *(*alloc)(struct arm_dma_alloc_args *args, + struct page **ret_page); + void (*free)(struct arm_dma_free_args *args); +}; + +struct arm_dma_buffer { + struct list_head list; + void *virt; + struct arm_dma_allocator *allocator; +}; + +static LIST_HEAD(arm_dma_bufs); +static DEFINE_SPINLOCK(arm_dma_bufs_lock); + +static struct arm_dma_buffer *arm_dma_buffer_find(void *virt) +{ + struct arm_dma_buffer *buf, *found = NULL; + unsigned long flags; + + spin_lock_irqsave(&arm_dma_bufs_lock, flags); + list_for_each_entry(buf, &arm_dma_bufs, list) { + if (buf->virt == virt) { + list_del(&buf->list); + found = buf; + break; + } + } + spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); + return found; +} + /* * The DMA API is built upon the notion of "buffer ownership". A buffer * is either exclusively owned by the CPU (and therefore may be accessed @@ -592,7 +641,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL #define __alloc_from_pool(size, ret_page) NULL #define __alloc_from_contiguous(dev, size, prot, ret, c, wv) NULL -#define __free_from_pool(cpu_addr, size) 0 +#define __free_from_pool(cpu_addr, size) do { } while (0) #define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0) #define __dma_free_remap(cpu_addr, size) do { } while (0) @@ -610,7 +659,78 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, return page_address(page); } +static void *simple_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_simple_buffer(args->dev, args->size, args->gfp, + ret_page); +} + +static void simple_allocator_free(struct arm_dma_free_args *args) +{ + __dma_free_buffer(args->page, args->size); +} +static struct arm_dma_allocator simple_allocator = { + .alloc = simple_allocator_alloc, + .free = simple_allocator_free, +}; + +static void *cma_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_from_contiguous(args->dev, args->size, args->prot, + ret_page, args->caller, + args->want_vaddr); +} + +static void cma_allocator_free(struct arm_dma_free_args *args) +{ + __free_from_contiguous(args->dev, args->page, args->cpu_addr, + args->size, args->want_vaddr); +} + +static struct arm_dma_allocator cma_allocator = { + .alloc = cma_allocator_alloc, + .free = cma_allocator_free, +}; + +static void *pool_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_from_pool(args->size, ret_page); +} + +static void pool_allocator_free(struct arm_dma_free_args *args) +{ + __free_from_pool(args->cpu_addr, args->size); +} + +static struct arm_dma_allocator pool_allocator = { + .alloc = pool_allocator_alloc, + .free = pool_allocator_free, +}; + +static void *remap_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_remap_buffer(args->dev, args->size, args->gfp, + args->prot, ret_page, args->caller, + args->want_vaddr); +} + +static void remap_allocator_free(struct arm_dma_free_args *args) +{ + if (args->want_vaddr) + __dma_free_remap(args->cpu_addr, args->size); + + __dma_free_buffer(args->page, args->size); +} + +static struct arm_dma_allocator remap_allocator = { + .alloc = remap_allocator_alloc, + .free = remap_allocator_free, +}; static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot, bool is_coherent, @@ -619,7 +739,16 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, u64 mask = get_coherent_dma_mask(dev); struct page *page = NULL; void *addr; - bool want_vaddr; + bool allowblock, cma; + struct arm_dma_buffer *buf; + struct arm_dma_alloc_args args = { + .dev = dev, + .size = PAGE_ALIGN(size), + .gfp = gfp, + .prot = prot, + .caller = caller, + .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs), + }; #ifdef CONFIG_DMA_API_DEBUG u64 limit = (mask + 1) & ~mask; @@ -633,6 +762,10 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (!mask) return NULL; + buf = kzalloc(sizeof(*buf), gfp); + if (!buf) + return NULL; + if (mask < 0xffffffffULL) gfp |= GFP_DMA; @@ -644,28 +777,37 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, * platform; see CONFIG_HUGETLBFS. */ gfp &= ~(__GFP_COMP); + args.gfp = gfp; *handle = DMA_ERROR_CODE; - size = PAGE_ALIGN(size); - want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); - - if (nommu()) - addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM)) - addr = __alloc_from_contiguous(dev, size, prot, &page, - caller, want_vaddr); - else if (is_coherent) - addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (!gfpflags_allow_blocking(gfp)) - addr = __alloc_from_pool(size, &page); + allowblock = gfpflags_allow_blocking(gfp); + cma = allowblock ? dev_get_cma_area(dev) : false; + + if (cma) + buf->allocator = &cma_allocator; + else if (nommu() || is_coherent) + buf->allocator = &simple_allocator; + else if (allowblock) + buf->allocator = &remap_allocator; else - addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, - caller, want_vaddr); + buf->allocator = &pool_allocator; + + addr = buf->allocator->alloc(&args, &page); + + if (page) { + unsigned long flags; - if (page) *handle = pfn_to_dma(dev, page_to_pfn(page)); + buf->virt = args.want_vaddr ? addr : page; + + spin_lock_irqsave(&arm_dma_bufs_lock, flags); + list_add(&buf->list, &arm_dma_bufs); + spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); + } else { + kfree(buf); + } - return want_vaddr ? addr : page; + return args.want_vaddr ? addr : page; } /* @@ -741,25 +883,21 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, bool is_coherent) { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); - bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); - - size = PAGE_ALIGN(size); - - if (nommu()) { - __dma_free_buffer(page, size); - } else if (!is_coherent && __free_from_pool(cpu_addr, size)) { + struct arm_dma_buffer *buf; + struct arm_dma_free_args args = { + .dev = dev, + .size = PAGE_ALIGN(size), + .cpu_addr = cpu_addr, + .page = page, + .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs), + }; + + buf = arm_dma_buffer_find(cpu_addr); + if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr)) return; - } else if (!dev_get_cma_area(dev)) { - if (want_vaddr && !is_coherent) - __dma_free_remap(cpu_addr, size); - __dma_free_buffer(page, size); - } else { - /* - * Non-atomic allocations cannot be freed with IRQs disabled - */ - WARN_ON(irqs_disabled()); - __free_from_contiguous(dev, page, cpu_addr, size, want_vaddr); - } + + buf->allocator->free(&args); + kfree(buf); } void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, @@ -1122,6 +1260,9 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping, spin_unlock_irqrestore(&mapping->lock, flags); } +/* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! */ +static const int iommu_order_array[] = { 9, 8, 4, 0 }; + static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp, struct dma_attrs *attrs) { @@ -1129,6 +1270,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, int count = size >> PAGE_SHIFT; int array_size = count * sizeof(struct page *); int i = 0; + int order_idx = 0; if (array_size <= PAGE_SIZE) pages = kzalloc(array_size, GFP_KERNEL); @@ -1154,6 +1296,10 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, return pages; } + /* Go straight to 4K chunks if caller says it's OK. */ + if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs)) + order_idx = ARRAY_SIZE(iommu_order_array) - 1; + /* * IOMMU can map any pages, so himem can also be used here */ @@ -1162,22 +1308,24 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, while (count) { int j, order; - for (order = __fls(count); order > 0; --order) { - /* - * We do not want OOM killer to be invoked as long - * as we can fall back to single pages, so we force - * __GFP_NORETRY for orders higher than zero. - */ - pages[i] = alloc_pages(gfp | __GFP_NORETRY, order); - if (pages[i]) - break; + order = iommu_order_array[order_idx]; + + /* Drop down when we get small */ + if (__fls(count) < order) { + order_idx++; + continue; } - if (!pages[i]) { - /* - * Fall back to single page allocation. - * Might invoke OOM killer as last resort. - */ + if (order) { + /* See if it's easy to allocate a high-order chunk */ + pages[i] = alloc_pages(gfp | __GFP_NORETRY, order); + + /* Go down a notch at first sign of pressure */ + if (!pages[i]) { + order_idx++; + continue; + } + } else { pages[i] = alloc_pages(gfp, 0); if (!pages[i]) goto error; |