From bca0fa5f12a6744a2b2e53154af65a51402b3426 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 23 Mar 2012 13:05:14 +0100 Subject: common: add dma_mmap_from_coherent() function Add a common helper for dma-mapping core for mapping a coherent buffer to userspace. Reported-by: Subash Patel Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Tested-By: Subash Patel diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c index bb0025c..1b85949 100644 --- a/drivers/base/dma-coherent.c +++ b/drivers/base/dma-coherent.c @@ -10,6 +10,7 @@ struct dma_coherent_mem { void *virt_base; dma_addr_t device_base; + phys_addr_t pfn_base; int size; int flags; unsigned long *bitmap; @@ -44,6 +45,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, dev->dma_mem->virt_base = mem_base; dev->dma_mem->device_base = device_addr; + dev->dma_mem->pfn_base = PFN_DOWN(bus_addr); dev->dma_mem->size = pages; dev->dma_mem->flags = flags; @@ -176,3 +178,43 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr) return 0; } EXPORT_SYMBOL(dma_release_from_coherent); + +/** + * dma_mmap_from_coherent() - try to mmap the memory allocated from + * per-device coherent memory pool to userspace + * @dev: device from which the memory was allocated + * @vma: vm_area for the userspace memory + * @vaddr: cpu address returned by dma_alloc_from_coherent + * @size: size of the memory buffer allocated by dma_alloc_from_coherent + * + * This checks whether the memory was allocated from the per-device + * coherent memory pool and if so, maps that memory to the provided vma. + * + * Returns 1 if the buffer belongs to the per-device pool (the mmap + * result is then stored in *ret), or 0 if the caller should proceed + * with mapping memory from generic pools. + */ +int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, + void *vaddr, size_t size, int *ret) +{ + struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; + + if (mem && vaddr >= mem->virt_base && vaddr + size <= + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + unsigned long off = vma->vm_pgoff; + int start = (vaddr - mem->virt_base) >> PAGE_SHIFT; + int user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int count = size >> PAGE_SHIFT; + + *ret = -ENXIO; + if (off < count && user_count <= count - off) { + unsigned pfn = mem->pfn_base + start + off; + *ret = remap_pfn_range(vma, vma->vm_start, pfn, + user_count << PAGE_SHIFT, + vma->vm_page_prot); + } + return 1; + } + return 0; +} +EXPORT_SYMBOL(dma_mmap_from_coherent); diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h index 85a3ffa..abfb268 100644 --- a/include/asm-generic/dma-coherent.h +++ b/include/asm-generic/dma-coherent.h @@ -3,13 +3,15 @@ #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT /* - * These two functions are only for dma allocator. + * These three functions are only for dma allocator. * Don't use them in device drivers. 
*/ int dma_alloc_from_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret); int dma_release_from_coherent(struct device *dev, int order, void *vaddr); +int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret); /* * Standard interface */ -- cgit v0.10.2 From 47142f07eea32e9c108f548a4b06c28bec7df6e4 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 15 May 2012 19:04:13 +0200 Subject: ARM: dma-mapping: use dma_mmap_from_coherent() Signed-off-by: Marek Szyprowski diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index db23ae4..7ec0863 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -401,6 +401,9 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long user_size, kern_size; struct arm_vmregion *c; + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); -- cgit v0.10.2 From 6b6f770b573903f8a7d1cfab1fc662685653f413 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 28 Feb 2012 10:19:14 +0100 Subject: ARM: dma-mapping: use pr_* instead of printk Replace all calls to printk with the pr_* family of functions. 
Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Acked-by: Arnd Bergmann Tested-By: Subash Patel diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7ec0863..322c70a 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -184,14 +184,14 @@ static int __init consistent_init(void) pud = pud_alloc(&init_mm, pgd, base); if (!pud) { - printk(KERN_ERR "%s: no pud tables\n", __func__); + pr_err("%s: no pud tables\n", __func__); ret = -ENOMEM; break; } pmd = pmd_alloc(&init_mm, pud, base); if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); + pr_err("%s: no pmd tables\n", __func__); ret = -ENOMEM; break; } @@ -199,7 +199,7 @@ static int __init consistent_init(void) pte = pte_alloc_kernel(pmd, base); if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); + pr_err("%s: no pte tables\n", __func__); ret = -ENOMEM; break; } @@ -222,7 +222,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, int bit; if (!consistent_pte) { - printk(KERN_ERR "%s: not initialised\n", __func__); + pr_err("%s: not initialised\n", __func__); dump_stack(); return NULL; } @@ -281,14 +281,14 @@ static void __dma_free_remap(void *cpu_addr, size_t size) c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); if (!c) { - printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + pr_err("%s: trying to free invalid coherent area: %p\n", __func__, cpu_addr); dump_stack(); return; } if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + pr_err("%s: freeing wrong coherent size (%ld != %d)\n", __func__, c->vm_end - c->vm_start, size); dump_stack(); size = c->vm_end - c->vm_start; @@ -310,8 +310,8 @@ static void __dma_free_remap(void *cpu_addr, size_t size) } if (pte_none(pte) || !pte_present(pte)) - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); + pr_crit("%s: bad page in kernel page table\n", + __func__); } while 
(size -= PAGE_SIZE); flush_tlb_kernel_range(c->vm_start, c->vm_end); -- cgit v0.10.2 From 553ac78877242b6d8b591323731df304140d0f99 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 29 Feb 2012 14:45:28 +0100 Subject: ARM: dma-mapping: introduce DMA_ERROR_CODE constant Replace all uses of ~0 with DMA_ERROR_CODE, which should make the code easier to read. Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Tested-By: Subash Patel diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 595ecd29..210ad1b 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -254,7 +254,7 @@ static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size, if (buf == NULL) { dev_err(dev, "%s: unable to map unsafe buffer %p!\n", __func__, ptr); - return ~0; + return DMA_ERROR_CODE; } dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", @@ -320,7 +320,7 @@ dma_addr_t __dma_map_page(struct device *dev, struct page *page, ret = needs_bounce(dev, dma_addr, size); if (ret < 0) - return ~0; + return DMA_ERROR_CODE; if (ret == 0) { __dma_page_cpu_to_dev(page, offset, size, dir); @@ -329,7 +329,7 @@ dma_addr_t __dma_map_page(struct device *dev, struct page *page, if (PageHighMem(page)) { dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n"); - return ~0; + return DMA_ERROR_CODE; } return map_single(dev, page_address(page) + offset, size, dir); diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index cb3b7c9..6a838da 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -10,6 +10,8 @@ #include #include +#define DMA_ERROR_CODE (~0) + #ifdef __arch_page_to_dma #error Please update to __arch_pfn_to_dma #endif @@ -123,7 +125,7 @@ extern int dma_set_mask(struct device *, u64); */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return dma_addr == ~0; + return dma_addr == DMA_ERROR_CODE; } /* 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 322c70a..e4ac5fc 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -342,7 +342,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, */ gfp &= ~(__GFP_COMP); - *handle = ~0; + *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); page = __dma_alloc_buffer(dev, size, gfp); -- cgit v0.10.2 From a227fb92a0f5f0dd8282719386e9b3a29f0d16b2 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 10 Feb 2012 19:55:20 +0100 Subject: ARM: dma-mapping: remove offset parameter to prepare for generic dma_ops This patch removes the need for the offset parameter in dma bounce functions. This is required to let the dma-mapping framework on the ARM architecture use common, generic dma_map_ops based dma-mapping helpers. Background and more detailed explanation: dma_*_range_* functions have been available since the early days of the dma mapping api. They are the correct way of doing partial syncs on the buffer (usually used by the network device drivers). This patch changes only the internal implementation of the dma bounce functions to let them tunnel through the dma_map_ops structure. The driver api stays unchanged, so drivers are obliged to call dma_*_range_* functions to keep code clean and easy to understand. The only drawback from this patch is reduced detection of the dma api abuse. Let us consider the following code: dma_addr = dma_map_single(dev, ptr, 64, DMA_TO_DEVICE); dma_sync_single_range_for_cpu(dev, dma_addr+16, 0, 32, DMA_TO_DEVICE); Without the patch such code fails, because dma bounce code is unable to find the bounce buffer for the given dma_address. After the patch the above sync call will be equivalent to: dma_sync_single_range_for_cpu(dev, dma_addr, 16, 32, DMA_TO_DEVICE); which succeeds. I don't consider this as a real problem, because DMA API abuse should be caught by the debug_dma_* function family. 
This patch lets us simplify the internal low-level implementation without changing the driver-visible API. Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Tested-By: Subash Patel diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 210ad1b..32e9cc6 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -173,7 +173,8 @@ find_safe_buffer(struct dmabounce_device_info *device_info, dma_addr_t safe_dma_ read_lock_irqsave(&device_info->lock, flags); list_for_each_entry(b, &device_info->safe_buffers, node) - if (b->safe_dma_addr == safe_dma_addr) { + if (b->safe_dma_addr <= safe_dma_addr && + b->safe_dma_addr + b->size > safe_dma_addr) { rb = b; break; } @@ -362,9 +363,10 @@ void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, EXPORT_SYMBOL(__dma_unmap_page); int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, - unsigned long off, size_t sz, enum dma_data_direction dir) + size_t sz, enum dma_data_direction dir) { struct safe_buffer *buf; + unsigned long off; dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", __func__, addr, off, sz, dir); @@ -373,6 +375,8 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, if (!buf) return 1; + off = addr - buf->safe_dma_addr; + BUG_ON(buf->direction != dir); dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", @@ -391,9 +395,10 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, EXPORT_SYMBOL(dmabounce_sync_for_cpu); int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, - unsigned long off, size_t sz, enum dma_data_direction dir) + size_t sz, enum dma_data_direction dir) { struct safe_buffer *buf; + unsigned long off; dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", __func__, addr, off, sz, dir); @@ -402,6 +407,8 @@ int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, if (!buf) return 1; + off = addr - buf->safe_dma_addr; + BUG_ON(buf->direction != dir); dev_dbg(dev, 
"%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 6a838da..eeddbe2 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -266,19 +266,17 @@ extern void __dma_unmap_page(struct device *, dma_addr_t, size_t, /* * Private functions */ -int dmabounce_sync_for_cpu(struct device *, dma_addr_t, unsigned long, - size_t, enum dma_data_direction); -int dmabounce_sync_for_device(struct device *, dma_addr_t, unsigned long, - size_t, enum dma_data_direction); +int dmabounce_sync_for_cpu(struct device *, dma_addr_t, size_t, enum dma_data_direction); +int dmabounce_sync_for_device(struct device *, dma_addr_t, size_t, enum dma_data_direction); #else static inline int dmabounce_sync_for_cpu(struct device *d, dma_addr_t addr, - unsigned long offset, size_t size, enum dma_data_direction dir) + size_t size, enum dma_data_direction dir) { return 1; } static inline int dmabounce_sync_for_device(struct device *d, dma_addr_t addr, - unsigned long offset, size_t size, enum dma_data_direction dir) + size_t size, enum dma_data_direction dir) { return 1; } @@ -401,6 +399,33 @@ static inline void dma_unmap_page(struct device *dev, dma_addr_t handle, __dma_unmap_page(dev, handle, size, dir); } + +static inline void dma_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + BUG_ON(!valid_dma_direction(dir)); + + debug_dma_sync_single_for_cpu(dev, handle, size, dir); + + if (!dmabounce_sync_for_cpu(dev, handle, size, dir)) + return; + + __dma_single_dev_to_cpu(dma_to_virt(dev, handle), size, dir); +} + +static inline void dma_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + BUG_ON(!valid_dma_direction(dir)); + + debug_dma_sync_single_for_device(dev, handle, size, dir); + + if (!dmabounce_sync_for_device(dev, handle, size, dir)) + return; + + 
__dma_single_cpu_to_dev(dma_to_virt(dev, handle), size, dir); +} + /** * dma_sync_single_range_for_cpu * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -423,40 +448,14 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t handle, unsigned long offset, size_t size, enum dma_data_direction dir) { - BUG_ON(!valid_dma_direction(dir)); - - debug_dma_sync_single_for_cpu(dev, handle + offset, size, dir); - - if (!dmabounce_sync_for_cpu(dev, handle, offset, size, dir)) - return; - - __dma_single_dev_to_cpu(dma_to_virt(dev, handle) + offset, size, dir); + dma_sync_single_for_cpu(dev, handle + offset, size, dir); } static inline void dma_sync_single_range_for_device(struct device *dev, dma_addr_t handle, unsigned long offset, size_t size, enum dma_data_direction dir) { - BUG_ON(!valid_dma_direction(dir)); - - debug_dma_sync_single_for_device(dev, handle + offset, size, dir); - - if (!dmabounce_sync_for_device(dev, handle, offset, size, dir)) - return; - - __dma_single_cpu_to_dev(dma_to_virt(dev, handle) + offset, size, dir); -} - -static inline void dma_sync_single_for_cpu(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - dma_sync_single_range_for_cpu(dev, handle, 0, size, dir); -} - -static inline void dma_sync_single_for_device(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - dma_sync_single_range_for_device(dev, handle, 0, size, dir); + dma_sync_single_for_device(dev, handle + offset, size, dir); } /* diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e4ac5fc..a16993a 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -660,7 +660,7 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) { - if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0, + if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), sg_dma_len(s), dir)) continue; @@ -686,7 +686,7 
@@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) { - if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0, + if (!dmabounce_sync_for_device(dev, sg_dma_address(s), sg_dma_len(s), dir)) continue; -- cgit v0.10.2 From 2dc6a016bbedf18f18ad73997e5338307d6dbde9 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 10 Feb 2012 19:55:20 +0100 Subject: ARM: dma-mapping: use asm-generic/dma-mapping-common.h This patch modifies dma-mapping implementation on ARM architecture to use common dma_map_ops structure and asm-generic/dma-mapping-common.h helpers. Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Tested-By: Subash Patel diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 36586dba..c8111c5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -4,6 +4,7 @@ config ARM select HAVE_AOUT select HAVE_DMA_API_DEBUG select HAVE_IDE if PCI || ISA || PCMCIA + select HAVE_DMA_ATTRS select HAVE_MEMBLOCK select RTC_LIB select SYS_SUPPORTS_APM_EMULATION diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index 7aa3680..6e2cb0e 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -7,6 +7,7 @@ #define ASMARM_DEVICE_H struct dev_archdata { + struct dma_map_ops *dma_ops; #ifdef CONFIG_DMABOUNCE struct dmabounce_device_info *dmabounce; #endif diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index eeddbe2..6725a08 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -11,6 +11,27 @@ #include #define DMA_ERROR_CODE (~0) +extern struct dma_map_ops arm_dma_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (dev && dev->archdata.dma_ops) + return dev->archdata.dma_ops; + return &arm_dma_ops; +} + +static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +{ + BUG_ON(!dev); + dev->archdata.dma_ops = ops; +} + +#include + +static inline int 
dma_set_mask(struct device *dev, u64 mask) +{ + return get_dma_ops(dev)->set_dma_mask(dev, mask); +} #ifdef __arch_page_to_dma #error Please update to __arch_pfn_to_dma @@ -119,7 +140,6 @@ static inline void __dma_page_dev_to_cpu(struct page *page, unsigned long off, extern int dma_supported(struct device *, u64); extern int dma_set_mask(struct device *, u64); - /* * DMA errors are defined by all-bits-set in the DMA address. */ @@ -297,179 +317,17 @@ static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle, } #endif /* CONFIG_DMABOUNCE */ -/** - * dma_map_single - map a single buffer for streaming DMA - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @cpu_addr: CPU direct mapped address of buffer - * @size: size of buffer to map - * @dir: DMA transfer direction - * - * Ensure that any data held in the cache is appropriately discarded - * or written back. - * - * The device owns this memory once this call has completed. The CPU - * can regain ownership by calling dma_unmap_single() or - * dma_sync_single_for_cpu(). 
- */ -static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, - size_t size, enum dma_data_direction dir) -{ - unsigned long offset; - struct page *page; - dma_addr_t addr; - - BUG_ON(!virt_addr_valid(cpu_addr)); - BUG_ON(!virt_addr_valid(cpu_addr + size - 1)); - BUG_ON(!valid_dma_direction(dir)); - - page = virt_to_page(cpu_addr); - offset = (unsigned long)cpu_addr & ~PAGE_MASK; - addr = __dma_map_page(dev, page, offset, size, dir); - debug_dma_map_page(dev, page, offset, size, dir, addr, true); - - return addr; -} - -/** - * dma_map_page - map a portion of a page for streaming DMA - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @page: page that buffer resides in - * @offset: offset into page for start of buffer - * @size: size of buffer to map - * @dir: DMA transfer direction - * - * Ensure that any data held in the cache is appropriately discarded - * or written back. - * - * The device owns this memory once this call has completed. The CPU - * can regain ownership by calling dma_unmap_page(). - */ -static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir) -{ - dma_addr_t addr; - - BUG_ON(!valid_dma_direction(dir)); - - addr = __dma_map_page(dev, page, offset, size, dir); - debug_dma_map_page(dev, page, offset, size, dir, addr, false); - - return addr; -} - -/** - * dma_unmap_single - unmap a single buffer previously mapped - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @handle: DMA address of buffer - * @size: size of buffer (same as passed to dma_map_single) - * @dir: DMA transfer direction (same as passed to dma_map_single) - * - * Unmap a single streaming mode DMA translation. The handle and size - * must match what was provided in the previous dma_map_single() call. - * All other usages are undefined. 
- * - * After this call, reads by the CPU to the buffer are guaranteed to see - * whatever the device wrote there. - */ -static inline void dma_unmap_single(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - debug_dma_unmap_page(dev, handle, size, dir, true); - __dma_unmap_page(dev, handle, size, dir); -} - -/** - * dma_unmap_page - unmap a buffer previously mapped through dma_map_page() - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @handle: DMA address of buffer - * @size: size of buffer (same as passed to dma_map_page) - * @dir: DMA transfer direction (same as passed to dma_map_page) - * - * Unmap a page streaming mode DMA translation. The handle and size - * must match what was provided in the previous dma_map_page() call. - * All other usages are undefined. - * - * After this call, reads by the CPU to the buffer are guaranteed to see - * whatever the device wrote there. - */ -static inline void dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - debug_dma_unmap_page(dev, handle, size, dir, false); - __dma_unmap_page(dev, handle, size, dir); -} - - -static inline void dma_sync_single_for_cpu(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - BUG_ON(!valid_dma_direction(dir)); - - debug_dma_sync_single_for_cpu(dev, handle, size, dir); - - if (!dmabounce_sync_for_cpu(dev, handle, size, dir)) - return; - - __dma_single_dev_to_cpu(dma_to_virt(dev, handle), size, dir); -} - -static inline void dma_sync_single_for_device(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - BUG_ON(!valid_dma_direction(dir)); - - debug_dma_sync_single_for_device(dev, handle, size, dir); - - if (!dmabounce_sync_for_device(dev, handle, size, dir)) - return; - - __dma_single_cpu_to_dev(dma_to_virt(dev, handle), size, dir); -} - -/** - * dma_sync_single_range_for_cpu - * @dev: valid struct device 
pointer, or NULL for ISA and EISA-like devices - * @handle: DMA address of buffer - * @offset: offset of region to start sync - * @size: size of region to sync - * @dir: DMA transfer direction (same as passed to dma_map_single) - * - * Make physical memory consistent for a single streaming mode DMA - * translation after a transfer. - * - * If you perform a dma_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, you - * must first the perform a dma_sync_for_device, and then the - * device again owns the buffer. - */ -static inline void dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t handle, unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - dma_sync_single_for_cpu(dev, handle + offset, size, dir); -} - -static inline void dma_sync_single_range_for_device(struct device *dev, - dma_addr_t handle, unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - dma_sync_single_for_device(dev, handle + offset, size, dir); -} - /* * The scatter list versions of the above methods. 
*/ -extern int dma_map_sg(struct device *, struct scatterlist *, int, - enum dma_data_direction); -extern void dma_unmap_sg(struct device *, struct scatterlist *, int, +extern int arm_dma_map_sg(struct device *, struct scatterlist *, int, + enum dma_data_direction, struct dma_attrs *attrs); +extern void arm_dma_unmap_sg(struct device *, struct scatterlist *, int, + enum dma_data_direction, struct dma_attrs *attrs); +extern void arm_dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int, enum dma_data_direction); -extern void dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int, +extern void arm_dma_sync_sg_for_device(struct device *, struct scatterlist *, int, enum dma_data_direction); -extern void dma_sync_sg_for_device(struct device *, struct scatterlist *, int, - enum dma_data_direction); - #endif /* __KERNEL__ */ #endif diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index a16993a..70be6e1 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -29,6 +29,85 @@ #include "mm.h" +/** + * arm_dma_map_page - map a portion of a page for streaming DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Ensure that any data held in the cache is appropriately discarded + * or written back. + * + * The device owns this memory once this call has completed. The CPU + * can regain ownership by calling dma_unmap_page(). 
+ */ +static inline dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return __dma_map_page(dev, page, offset, size, dir); +} + +/** + * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * Unmap a page streaming mode DMA translation. The handle and size + * must match what was provided in the previous dma_map_page() call. + * All other usages are undefined. + * + * After this call, reads by the CPU to the buffer are guaranteed to see + * whatever the device wrote there. + */ +static inline void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + __dma_unmap_page(dev, handle, size, dir); +} + +static inline void arm_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned int offset = handle & (PAGE_SIZE - 1); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); + if (!dmabounce_sync_for_cpu(dev, handle, size, dir)) + return; + + __dma_page_dev_to_cpu(page, offset, size, dir); +} + +static inline void arm_dma_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned int offset = handle & (PAGE_SIZE - 1); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); + if (!dmabounce_sync_for_device(dev, handle, size, dir)) + return; + + __dma_page_cpu_to_dev(page, offset, size, dir); +} + +static int arm_dma_set_mask(struct device *dev, u64 dma_mask); + +struct dma_map_ops arm_dma_ops = { + .map_page = arm_dma_map_page, + .unmap_page = arm_dma_unmap_page, + .map_sg = 
arm_dma_map_sg, + .unmap_sg = arm_dma_unmap_sg, + .sync_single_for_cpu = arm_dma_sync_single_for_cpu, + .sync_single_for_device = arm_dma_sync_single_for_device, + .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, + .sync_sg_for_device = arm_dma_sync_sg_for_device, + .set_dma_mask = arm_dma_set_mask, +}; +EXPORT_SYMBOL(arm_dma_ops); + static u64 get_coherent_dma_mask(struct device *dev) { u64 mask = (u64)arm_dma_limit; @@ -461,47 +540,6 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr } EXPORT_SYMBOL(dma_free_coherent); -/* - * Make an area consistent for devices. - * Note: Drivers should NOT use this function directly, as it will break - * platforms with CONFIG_DMABOUNCE. - * Use the driver DMA support - see dma-mapping.h (dma_sync_*) - */ -void ___dma_single_cpu_to_dev(const void *kaddr, size_t size, - enum dma_data_direction dir) -{ - unsigned long paddr; - - BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); - - dmac_map_area(kaddr, size, dir); - - paddr = __pa(kaddr); - if (dir == DMA_FROM_DEVICE) { - outer_inv_range(paddr, paddr + size); - } else { - outer_clean_range(paddr, paddr + size); - } - /* FIXME: non-speculating: flush on bidirectional mappings? 
*/ -} -EXPORT_SYMBOL(___dma_single_cpu_to_dev); - -void ___dma_single_dev_to_cpu(const void *kaddr, size_t size, - enum dma_data_direction dir) -{ - BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); - - /* FIXME: non-speculating: not required */ - /* don't bother invalidating if DMA to device */ - if (dir != DMA_TO_DEVICE) { - unsigned long paddr = __pa(kaddr); - outer_inv_range(paddr, paddr + size); - } - - dmac_unmap_area(kaddr, size, dir); -} -EXPORT_SYMBOL(___dma_single_dev_to_cpu); - static void dma_cache_maint_page(struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, void (*op)(const void *, size_t, int)) @@ -599,21 +637,18 @@ EXPORT_SYMBOL(___dma_page_dev_to_cpu); * Device ownership issues as mentioned for dma_map_single are the same * here. */ -int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir) +int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *s; int i, j; - BUG_ON(!valid_dma_direction(dir)); - for_each_sg(sg, s, nents, i) { s->dma_address = __dma_map_page(dev, sg_page(s), s->offset, s->length, dir); if (dma_mapping_error(dev, s->dma_address)) goto bad_mapping; } - debug_dma_map_sg(dev, sg, nents, nents, dir); return nents; bad_mapping: @@ -621,7 +656,6 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); return 0; } -EXPORT_SYMBOL(dma_map_sg); /** * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg @@ -633,18 +667,15 @@ EXPORT_SYMBOL(dma_map_sg); * Unmap a set of streaming mode DMA translations. Again, CPU access * rules concerning calls here are the same as for dma_unmap_single(). 
*/ -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir) +void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *s; int i; - debug_dma_unmap_sg(dev, sg, nents, dir); - for_each_sg(sg, s, nents, i) __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); } -EXPORT_SYMBOL(dma_unmap_sg); /** * dma_sync_sg_for_cpu @@ -653,7 +684,7 @@ EXPORT_SYMBOL(dma_unmap_sg); * @nents: number of buffers to map (returned from dma_map_sg) * @dir: DMA transfer direction (same as was passed to dma_map_sg) */ -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, +void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { struct scatterlist *s; @@ -667,10 +698,7 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); } - - debug_dma_sync_sg_for_cpu(dev, sg, nents, dir); } -EXPORT_SYMBOL(dma_sync_sg_for_cpu); /** * dma_sync_sg_for_device @@ -679,7 +707,7 @@ EXPORT_SYMBOL(dma_sync_sg_for_cpu); * @nents: number of buffers to map (returned from dma_map_sg) * @dir: DMA transfer direction (same as was passed to dma_map_sg) */ -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, +void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { struct scatterlist *s; @@ -693,10 +721,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); } - - debug_dma_sync_sg_for_device(dev, sg, nents, dir); } -EXPORT_SYMBOL(dma_sync_sg_for_device); /* * Return whether the given device DMA address mask can be supported @@ -712,7 +737,7 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -int dma_set_mask(struct device *dev, u64 
dma_mask) +static int arm_dma_set_mask(struct device *dev, u64 dma_mask) { if (!dev->dma_mask || !dma_supported(dev, dma_mask)) return -EIO; @@ -723,7 +748,6 @@ int dma_set_mask(struct device *dev, u64 dma_mask) return 0; } -EXPORT_SYMBOL(dma_set_mask); #define PREALLOC_DMA_DEBUG_ENTRIES 4096 -- cgit v0.10.2 From 2a550e73d3e5f040a3e8eb733c942ab352eafb36 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 10 Feb 2012 19:55:20 +0100 Subject: ARM: dma-mapping: implement dma sg methods on top of any generic dma ops This patch converts all dma_sg methods to be generic (independent of the current DMA mapping implementation for ARM architecture). All dma sg operations are now implemented on top of respective dma_map_page/dma_sync_single_for* operations from dma_map_ops structure. Before this patch there were custom methods for all scatter/gather related operations. They iterated over the whole scatter list and called cache related operations directly (which in turn checked if we use dma bounce code or not and called respective version). This patch changes them not to use such shortcut. Instead it provides similar loop over scatter list and calls methods from the device's dma_map_ops structure. This enables us to use device dependent implementations of cache related operations (direct linear or dma bounce) depending on the provided dma_map_ops structure. 
Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Tested-By: Subash Patel diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 70be6e1..b50fa57 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -622,7 +622,7 @@ void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, EXPORT_SYMBOL(___dma_page_dev_to_cpu); /** - * dma_map_sg - map a set of SG buffers for streaming mode DMA + * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @sg: list of buffers * @nents: number of buffers to map @@ -640,12 +640,13 @@ EXPORT_SYMBOL(___dma_page_dev_to_cpu); int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { + struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i, j; for_each_sg(sg, s, nents, i) { - s->dma_address = __dma_map_page(dev, sg_page(s), s->offset, - s->length, dir); + s->dma_address = ops->map_page(dev, sg_page(s), s->offset, + s->length, dir, attrs); if (dma_mapping_error(dev, s->dma_address)) goto bad_mapping; } @@ -653,12 +654,12 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, bad_mapping: for_each_sg(sg, s, i, j) - __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); + ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); return 0; } /** - * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @sg: list of buffers * @nents: number of buffers to unmap (same as was passed to dma_map_sg) @@ -670,15 +671,17 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { + struct 
dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; + int i; for_each_sg(sg, s, nents, i) - __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); + ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); } /** - * dma_sync_sg_for_cpu + * arm_dma_sync_sg_for_cpu * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @sg: list of buffers * @nents: number of buffers to map (returned from dma_map_sg) @@ -687,21 +690,17 @@ void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { + struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i; - for_each_sg(sg, s, nents, i) { - if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), - sg_dma_len(s), dir)) - continue; - - __dma_page_dev_to_cpu(sg_page(s), s->offset, - s->length, dir); - } + for_each_sg(sg, s, nents, i) + ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length, + dir); } /** - * dma_sync_sg_for_device + * arm_dma_sync_sg_for_device * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @sg: list of buffers * @nents: number of buffers to map (returned from dma_map_sg) @@ -710,17 +709,13 @@ void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { + struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i; - for_each_sg(sg, s, nents, i) { - if (!dmabounce_sync_for_device(dev, sg_dma_address(s), - sg_dma_len(s), dir)) - continue; - - __dma_page_cpu_to_dev(sg_page(s), s->offset, - s->length, dir); - } + for_each_sg(sg, s, nents, i) + ops->sync_single_for_device(dev, sg_dma_address(s), s->length, + dir); } /* -- cgit v0.10.2 From 15237e1f505b3e5c2276f240b01cd2133e110cbc Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 10 Feb 2012 19:55:20 
+0100 Subject: ARM: dma-mapping: move all dma bounce code to separate dma ops structure This patch removes dma bounce hooks from the common dma mapping implementation on ARM architecture and creates a separate set of dma_map_ops for dma bounce devices. Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Tested-By: Subash Patel diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 32e9cc6..813c29d 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -308,8 +308,9 @@ static inline void unmap_single(struct device *dev, struct safe_buffer *buf, * substitute the safe buffer for the unsafe one. * (basically move the buffer from an unsafe area to a safe one) */ -dma_addr_t __dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir) +static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { dma_addr_t dma_addr; int ret; @@ -324,7 +325,7 @@ dma_addr_t __dma_map_page(struct device *dev, struct page *page, return DMA_ERROR_CODE; if (ret == 0) { - __dma_page_cpu_to_dev(page, offset, size, dir); + arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir); return dma_addr; } @@ -335,7 +336,6 @@ dma_addr_t __dma_map_page(struct device *dev, struct page *page, return map_single(dev, page_address(page) + offset, size, dir); } -EXPORT_SYMBOL(__dma_map_page); /* * see if a mapped address was really a "safe" buffer and if so, copy @@ -343,8 +343,8 @@ EXPORT_SYMBOL(__dma_map_page); * the safe buffer. 
(basically return things back to the way they * should be) */ -void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir) +static void dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) { struct safe_buffer *buf; @@ -353,16 +353,14 @@ void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, buf = find_safe_buffer_dev(dev, dma_addr, __func__); if (!buf) { - __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, dma_addr)), - dma_addr & ~PAGE_MASK, size, dir); + arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir); return; } unmap_single(dev, buf, size, dir); } -EXPORT_SYMBOL(__dma_unmap_page); -int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, +static int __dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, size_t sz, enum dma_data_direction dir) { struct safe_buffer *buf; @@ -392,9 +390,17 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, } return 0; } -EXPORT_SYMBOL(dmabounce_sync_for_cpu); -int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, +static void dmabounce_sync_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + if (!__dmabounce_sync_for_cpu(dev, handle, size, dir)) + return; + + arm_dma_ops.sync_single_for_cpu(dev, handle, size, dir); +} + +static int __dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, size_t sz, enum dma_data_direction dir) { struct safe_buffer *buf; @@ -424,7 +430,35 @@ int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, } return 0; } -EXPORT_SYMBOL(dmabounce_sync_for_device); + +static void dmabounce_sync_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + if (!__dmabounce_sync_for_device(dev, handle, size, dir)) + return; + + arm_dma_ops.sync_single_for_device(dev, handle, size, dir); +} + +static int 
dmabounce_set_mask(struct device *dev, u64 dma_mask) +{ + if (dev->archdata.dmabounce) + return 0; + + return arm_dma_ops.set_dma_mask(dev, dma_mask); +} + +static struct dma_map_ops dmabounce_ops = { + .map_page = dmabounce_map_page, + .unmap_page = dmabounce_unmap_page, + .sync_single_for_cpu = dmabounce_sync_for_cpu, + .sync_single_for_device = dmabounce_sync_for_device, + .map_sg = arm_dma_map_sg, + .unmap_sg = arm_dma_unmap_sg, + .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, + .sync_sg_for_device = arm_dma_sync_sg_for_device, + .set_dma_mask = dmabounce_set_mask, +}; static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev, const char *name, unsigned long size) @@ -486,6 +520,7 @@ int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size, #endif dev->archdata.dmabounce = device_info; + set_dma_ops(dev, &dmabounce_ops); dev_info(dev, "dmabounce: registered device\n"); @@ -504,6 +539,7 @@ void dmabounce_unregister_dev(struct device *dev) struct dmabounce_device_info *device_info = dev->archdata.dmabounce; dev->archdata.dmabounce = NULL; + set_dma_ops(dev, NULL); if (!device_info) { dev_warn(dev, diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 6725a08..7a7c3c7 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -85,62 +85,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) #endif /* - * The DMA API is built upon the notion of "buffer ownership". A buffer - * is either exclusively owned by the CPU (and therefore may be accessed - * by it) or exclusively owned by the DMA device. These helper functions - * represent the transitions between these two ownership states. - * - * Note, however, that on later ARMs, this notion does not work due to - * speculative prefetches. 
We model our approach on the assumption that - * the CPU does do speculative prefetches, which means we clean caches - * before transfers and delay cache invalidation until transfer completion. - * - * Private support functions: these are not part of the API and are - * liable to change. Drivers must not use these. - */ -static inline void __dma_single_cpu_to_dev(const void *kaddr, size_t size, - enum dma_data_direction dir) -{ - extern void ___dma_single_cpu_to_dev(const void *, size_t, - enum dma_data_direction); - - if (!arch_is_coherent()) - ___dma_single_cpu_to_dev(kaddr, size, dir); -} - -static inline void __dma_single_dev_to_cpu(const void *kaddr, size_t size, - enum dma_data_direction dir) -{ - extern void ___dma_single_dev_to_cpu(const void *, size_t, - enum dma_data_direction); - - if (!arch_is_coherent()) - ___dma_single_dev_to_cpu(kaddr, size, dir); -} - -static inline void __dma_page_cpu_to_dev(struct page *page, unsigned long off, - size_t size, enum dma_data_direction dir) -{ - extern void ___dma_page_cpu_to_dev(struct page *, unsigned long, - size_t, enum dma_data_direction); - - if (!arch_is_coherent()) - ___dma_page_cpu_to_dev(page, off, size, dir); -} - -static inline void __dma_page_dev_to_cpu(struct page *page, unsigned long off, - size_t size, enum dma_data_direction dir) -{ - extern void ___dma_page_dev_to_cpu(struct page *, unsigned long, - size_t, enum dma_data_direction); - - if (!arch_is_coherent()) - ___dma_page_dev_to_cpu(page, off, size, dir); -} - -extern int dma_supported(struct device *, u64); -extern int dma_set_mask(struct device *, u64); -/* * DMA errors are defined by all-bits-set in the DMA address. 
*/ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -163,6 +107,8 @@ static inline void dma_free_noncoherent(struct device *dev, size_t size, { } +extern int dma_supported(struct device *dev, u64 mask); + /** * dma_alloc_coherent - allocate consistent memory for DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -235,7 +181,6 @@ int dma_mmap_writecombine(struct device *, struct vm_area_struct *, extern void __init init_consistent_dma_size(unsigned long size); -#ifdef CONFIG_DMABOUNCE /* * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic" * and utilize bounce buffers as needed to work around limited DMA windows. @@ -275,47 +220,7 @@ extern int dmabounce_register_dev(struct device *, unsigned long, */ extern void dmabounce_unregister_dev(struct device *); -/* - * The DMA API, implemented by dmabounce.c. See below for descriptions. - */ -extern dma_addr_t __dma_map_page(struct device *, struct page *, - unsigned long, size_t, enum dma_data_direction); -extern void __dma_unmap_page(struct device *, dma_addr_t, size_t, - enum dma_data_direction); - -/* - * Private functions - */ -int dmabounce_sync_for_cpu(struct device *, dma_addr_t, size_t, enum dma_data_direction); -int dmabounce_sync_for_device(struct device *, dma_addr_t, size_t, enum dma_data_direction); -#else -static inline int dmabounce_sync_for_cpu(struct device *d, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - return 1; -} - -static inline int dmabounce_sync_for_device(struct device *d, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - return 1; -} - -static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir) -{ - __dma_page_cpu_to_dev(page, offset, size, dir); - return pfn_to_dma(dev, page_to_pfn(page)) + offset; -} - -static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t 
size, enum dma_data_direction dir) -{ - __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), - handle & ~PAGE_MASK, size, dir); -} -#endif /* CONFIG_DMABOUNCE */ /* * The scatter list versions of the above methods. diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index b50fa57..c949668 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -29,6 +29,75 @@ #include "mm.h" +/* + * The DMA API is built upon the notion of "buffer ownership". A buffer + * is either exclusively owned by the CPU (and therefore may be accessed + * by it) or exclusively owned by the DMA device. These helper functions + * represent the transitions between these two ownership states. + * + * Note, however, that on later ARMs, this notion does not work due to + * speculative prefetches. We model our approach on the assumption that + * the CPU does do speculative prefetches, which means we clean caches + * before transfers and delay cache invalidation until transfer completion. + * + * Private support functions: these are not part of the API and are + * liable to change. Drivers must not use these. 
+ */ +static inline void __dma_single_cpu_to_dev(const void *kaddr, size_t size, + enum dma_data_direction dir) +{ + extern void ___dma_single_cpu_to_dev(const void *, size_t, + enum dma_data_direction); + + if (!arch_is_coherent()) + ___dma_single_cpu_to_dev(kaddr, size, dir); +} + +static inline void __dma_single_dev_to_cpu(const void *kaddr, size_t size, + enum dma_data_direction dir) +{ + extern void ___dma_single_dev_to_cpu(const void *, size_t, + enum dma_data_direction); + + if (!arch_is_coherent()) + ___dma_single_dev_to_cpu(kaddr, size, dir); +} + +static inline void __dma_page_cpu_to_dev(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + extern void ___dma_page_cpu_to_dev(struct page *, unsigned long, + size_t, enum dma_data_direction); + + if (!arch_is_coherent()) + ___dma_page_cpu_to_dev(page, off, size, dir); +} + +static inline void __dma_page_dev_to_cpu(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + extern void ___dma_page_dev_to_cpu(struct page *, unsigned long, + size_t, enum dma_data_direction); + + if (!arch_is_coherent()) + ___dma_page_dev_to_cpu(page, off, size, dir); +} + + +static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir) +{ + __dma_page_cpu_to_dev(page, offset, size, dir); + return pfn_to_dma(dev, page_to_pfn(page)) + offset; +} + +static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) +{ + __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), + handle & ~PAGE_MASK, size, dir); +} + /** * arm_dma_map_page - map a portion of a page for streaming DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -76,9 +145,6 @@ static inline void arm_dma_sync_single_for_cpu(struct device *dev, { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, 
handle-offset)); - if (!dmabounce_sync_for_cpu(dev, handle, size, dir)) - return; - __dma_page_dev_to_cpu(page, offset, size, dir); } @@ -87,9 +153,6 @@ static inline void arm_dma_sync_single_for_device(struct device *dev, { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); - if (!dmabounce_sync_for_device(dev, handle, size, dir)) - return; - __dma_page_cpu_to_dev(page, offset, size, dir); } @@ -599,7 +662,6 @@ void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, } /* FIXME: non-speculating: flush on bidirectional mappings? */ } -EXPORT_SYMBOL(___dma_page_cpu_to_dev); void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, size_t size, enum dma_data_direction dir) @@ -619,7 +681,6 @@ void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) set_bit(PG_dcache_clean, &page->flags); } -EXPORT_SYMBOL(___dma_page_dev_to_cpu); /** * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA @@ -737,9 +798,7 @@ static int arm_dma_set_mask(struct device *dev, u64 dma_mask) if (!dev->dma_mask || !dma_supported(dev, dma_mask)) return -EIO; -#ifndef CONFIG_DMABOUNCE *dev->dma_mask = dma_mask; -#endif return 0; } -- cgit v0.10.2 From 51fde3499b531d4cf278f4d2eaa6c45b2865b16b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 10 Feb 2012 19:55:20 +0100 Subject: ARM: dma-mapping: remove redundant code and do the cleanup This patch just performs a global cleanup in DMA mapping implementation for ARM architecture. Some of the tiny helper functions have been moved to the caller code, some have been merged together. 
Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Acked-by: Arnd Bergmann Tested-By: Subash Patel diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c949668..dddb406 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -40,64 +40,12 @@ * the CPU does do speculative prefetches, which means we clean caches * before transfers and delay cache invalidation until transfer completion. * - * Private support functions: these are not part of the API and are - * liable to change. Drivers must not use these. */ -static inline void __dma_single_cpu_to_dev(const void *kaddr, size_t size, - enum dma_data_direction dir) -{ - extern void ___dma_single_cpu_to_dev(const void *, size_t, - enum dma_data_direction); - - if (!arch_is_coherent()) - ___dma_single_cpu_to_dev(kaddr, size, dir); -} - -static inline void __dma_single_dev_to_cpu(const void *kaddr, size_t size, - enum dma_data_direction dir) -{ - extern void ___dma_single_dev_to_cpu(const void *, size_t, - enum dma_data_direction); - - if (!arch_is_coherent()) - ___dma_single_dev_to_cpu(kaddr, size, dir); -} - -static inline void __dma_page_cpu_to_dev(struct page *page, unsigned long off, - size_t size, enum dma_data_direction dir) -{ - extern void ___dma_page_cpu_to_dev(struct page *, unsigned long, +static void __dma_page_cpu_to_dev(struct page *, unsigned long, size_t, enum dma_data_direction); - - if (!arch_is_coherent()) - ___dma_page_cpu_to_dev(page, off, size, dir); -} - -static inline void __dma_page_dev_to_cpu(struct page *page, unsigned long off, - size_t size, enum dma_data_direction dir) -{ - extern void ___dma_page_dev_to_cpu(struct page *, unsigned long, +static void __dma_page_dev_to_cpu(struct page *, unsigned long, size_t, enum dma_data_direction); - if (!arch_is_coherent()) - ___dma_page_dev_to_cpu(page, off, size, dir); -} - - -static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum 
dma_data_direction dir) -{ - __dma_page_cpu_to_dev(page, offset, size, dir); - return pfn_to_dma(dev, page_to_pfn(page)) + offset; -} - -static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), - handle & ~PAGE_MASK, size, dir); -} - /** * arm_dma_map_page - map a portion of a page for streaming DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -112,11 +60,13 @@ static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle, * The device owns this memory once this call has completed. The CPU * can regain ownership by calling dma_unmap_page(). */ -static inline dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, +static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - return __dma_map_page(dev, page, offset, size, dir); + if (!arch_is_coherent()) + __dma_page_cpu_to_dev(page, offset, size, dir); + return pfn_to_dma(dev, page_to_pfn(page)) + offset; } /** @@ -133,27 +83,31 @@ static inline dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, * After this call, reads by the CPU to the buffer are guaranteed to see * whatever the device wrote there. 
*/ -static inline void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, +static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - __dma_unmap_page(dev, handle, size, dir); + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), + handle & ~PAGE_MASK, size, dir); } -static inline void arm_dma_sync_single_for_cpu(struct device *dev, +static void arm_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); - __dma_page_dev_to_cpu(page, offset, size, dir); + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(page, offset, size, dir); } -static inline void arm_dma_sync_single_for_device(struct device *dev, +static void arm_dma_sync_single_for_device(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); - __dma_page_cpu_to_dev(page, offset, size, dir); + if (!arch_is_coherent()) + __dma_page_cpu_to_dev(page, offset, size, dir); } static int arm_dma_set_mask(struct device *dev, u64 dma_mask); @@ -647,7 +601,13 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, } while (left); } -void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, +/* + * Make an area consistent for devices. + * Note: Drivers should NOT use this function directly, as it will break + * platforms with CONFIG_DMABOUNCE. 
+ * Use the driver DMA support - see dma-mapping.h (dma_sync_*) + */ +static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, size_t size, enum dma_data_direction dir) { unsigned long paddr; @@ -663,7 +623,7 @@ void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, /* FIXME: non-speculating: flush on bidirectional mappings? */ } -void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, +static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, size_t size, enum dma_data_direction dir) { unsigned long paddr = page_to_phys(page) + off; -- cgit v0.10.2 From f99d60341238fe73fc514129cd9ae4e44e1b2c47 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 16 May 2012 18:31:23 +0200 Subject: ARM: dma-mapping: use alloc, mmap, free from dma_ops This patch converts dma_alloc/free/mmap_{coherent,writecombine} functions to use generic alloc/free/mmap methods from dma_map_ops structure. A new DMA_ATTR_WRITE_COMBINE DMA attribute has been introduced to implement writecombine methods.
Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Acked-by: Arnd Bergmann Tested-By: Subash Patel diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 813c29d..9d7eb53 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -449,6 +449,9 @@ static int dmabounce_set_mask(struct device *dev, u64 dma_mask) } static struct dma_map_ops dmabounce_ops = { + .alloc = arm_dma_alloc, + .free = arm_dma_free, + .mmap = arm_dma_mmap, .map_page = dmabounce_map_page, .unmap_page = dmabounce_unmap_page, .sync_single_for_cpu = dmabounce_sync_for_cpu, diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 7a7c3c7..bbef15d 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -110,68 +111,115 @@ static inline void dma_free_noncoherent(struct device *dev, size_t size, extern int dma_supported(struct device *dev, u64 mask); /** - * dma_alloc_coherent - allocate consistent memory for DMA + * arm_dma_alloc - allocate consistent memory for DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @size: required memory size * @handle: bus-specific DMA address + * @attrs: optional attributes that specify mapping properties * - * Allocate some uncached, unbuffered memory for a device for - * performing DMA. This function allocates pages, and will - * return the CPU-viewed address, and sets @handle to be the - * device-viewed address. + * Allocate some memory for a device for performing DMA. This function + * allocates pages, and will return the CPU-viewed address, and sets @handle + * to be the device-viewed address.
*/ -extern void *dma_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); +extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, struct dma_attrs *attrs); + +#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) + +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + void *cpu_addr; + BUG_ON(!ops); + + cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + return cpu_addr; +} /** - * dma_free_coherent - free memory allocated by dma_alloc_coherent + * arm_dma_free - free memory allocated by arm_dma_alloc * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @size: size of memory originally requested in dma_alloc_coherent * @cpu_addr: CPU-view address returned from dma_alloc_coherent * @handle: device-view address returned from dma_alloc_coherent + * @attrs: optional attributes that specify mapping properties * * Free (and unmap) a DMA buffer previously allocated by - * dma_alloc_coherent(). + * arm_dma_alloc(). * * References to memory and mappings associated with cpu_addr/handle * during and after this call executing are illegal.
*/ -extern void dma_free_coherent(struct device *, size_t, void *, dma_addr_t); +extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs); + +#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL) + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + BUG_ON(!ops); + + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + ops->free(dev, size, cpu_addr, dma_handle, attrs); +} /** - * dma_mmap_coherent - map a coherent DMA allocation into user space + * arm_dma_mmap - map a coherent DMA allocation into user space * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @vma: vm_area_struct describing requested user mapping * @cpu_addr: kernel CPU-view address returned from dma_alloc_coherent * @handle: device-view address returned from dma_alloc_coherent * @size: size of memory originally requested in dma_alloc_coherent + * @attrs: optional attributes that specify mapping properties * * Map a coherent DMA buffer previously allocated by dma_alloc_coherent * into user space. The coherent DMA buffer must not be freed by the * driver until the user space mapping has been released. */ -int dma_mmap_coherent(struct device *, struct vm_area_struct *, - void *, dma_addr_t, size_t); +extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs); +#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL) -/** - * dma_alloc_writecombine - allocate writecombining memory for DMA - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @size: required memory size - * @handle: bus-specific DMA address - * - * Allocate some uncached, buffered memory for a device for - * performing DMA.
This function allocates pages, and will - * return the CPU-viewed address, and sets @handle to be the - * device-viewed address. - */ -extern void *dma_alloc_writecombine(struct device *, size_t, dma_addr_t *, - gfp_t); +static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, + size_t size, struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + BUG_ON(!ops); + return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); +} -#define dma_free_writecombine(dev,size,cpu_addr,handle) \ - dma_free_coherent(dev,size,cpu_addr,handle) +static inline void *dma_alloc_writecombine(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_alloc_attrs(dev, size, dma_handle, flag, &attrs); +} -int dma_mmap_writecombine(struct device *, struct vm_area_struct *, - void *, dma_addr_t, size_t); +static inline void dma_free_writecombine(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); +} + +static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); +} /* * This can be called during boot to increase the size of the consistent @@ -180,7 +228,6 @@ int dma_mmap_writecombine(struct device *, struct vm_area_struct *, */ extern void __init init_consistent_dma_size(unsigned long size); - /* * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic" * and utilize bounce buffers as needed to work around limited DMA windows. 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index dddb406..2501866 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -113,6 +113,9 @@ static void arm_dma_sync_single_for_device(struct device *dev, static int arm_dma_set_mask(struct device *dev, u64 dma_mask); struct dma_map_ops arm_dma_ops = { + .alloc = arm_dma_alloc, + .free = arm_dma_free, + .mmap = arm_dma_mmap, .map_page = arm_dma_map_page, .unmap_page = arm_dma_unmap_page, .map_sg = arm_dma_map_sg, @@ -415,10 +418,19 @@ static void __dma_free_remap(void *cpu_addr, size_t size) arm_vmregion_free(&consistent_head, c); } +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) +{ + prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ? + pgprot_writecombine(prot) : + pgprot_dmacoherent(prot); + return prot; +} + #else /* !CONFIG_MMU */ #define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) #define __dma_free_remap(addr, size) do { } while (0) +#define __get_dma_pgprot(attrs, prot) __pgprot(0) #endif /* CONFIG_MMU */ @@ -462,41 +474,33 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. */ -void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) +void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, struct dma_attrs *attrs) { + pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); void *memory; if (dma_alloc_from_coherent(dev, size, handle, &memory)) return memory; - return __dma_alloc(dev, size, handle, gfp, - pgprot_dmacoherent(pgprot_kernel), + return __dma_alloc(dev, size, handle, gfp, prot, __builtin_return_address(0)); } -EXPORT_SYMBOL(dma_alloc_coherent); /* - * Allocate a writecombining region, in much the same way as - * dma_alloc_coherent above. + * Create userspace mapping for the DMA-coherent memory. 
*/ -void * -dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) -{ - return __dma_alloc(dev, size, handle, gfp, - pgprot_writecombine(pgprot_kernel), - __builtin_return_address(0)); -} -EXPORT_SYMBOL(dma_alloc_writecombine); - -static int dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) +int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) { int ret = -ENXIO; #ifdef CONFIG_MMU unsigned long user_size, kern_size; struct arm_vmregion *c; + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) return ret; @@ -521,27 +525,12 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } -int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); - return dma_mmap(dev, vma, cpu_addr, dma_addr, size); -} -EXPORT_SYMBOL(dma_mmap_coherent); - -int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - return dma_mmap(dev, vma, cpu_addr, dma_addr, size); -} -EXPORT_SYMBOL(dma_mmap_writecombine); - /* * free a page as defined by the above mapping. * Must not be called with IRQs disabled. 
*/ -void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) +void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) { WARN_ON(irqs_disabled()); @@ -555,7 +544,6 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size); } -EXPORT_SYMBOL(dma_free_coherent); static void dma_cache_maint_page(struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, -- cgit v0.10.2 From 4ce63fcd919c32d22528e54dcd89506962933719 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 16 May 2012 15:48:21 +0200 Subject: ARM: dma-mapping: add support for IOMMU mapper This patch adds a complete implementation of DMA-mapping API for devices which have IOMMU support. This implementation tries to optimize dma address space usage by remapping all possible physical memory chunks into a single dma address space chunk. DMA address space is managed on top of the bitmap stored in the dma_iommu_mapping structure stored in device->archdata. Platform setup code has to initialize parameters of the dma address space (base address, size, allocation precision order) with arm_iommu_create_mapping() function. To reduce the size of the bitmap, all allocations are aligned to the specified order of base 4 KiB pages. dma_alloc_* functions allocate physical memory in chunks, each with alloc_pages() function to avoid failing if the physical memory gets fragmented. In worst case the allocated buffer is composed of 4 KiB page chunks. dma_map_sg() function minimizes the total number of dma address space chunks by merging physical memory chunks into one larger dma address space chunk. If requested chunk (scatter list entry) boundaries match physical page boundaries, most calls to dma_map_sg() will result in creating only one chunk in dma address space.
dma_map_page() simply creates a mapping for the given page(s) in the dma address space. All dma functions also perform required cache operation like their counterparts from the arm linear physical memory mapping version. This patch contains code and fixes kindly provided by: - Krishna Reddy , - Andrzej Pietrasiewicz , - Hiroshi DOYU Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Reviewed-by: Konrad Rzeszutek Wilk Tested-By: Subash Patel diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c8111c5..97478a5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -46,6 +46,14 @@ config ARM config ARM_HAS_SG_CHAIN bool +config NEED_SG_DMA_LENGTH + bool + +config ARM_DMA_USE_IOMMU + select NEED_SG_DMA_LENGTH + select ARM_HAS_SG_CHAIN + bool + config HAVE_PWM bool diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index 6e2cb0e..b69c0d3 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -14,6 +14,9 @@ struct dev_archdata { #ifdef CONFIG_IOMMU_API void *iommu; /* private IOMMU data */ #endif +#ifdef CONFIG_ARM_DMA_USE_IOMMU + struct dma_iommu_mapping *mapping; +#endif }; struct omap_device; diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h new file mode 100644 index 0000000..799b094 --- /dev/null +++ b/arch/arm/include/asm/dma-iommu.h @@ -0,0 +1,34 @@ +#ifndef ASMARM_DMA_IOMMU_H +#define ASMARM_DMA_IOMMU_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +struct dma_iommu_mapping { + /* iommu specific data */ + struct iommu_domain *domain; + + void *bitmap; + size_t bits; + unsigned int order; + dma_addr_t base; + + spinlock_t lock; + struct kref kref; +}; + +struct dma_iommu_mapping * +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, + int order); + +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping); + +int arm_iommu_attach_device(struct device *dev, + struct dma_iommu_mapping *mapping); + +#endif /* __KERNEL__ */ 
+#endif diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 2501866..3ac4760 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -26,6 +28,7 @@ #include #include #include +#include #include "mm.h" @@ -155,6 +158,21 @@ static u64 get_coherent_dma_mask(struct device *dev) return mask; } +static void __dma_clear_buffer(struct page *page, size_t size) +{ + void *ptr; + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. + */ + ptr = page_address(page); + if (ptr) { + memset(ptr, 0, size); + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); + } +} + /* * Allocate a DMA buffer for 'dev' of size 'size' using the * specified gfp mask. Note that 'size' must be page aligned. @@ -163,7 +181,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf { unsigned long order = get_order(size); struct page *page, *p, *e; - void *ptr; u64 mask = get_coherent_dma_mask(dev); #ifdef CONFIG_DMA_API_DEBUG @@ -192,14 +209,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) __free_page(p); - /* - * Ensure that the allocated pages are zeroed, and that any data - * lurking in the kernel direct-mapped region is invalidated. 
- */ - ptr = page_address(page); - memset(ptr, 0, size); - dmac_flush_range(ptr, ptr + size); - outer_flush_range(__pa(ptr), __pa(ptr) + size); + __dma_clear_buffer(page, size); return page; } @@ -348,7 +358,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); pte = consistent_pte[idx] + off; - c->vm_pages = page; + c->priv = page; do { BUG_ON(!pte_none(*pte)); @@ -509,13 +519,14 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); if (c) { unsigned long off = vma->vm_pgoff; + struct page *pages = c->priv; kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; if (off < kern_size && user_size <= (kern_size - off)) { ret = remap_pfn_range(vma, vma->vm_start, - page_to_pfn(c->vm_pages) + off, + page_to_pfn(pages) + off, user_size << PAGE_SHIFT, vma->vm_page_prot); } @@ -654,6 +665,9 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int i, j; for_each_sg(sg, s, nents, i) { +#ifdef CONFIG_NEED_SG_DMA_LENGTH + s->dma_length = s->length; +#endif s->dma_address = ops->map_page(dev, sg_page(s), s->offset, s->length, dir, attrs); if (dma_mapping_error(dev, s->dma_address)) @@ -762,3 +776,679 @@ static int __init dma_debug_do_init(void) return 0; } fs_initcall(dma_debug_do_init); + +#ifdef CONFIG_ARM_DMA_USE_IOMMU + +/* IOMMU */ + +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, + size_t size) +{ + unsigned int order = get_order(size); + unsigned int align = 0; + unsigned int count, start; + unsigned long flags; + + count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) + + (1 << mapping->order) - 1) >> mapping->order; + + if (order > mapping->order) + align = (1 << (order - mapping->order)) - 1; + + spin_lock_irqsave(&mapping->lock, flags); + start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0, + count, align); + if (start > mapping->bits) { + 
spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_ERROR_CODE; + } + + bitmap_set(mapping->bitmap, start, count); + spin_unlock_irqrestore(&mapping->lock, flags); + + return mapping->base + (start << (mapping->order + PAGE_SHIFT)); +} + +static inline void __free_iova(struct dma_iommu_mapping *mapping, + dma_addr_t addr, size_t size) +{ + unsigned int start = (addr - mapping->base) >> + (mapping->order + PAGE_SHIFT); + unsigned int count = ((size >> PAGE_SHIFT) + + (1 << mapping->order) - 1) >> mapping->order; + unsigned long flags; + + spin_lock_irqsave(&mapping->lock, flags); + bitmap_clear(mapping->bitmap, start, count); + spin_unlock_irqrestore(&mapping->lock, flags); +} + +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp) +{ + struct page **pages; + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i = 0; + + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, gfp); + else + pages = vzalloc(array_size); + if (!pages) + return NULL; + + while (count) { + int j, order = __ffs(count); + + pages[i] = alloc_pages(gfp | __GFP_NOWARN, order); + while (!pages[i] && order) + pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order); + if (!pages[i]) + goto error; + + if (order) + split_page(pages[i], order); + j = 1 << order; + while (--j) + pages[i + j] = pages[i] + j; + + __dma_clear_buffer(pages[i], PAGE_SIZE << order); + i += 1 << order; + count -= 1 << order; + } + + return pages; +error: + while (--i) + if (pages[i]) + __free_pages(pages[i], 0); + if (array_size < PAGE_SIZE) + kfree(pages); + else + vfree(pages); + return NULL; +} + +static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size) +{ + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i; + for (i = 0; i < count; i++) + if (pages[i]) + __free_pages(pages[i], 0); + if (array_size < PAGE_SIZE) + kfree(pages); + else + vfree(pages); + return 0; +} + +/* + * 
Create a CPU mapping for a specified pages + */ +static void * +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot) +{ + struct arm_vmregion *c; + size_t align; + size_t count = size >> PAGE_SHIFT; + int bit; + + if (!consistent_pte[0]) { + pr_err("%s: not initialised\n", __func__); + dump_stack(); + return NULL; + } + + /* + * Align the virtual region allocation - maximum alignment is + * a section size, minimum is a page size. This helps reduce + * fragmentation of the DMA space, and also prevents allocations + * smaller than a section from crossing a section boundary. + */ + bit = fls(size - 1); + if (bit > SECTION_SHIFT) + bit = SECTION_SHIFT; + align = 1 << bit; + + /* + * Allocate a virtual address in the consistent mapping region. + */ + c = arm_vmregion_alloc(&consistent_head, align, size, + gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL); + if (c) { + pte_t *pte; + int idx = CONSISTENT_PTE_INDEX(c->vm_start); + int i = 0; + u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); + + pte = consistent_pte[idx] + off; + c->priv = pages; + + do { + BUG_ON(!pte_none(*pte)); + + set_pte_ext(pte, mk_pte(pages[i], prot), 0); + pte++; + off++; + i++; + if (off >= PTRS_PER_PTE) { + off = 0; + pte = consistent_pte[++idx]; + } + } while (i < count); + + dsb(); + + return (void *)c->vm_start; + } + return NULL; +} + +/* + * Create a mapping in device IO address space for specified pages + */ +static dma_addr_t +__iommu_create_mapping(struct device *dev, struct page **pages, size_t size) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + dma_addr_t dma_addr, iova; + int i, ret = DMA_ERROR_CODE; + + dma_addr = __alloc_iova(mapping, size); + if (dma_addr == DMA_ERROR_CODE) + return dma_addr; + + iova = dma_addr; + for (i = 0; i < count; ) { + unsigned int next_pfn = page_to_pfn(pages[i]) + 1; + phys_addr_t phys = page_to_phys(pages[i]); + unsigned int len, j; + + for (j = i + 
1; j < count; j++, next_pfn++) + if (page_to_pfn(pages[j]) != next_pfn) + break; + + len = (j - i) << PAGE_SHIFT; + ret = iommu_map(mapping->domain, iova, phys, len, 0); + if (ret < 0) + goto fail; + iova += len; + i = j; + } + return dma_addr; +fail: + iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); + __free_iova(mapping, dma_addr, size); + return DMA_ERROR_CODE; +} + +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + + /* + * add optional in-page offset from iova to size and align + * result to page size + */ + size = PAGE_ALIGN((iova & ~PAGE_MASK) + size); + iova &= PAGE_MASK; + + iommu_unmap(mapping->domain, iova, size); + __free_iova(mapping, iova, size); + return 0; +} + +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); + struct page **pages; + void *addr = NULL; + + *handle = DMA_ERROR_CODE; + size = PAGE_ALIGN(size); + + pages = __iommu_alloc_buffer(dev, size, gfp); + if (!pages) + return NULL; + + *handle = __iommu_create_mapping(dev, pages, size); + if (*handle == DMA_ERROR_CODE) + goto err_buffer; + + addr = __iommu_alloc_remap(pages, size, gfp, prot); + if (!addr) + goto err_mapping; + + return addr; + +err_mapping: + __iommu_remove_mapping(dev, *handle, size); +err_buffer: + __iommu_free_buffer(dev, pages, size); + return NULL; +} + +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + struct arm_vmregion *c; + + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); + c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); + + if (c) { + struct page **pages = c->priv; + + unsigned long uaddr = vma->vm_start; + unsigned long usize = vma->vm_end - vma->vm_start; + int i = 0; + + do { + 
int ret; + + ret = vm_insert_page(vma, uaddr, pages[i++]); + if (ret) { + pr_err("Remapping memory, error: %d\n", ret); + return ret; + } + + uaddr += PAGE_SIZE; + usize -= PAGE_SIZE; + } while (usize > 0); + } + return 0; +} + +/* + * free a page as defined by the above mapping. + * Must not be called with IRQs disabled. + */ +void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + struct arm_vmregion *c; + size = PAGE_ALIGN(size); + + c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); + if (c) { + struct page **pages = c->priv; + __dma_free_remap(cpu_addr, size); + __iommu_remove_mapping(dev, handle, size); + __iommu_free_buffer(dev, pages, size); + } +} + +/* + * Map a part of the scatter-gather list into contiguous io address space + */ +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, + size_t size, dma_addr_t *handle, + enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova, iova_base; + int ret = 0; + unsigned int count; + struct scatterlist *s; + + size = PAGE_ALIGN(size); + *handle = DMA_ERROR_CODE; + + iova_base = iova = __alloc_iova(mapping, size); + if (iova == DMA_ERROR_CODE) + return -ENOMEM; + + for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { + phys_addr_t phys = page_to_phys(sg_page(s)); + unsigned int len = PAGE_ALIGN(s->offset + s->length); + + if (!arch_is_coherent()) + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); + + ret = iommu_map(mapping->domain, iova, phys, len, 0); + if (ret < 0) + goto fail; + count += len >> PAGE_SHIFT; + iova += len; + } + *handle = iova_base; + + return 0; +fail: + iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE); + __free_iova(mapping, iova_base, size); + return ret; +} + +/** + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + 
* @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * The scatter gather list elements are merged together (if possible) and + * tagged with the appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. + */ +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct scatterlist *s = sg, *dma = sg, *start = sg; + int i, count = 0; + unsigned int offset = s->offset; + unsigned int size = s->offset + s->length; + unsigned int max = dma_get_max_seg_size(dev); + + for (i = 1; i < nents; i++) { + s = sg_next(s); + + s->dma_address = DMA_ERROR_CODE; + s->dma_length = 0; + + if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { + if (__map_sg_chunk(dev, start, size, &dma->dma_address, + dir) < 0) + goto bad_mapping; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + size = offset = s->offset; + start = s; + dma = sg_next(dma); + count += 1; + } + size += s->length; + } + if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0) + goto bad_mapping; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + return count+1; + +bad_mapping: + for_each_sg(sg, s, count, i) + __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); + return 0; +} + +/** + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). 
+ */ +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (sg_dma_len(s)) + __iommu_remove_mapping(dev, sg_dma_address(s), + sg_dma_len(s)); + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(sg_page(s), s->offset, + s->length, dir); + } +} + +/** + * arm_iommu_sync_sg_for_cpu + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); + +} + +/** + * arm_iommu_sync_sg_for_device + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + if (!arch_is_coherent()) + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); +} + + +/** + * arm_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t dma_addr; + int ret, len 
= PAGE_ALIGN(size + offset); + + if (!arch_is_coherent()) + __dma_page_cpu_to_dev(page, offset, size, dir); + + dma_addr = __alloc_iova(mapping, len); + if (dma_addr == DMA_ERROR_CODE) + return dma_addr; + + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0); + if (ret < 0) + goto fail; + + return dma_addr + offset; +fail: + __free_iova(mapping, dma_addr, len); + return DMA_ERROR_CODE; +} + +/** + * arm_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(page, offset, size, dir); + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +static void arm_iommu_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + unsigned int offset = handle & ~PAGE_MASK; + + if (!iova) + return; + + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(page, offset, size, dir); +} + +static void arm_iommu_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page 
*page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + unsigned int offset = handle & ~PAGE_MASK; + + if (!iova) + return; + + __dma_page_cpu_to_dev(page, offset, size, dir); +} + +struct dma_map_ops iommu_ops = { + .alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + + .map_page = arm_iommu_map_page, + .unmap_page = arm_iommu_unmap_page, + .sync_single_for_cpu = arm_iommu_sync_single_for_cpu, + .sync_single_for_device = arm_iommu_sync_single_for_device, + + .map_sg = arm_iommu_map_sg, + .unmap_sg = arm_iommu_unmap_sg, + .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu, + .sync_sg_for_device = arm_iommu_sync_sg_for_device, +}; + +/** + * arm_iommu_create_mapping + * @bus: pointer to the bus holding the client device (for IOMMU calls) + * @base: start address of the valid IO address space + * @size: size of the valid IO address space + * @order: accuracy of the IO addresses allocations + * + * Creates a mapping structure which holds information about used/unused + * IO address ranges, which is required to perform memory allocation and + * mapping with IOMMU aware functions. + * + * The client device need to be attached to the mapping with + * arm_iommu_attach_device function. 
+ */ +struct dma_iommu_mapping * +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, + int order) +{ + unsigned int count = size >> (PAGE_SHIFT + order); + unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); + struct dma_iommu_mapping *mapping; + int err = -ENOMEM; + + if (!count) + return ERR_PTR(-EINVAL); + + mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); + if (!mapping) + goto err; + + mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!mapping->bitmap) + goto err2; + + mapping->base = base; + mapping->bits = BITS_PER_BYTE * bitmap_size; + mapping->order = order; + spin_lock_init(&mapping->lock); + + mapping->domain = iommu_domain_alloc(bus); + if (!mapping->domain) + goto err3; + + kref_init(&mapping->kref); + return mapping; +err3: + kfree(mapping->bitmap); +err2: + kfree(mapping); +err: + return ERR_PTR(err); +} + +static void release_iommu_mapping(struct kref *kref) +{ + struct dma_iommu_mapping *mapping = + container_of(kref, struct dma_iommu_mapping, kref); + + iommu_domain_free(mapping->domain); + kfree(mapping->bitmap); + kfree(mapping); +} + +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) +{ + if (mapping) + kref_put(&mapping->kref, release_iommu_mapping); +} + +/** + * arm_iommu_attach_device + * @dev: valid struct device pointer + * @mapping: io address space mapping structure (returned from + * arm_iommu_create_mapping) + * + * Attaches specified io address space mapping to the provided device, + * this replaces the dma operations (dma_map_ops pointer) with the + * IOMMU aware version. More than one client might be attached to + * the same io address space mapping. 
+ */ +int arm_iommu_attach_device(struct device *dev, + struct dma_iommu_mapping *mapping) +{ + int err; + + err = iommu_attach_device(mapping->domain, dev); + if (err) + return err; + + kref_get(&mapping->kref); + dev->archdata.mapping = mapping; + set_dma_ops(dev, &iommu_ops); + + pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev)); + return 0; +} + +#endif diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h index 162be66..bf312c3 100644 --- a/arch/arm/mm/vmregion.h +++ b/arch/arm/mm/vmregion.h @@ -17,7 +17,7 @@ struct arm_vmregion { struct list_head vm_list; unsigned long vm_start; unsigned long vm_end; - struct page *vm_pages; + void *priv; int vm_active; const void *caller; }; -- cgit v0.10.2 From 5f63b720b62925ef3c6a85473dcd547b0fd90616 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Wed, 11 Jan 2012 15:16:11 +0100 Subject: mm: page_alloc: remove trailing whitespace Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Acked-by: Mel Gorman diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 918330f..6fb46c1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -513,10 +513,10 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, * free pages of length of (1 << order) and marked with _mapcount -2. Page's * order is recorded in page_private(page) field. * So when we are allocating or freeing one, we can derive the state of the - * other. That is, if we allocate a small block, and both were - * free, the remainder of the region must be split into blocks. + * other. That is, if we allocate a small block, and both were + * free, the remainder of the region must be split into blocks. * If a block is freed, and its buddy is also free, then this - * triggers coalescing into a block of larger size. + * triggers coalescing into a block of larger size. 
* * -- wli */ @@ -1061,17 +1061,17 @@ retry_reserve: return page; } -/* +/* * Obtain a specified number of elements from the buddy allocator, all under * a single hold of the lock, for efficiency. Add them to the supplied list. * Returns the number of new pages which were placed at *list. */ -static int rmqueue_bulk(struct zone *zone, unsigned int order, +static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, int cold) { int i; - + spin_lock(&zone->lock); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype); @@ -4301,7 +4301,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, init_waitqueue_head(&pgdat->kswapd_wait); pgdat->kswapd_max_order = 0; pgdat_page_cgroup_init(pgdat); - + for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, memmap_pages; -- cgit v0.10.2 From 2fe86e0004076128f05d5a774b5c9c03d9dc3de2 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 30 Jan 2012 13:16:26 +0100 Subject: mm: compaction: introduce isolate_migratepages_range() This commit introduces isolate_migratepages_range() function which extracts functionality from isolate_migratepages() so that it can be used on arbitrary PFN ranges. isolate_migratepages() function is implemented as a simple wrapper around isolate_migratepages_range(). 
Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Acked-by: Mel Gorman Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/mm/compaction.c b/mm/compaction.c index 74a8c82..ee20fc0 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -250,31 +250,34 @@ typedef enum { ISOLATE_SUCCESS, /* Pages isolated, migrate */ } isolate_migrate_t; -/* - * Isolate all pages that can be migrated from the block pointed to by - * the migrate scanner within compact_control. +/** + * isolate_migratepages_range() - isolate all migrate-able pages in range. + * @zone: Zone pages are in. + * @cc: Compaction control structure. + * @low_pfn: The first PFN of the range. + * @end_pfn: The one-past-the-last PFN of the range. + * + * Isolate all pages that can be migrated from the range specified by + * [low_pfn, end_pfn). Returns zero if there is a fatal signal + * pending, otherwise the PFN of the first page that was not scanned + * (which may be less than, equal to, or more than end_pfn). + * + * Assumes that cc->migratepages is empty and cc->nr_migratepages is + * zero. + * + * Apart from cc->migratepages and cc->nr_migratepages this function + * does not modify any cc's fields, in particular it does not modify + * (or read for that matter) cc->migrate_pfn.
*/ -static isolate_migrate_t isolate_migratepages(struct zone *zone, - struct compact_control *cc) +static unsigned long +isolate_migratepages_range(struct zone *zone, struct compact_control *cc, + unsigned long low_pfn, unsigned long end_pfn) { - unsigned long low_pfn, end_pfn; unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; - /* Do not scan outside zone boundaries */ - low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); - - /* Only scan within a pageblock boundary */ - end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); - - /* Do not cross the free scanner or scan within a memory hole */ - if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { - cc->migrate_pfn = end_pfn; - return ISOLATE_NONE; - } - /* * Ensure that there are not too many pages isolated from the LRU * list by either parallel reclaimers or compaction. If there are, @@ -283,12 +286,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, while (unlikely(too_many_isolated(zone))) { /* async migration should just abort */ if (!cc->sync) - return ISOLATE_ABORT; + return 0; congestion_wait(BLK_RW_ASYNC, HZ/10); if (fatal_signal_pending(current)) - return ISOLATE_ABORT; + return 0; } /* Time to isolate some pages for migration */ @@ -396,10 +399,40 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, acct_isolated(zone, cc); spin_unlock_irq(&zone->lru_lock); - cc->migrate_pfn = low_pfn; trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); + return low_pfn; +} + +/* + * Isolate all pages that can be migrated from the block pointed to by + * the migrate scanner within compact_control. 
+ */ +static isolate_migrate_t isolate_migratepages(struct zone *zone, + struct compact_control *cc) +{ + unsigned long low_pfn, end_pfn; + + /* Do not scan outside zone boundaries */ + low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); + + /* Only scan within a pageblock boundary */ + end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); + + /* Do not cross the free scanner or scan within a memory hole */ + if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { + cc->migrate_pfn = end_pfn; + return ISOLATE_NONE; + } + + /* Perform the isolation */ + low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn); + if (!low_pfn) + return ISOLATE_ABORT; + + cc->migrate_pfn = low_pfn; + return ISOLATE_SUCCESS; } -- cgit v0.10.2 From 03d44192f69a45d780ba124f691e76020a44ebae Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 30 Jan 2012 13:23:47 +0100 Subject: mm: compaction: introduce map_pages() This commit creates a map_pages() function which maps pages freed using split_free_page(). This merely moves some code from isolate_freepages() so that it can be reused in other places. Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Acked-by: Mel Gorman Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/mm/compaction.c b/mm/compaction.c index ee20fc0..d9d7b35 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -127,6 +127,16 @@ static bool suitable_migration_target(struct page *page) return false; } +static void map_pages(struct list_head *list) +{ + struct page *page; + + list_for_each_entry(page, list, lru) { + arch_alloc_page(page, 0); + kernel_map_pages(page, 1, 1); + } +} + /* * Based on information in the current compact_control, find blocks * suitable for isolating free pages from and then isolate them.
@@ -206,10 +216,7 @@ static void isolate_freepages(struct zone *zone, } /* split_free_page does not map the pages */ - list_for_each_entry(page, freelist, lru) { - arch_alloc_page(page, 0); - kernel_map_pages(page, 1, 1); - } + map_pages(freelist); cc->free_pfn = high_pfn; cc->nr_freepages = nr_freepages; -- cgit v0.10.2 From 85aa125f001f87f96a72e9e6ee515490843b1202 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 30 Jan 2012 13:24:03 +0100 Subject: mm: compaction: introduce isolate_freepages_range() This commit introduces isolate_freepages_range() function which generalises isolate_freepages_block() so that it can be used on arbitrary PFN ranges. isolate_freepages_block() is left with only minor changes. Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Acked-by: Mel Gorman Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/mm/compaction.c b/mm/compaction.c index d9d7b35..06b198f 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -54,24 +54,20 @@ static unsigned long release_freepages(struct list_head *freelist) return count; } -/* Isolate free pages onto a private freelist. Must hold zone->lock */ -static unsigned long isolate_freepages_block(struct zone *zone, - unsigned long blockpfn, - struct list_head *freelist) +/* + * Isolate free pages onto a private freelist. Caller must hold zone->lock. + * If @strict is true, will abort returning 0 on any invalid PFNs or non-free + * pages inside of the pageblock (even though it may still end up isolating + * some pages). 
+ */ +static unsigned long isolate_freepages_block(unsigned long blockpfn, + unsigned long end_pfn, + struct list_head *freelist, + bool strict) { - unsigned long zone_end_pfn, end_pfn; int nr_scanned = 0, total_isolated = 0; struct page *cursor; - /* Get the last PFN we should scan for free pages at */ - zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; - end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn); - - /* Find the first usable PFN in the block to initialse page cursor */ - for (; blockpfn < end_pfn; blockpfn++) { - if (pfn_valid_within(blockpfn)) - break; - } cursor = pfn_to_page(blockpfn); /* Isolate free pages. This assumes the block is valid */ @@ -79,15 +75,23 @@ static unsigned long isolate_freepages_block(struct zone *zone, int isolated, i; struct page *page = cursor; - if (!pfn_valid_within(blockpfn)) + if (!pfn_valid_within(blockpfn)) { + if (strict) + return 0; continue; + } nr_scanned++; - if (!PageBuddy(page)) + if (!PageBuddy(page)) { + if (strict) + return 0; continue; + } /* Found a free page, break it into order-0 pages */ isolated = split_free_page(page); + if (!isolated && strict) + return 0; total_isolated += isolated; for (i = 0; i < isolated; i++) { list_add(&page->lru, freelist); @@ -105,6 +109,73 @@ static unsigned long isolate_freepages_block(struct zone *zone, return total_isolated; } +/** + * isolate_freepages_range() - isolate free pages. + * @start_pfn: The first PFN to start isolating. + * @end_pfn: The one-past-last PFN. + * + * Non-free pages, invalid PFNs, or zone boundaries within the + * [start_pfn, end_pfn) range are considered errors, cause function to + * undo its actions and return zero. + * + * Otherwise, function returns one-past-the-last PFN of isolated page + * (which may be greater then end_pfn if end fell in a middle of + * a free page). 
+ */ +static unsigned long +isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) +{ + unsigned long isolated, pfn, block_end_pfn, flags; + struct zone *zone = NULL; + LIST_HEAD(freelist); + + if (pfn_valid(start_pfn)) + zone = page_zone(pfn_to_page(start_pfn)); + + for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) { + if (!pfn_valid(pfn) || zone != page_zone(pfn_to_page(pfn))) + break; + + /* + * On subsequent iterations ALIGN() is actually not needed, + * but we keep it that we not to complicate the code. + */ + block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + block_end_pfn = min(block_end_pfn, end_pfn); + + spin_lock_irqsave(&zone->lock, flags); + isolated = isolate_freepages_block(pfn, block_end_pfn, + &freelist, true); + spin_unlock_irqrestore(&zone->lock, flags); + + /* + * In strict mode, isolate_freepages_block() returns 0 if + * there are any holes in the block (ie. invalid PFNs or + * non-free pages). + */ + if (!isolated) + break; + + /* + * If we managed to isolate pages, it is always (1 << n) * + * pageblock_nr_pages for some non-negative n. (Max order + * page may span two pageblocks). + */ + } + + /* split_free_page does not map the pages */ + map_pages(&freelist); + + if (pfn < end_pfn) { + /* Loop terminated early, cleanup. */ + release_freepages(&freelist); + return 0; + } + + /* We don't use freelists for anything. 
*/ + return pfn; +} + /* Returns true if the page is within a block suitable for migration to */ static bool suitable_migration_target(struct page *page) { @@ -145,7 +216,7 @@ static void isolate_freepages(struct zone *zone, struct compact_control *cc) { struct page *page; - unsigned long high_pfn, low_pfn, pfn; + unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; unsigned long flags; int nr_freepages = cc->nr_freepages; struct list_head *freelist = &cc->freepages; @@ -165,6 +236,8 @@ static void isolate_freepages(struct zone *zone, */ high_pfn = min(low_pfn, pfn); + zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; + /* * Isolate free pages until enough are available to migrate the * pages on cc->migratepages. We stop searching if the migrate @@ -201,7 +274,9 @@ static void isolate_freepages(struct zone *zone, isolated = 0; spin_lock_irqsave(&zone->lock, flags); if (suitable_migration_target(page)) { - isolated = isolate_freepages_block(zone, pfn, freelist); + end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); + isolated = isolate_freepages_block(pfn, end_pfn, + freelist, false); nr_freepages += isolated; } spin_unlock_irqrestore(&zone->lock, flags); -- cgit v0.10.2 From ff9543fd32060917beb080b1eb2d1d41ec7f39e0 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 29 Dec 2011 13:09:50 +0100 Subject: mm: compaction: export some of the functions This commit exports some of the functions from compaction.c file outside of it adding their declaration into internal.h header file so that other mm related code can use them. This forced compaction.c to always be compiled (as opposed to being compiled only if CONFIG_COMPACTION is defined) but as to avoid introducing code that user did not ask for, part of the compaction.c is now wrapped in an #ifdef.
Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Acked-by: Mel Gorman Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/mm/Makefile b/mm/Makefile index 50ec00e..8aada89 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -13,7 +13,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ page_isolation.o mm_init.o mmu_context.o percpu.o \ - $(mmu-y) + compaction.o $(mmu-y) obj-y += init-mm.o ifdef CONFIG_NO_BOOTMEM @@ -32,7 +32,6 @@ obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o obj-$(CONFIG_SLOB) += slob.o -obj-$(CONFIG_COMPACTION) += compaction.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_KSM) += ksm.o obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o diff --git a/mm/compaction.c b/mm/compaction.c index 06b198f..7a92e41 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -16,30 +16,11 @@ #include #include "internal.h" +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + #define CREATE_TRACE_POINTS #include -/* - * compact_control is used to track pages being migrated and the free pages - * they are being migrated to during memory compaction. The free_pfn starts - * at the end of a zone and migrate_pfn begins at the start. 
Movable pages - * are moved to the end of a zone during a compaction run and the run - * completes when free_pfn <= migrate_pfn - */ -struct compact_control { - struct list_head freepages; /* List of free pages to migrate to */ - struct list_head migratepages; /* List of pages being migrated */ - unsigned long nr_freepages; /* Number of isolated free pages */ - unsigned long nr_migratepages; /* Number of pages to migrate */ - unsigned long free_pfn; /* isolate_freepages search base */ - unsigned long migrate_pfn; /* isolate_migratepages search base */ - bool sync; /* Synchronous migration */ - - int order; /* order a direct compactor needs */ - int migratetype; /* MOVABLE, RECLAIMABLE etc */ - struct zone *zone; -}; - static unsigned long release_freepages(struct list_head *freelist) { struct page *page, *next; @@ -54,6 +35,16 @@ static unsigned long release_freepages(struct list_head *freelist) return count; } +static void map_pages(struct list_head *list) +{ + struct page *page; + + list_for_each_entry(page, list, lru) { + arch_alloc_page(page, 0); + kernel_map_pages(page, 1, 1); + } +} + /* * Isolate free pages onto a private freelist. Caller must hold zone->lock. * If @strict is true, will abort returning 0 on any invalid PFNs or non-free @@ -122,7 +113,7 @@ static unsigned long isolate_freepages_block(unsigned long blockpfn, * (which may be greater then end_pfn if end fell in a middle of * a free page). 
*/ -static unsigned long +unsigned long isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) { unsigned long isolated, pfn, block_end_pfn, flags; @@ -176,127 +167,6 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) return pfn; } -/* Returns true if the page is within a block suitable for migration to */ -static bool suitable_migration_target(struct page *page) -{ - - int migratetype = get_pageblock_migratetype(page); - - /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ - if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) - return false; - - /* If the page is a large free page, then allow migration */ - if (PageBuddy(page) && page_order(page) >= pageblock_order) - return true; - - /* If the block is MIGRATE_MOVABLE, allow migration */ - if (migratetype == MIGRATE_MOVABLE) - return true; - - /* Otherwise skip the block */ - return false; -} - -static void map_pages(struct list_head *list) -{ - struct page *page; - - list_for_each_entry(page, list, lru) { - arch_alloc_page(page, 0); - kernel_map_pages(page, 1, 1); - } -} - -/* - * Based on information in the current compact_control, find blocks - * suitable for isolating free pages from and then isolate them. - */ -static void isolate_freepages(struct zone *zone, - struct compact_control *cc) -{ - struct page *page; - unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; - unsigned long flags; - int nr_freepages = cc->nr_freepages; - struct list_head *freelist = &cc->freepages; - - /* - * Initialise the free scanner. The starting point is where we last - * scanned from (or the end of the zone if starting). The low point - * is the end of the pageblock the migration scanner is using. 
- */ - pfn = cc->free_pfn; - low_pfn = cc->migrate_pfn + pageblock_nr_pages; - - /* - * Take care that if the migration scanner is at the end of the zone - * that the free scanner does not accidentally move to the next zone - * in the next isolation cycle. - */ - high_pfn = min(low_pfn, pfn); - - zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; - - /* - * Isolate free pages until enough are available to migrate the - * pages on cc->migratepages. We stop searching if the migrate - * and free page scanners meet or enough free pages are isolated. - */ - for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; - pfn -= pageblock_nr_pages) { - unsigned long isolated; - - if (!pfn_valid(pfn)) - continue; - - /* - * Check for overlapping nodes/zones. It's possible on some - * configurations to have a setup like - * node0 node1 node0 - * i.e. it's possible that all pages within a zones range of - * pages do not belong to a single zone. - */ - page = pfn_to_page(pfn); - if (page_zone(page) != zone) - continue; - - /* Check the block is suitable for migration */ - if (!suitable_migration_target(page)) - continue; - - /* - * Found a block suitable for isolating free pages from. Now - * we disabled interrupts, double check things are ok and - * isolate the pages. This is to minimise the time IRQs - * are disabled - */ - isolated = 0; - spin_lock_irqsave(&zone->lock, flags); - if (suitable_migration_target(page)) { - end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); - isolated = isolate_freepages_block(pfn, end_pfn, - freelist, false); - nr_freepages += isolated; - } - spin_unlock_irqrestore(&zone->lock, flags); - - /* - * Record the highest PFN we isolated pages from. 
When next - * looking for free pages, the search will restart here as - * page migration may have returned some pages to the allocator - */ - if (isolated) - high_pfn = max(high_pfn, pfn); - } - - /* split_free_page does not map the pages */ - map_pages(freelist); - - cc->free_pfn = high_pfn; - cc->nr_freepages = nr_freepages; -} - /* Update the number of anon and file isolated pages in the zone */ static void acct_isolated(struct zone *zone, struct compact_control *cc) { @@ -325,13 +195,6 @@ static bool too_many_isolated(struct zone *zone) return isolated > (inactive + active) / 2; } -/* possible outcome of isolate_migratepages */ -typedef enum { - ISOLATE_ABORT, /* Abort compaction now */ - ISOLATE_NONE, /* No pages isolated, continue scanning */ - ISOLATE_SUCCESS, /* Pages isolated, migrate */ -} isolate_migrate_t; - /** * isolate_migratepages_range() - isolate all migrate-able pages in range. * @zone: Zone pages are in. @@ -351,7 +214,7 @@ typedef enum { * does not modify any cc's fields, in particular it does not modify * (or read for that matter) cc->migrate_pfn. 
*/ -static unsigned long +unsigned long isolate_migratepages_range(struct zone *zone, struct compact_control *cc, unsigned long low_pfn, unsigned long end_pfn) { @@ -487,35 +350,118 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, return low_pfn; } +#endif /* CONFIG_COMPACTION || CONFIG_CMA */ +#ifdef CONFIG_COMPACTION + +/* Returns true if the page is within a block suitable for migration to */ +static bool suitable_migration_target(struct page *page) +{ + + int migratetype = get_pageblock_migratetype(page); + + /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ + if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) + return false; + + /* If the page is a large free page, then allow migration */ + if (PageBuddy(page) && page_order(page) >= pageblock_order) + return true; + + /* If the block is MIGRATE_MOVABLE, allow migration */ + if (migratetype == MIGRATE_MOVABLE) + return true; + + /* Otherwise skip the block */ + return false; +} + /* - * Isolate all pages that can be migrated from the block pointed to by - * the migrate scanner within compact_control. + * Based on information in the current compact_control, find blocks + * suitable for isolating free pages from and then isolate them. */ -static isolate_migrate_t isolate_migratepages(struct zone *zone, - struct compact_control *cc) +static void isolate_freepages(struct zone *zone, + struct compact_control *cc) { - unsigned long low_pfn, end_pfn; + struct page *page; + unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; + unsigned long flags; + int nr_freepages = cc->nr_freepages; + struct list_head *freelist = &cc->freepages; - /* Do not scan outside zone boundaries */ - low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); + /* + * Initialise the free scanner. The starting point is where we last + * scanned from (or the end of the zone if starting). The low point + * is the end of the pageblock the migration scanner is using. 
+ */ + pfn = cc->free_pfn; + low_pfn = cc->migrate_pfn + pageblock_nr_pages; - /* Only scan within a pageblock boundary */ - end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); + /* + * Take care that if the migration scanner is at the end of the zone + * that the free scanner does not accidentally move to the next zone + * in the next isolation cycle. + */ + high_pfn = min(low_pfn, pfn); - /* Do not cross the free scanner or scan within a memory hole */ - if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { - cc->migrate_pfn = end_pfn; - return ISOLATE_NONE; - } + zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; - /* Perform the isolation */ - low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn); - if (!low_pfn) - return ISOLATE_ABORT; + /* + * Isolate free pages until enough are available to migrate the + * pages on cc->migratepages. We stop searching if the migrate + * and free page scanners meet or enough free pages are isolated. + */ + for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; + pfn -= pageblock_nr_pages) { + unsigned long isolated; - cc->migrate_pfn = low_pfn; + if (!pfn_valid(pfn)) + continue; - return ISOLATE_SUCCESS; + /* + * Check for overlapping nodes/zones. It's possible on some + * configurations to have a setup like + * node0 node1 node0 + * i.e. it's possible that all pages within a zones range of + * pages do not belong to a single zone. + */ + page = pfn_to_page(pfn); + if (page_zone(page) != zone) + continue; + + /* Check the block is suitable for migration */ + if (!suitable_migration_target(page)) + continue; + + /* + * Found a block suitable for isolating free pages from. Now + * we disabled interrupts, double check things are ok and + * isolate the pages. 
This is to minimise the time IRQs + * are disabled + */ + isolated = 0; + spin_lock_irqsave(&zone->lock, flags); + if (suitable_migration_target(page)) { + end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); + isolated = isolate_freepages_block(pfn, end_pfn, + freelist, false); + nr_freepages += isolated; + } + spin_unlock_irqrestore(&zone->lock, flags); + + /* + * Record the highest PFN we isolated pages from. When next + * looking for free pages, the search will restart here as + * page migration may have returned some pages to the allocator + */ + if (isolated) + high_pfn = max(high_pfn, pfn); + } + + /* split_free_page does not map the pages */ + map_pages(freelist); + + cc->free_pfn = high_pfn; + cc->nr_freepages = nr_freepages; } /* @@ -564,6 +510,44 @@ static void update_nr_listpages(struct compact_control *cc) cc->nr_freepages = nr_freepages; } +/* possible outcome of isolate_migratepages */ +typedef enum { + ISOLATE_ABORT, /* Abort compaction now */ + ISOLATE_NONE, /* No pages isolated, continue scanning */ + ISOLATE_SUCCESS, /* Pages isolated, migrate */ +} isolate_migrate_t; + +/* + * Isolate all pages that can be migrated from the block pointed to by + * the migrate scanner within compact_control. 
+ */ +static isolate_migrate_t isolate_migratepages(struct zone *zone, + struct compact_control *cc) +{ + unsigned long low_pfn, end_pfn; + + /* Do not scan outside zone boundaries */ + low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); + + /* Only scan within a pageblock boundary */ + end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); + + /* Do not cross the free scanner or scan within a memory hole */ + if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { + cc->migrate_pfn = end_pfn; + return ISOLATE_NONE; + } + + /* Perform the isolation */ + low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn); + if (!low_pfn) + return ISOLATE_ABORT; + + cc->migrate_pfn = low_pfn; + + return ISOLATE_SUCCESS; +} + static int compact_finished(struct zone *zone, struct compact_control *cc) { @@ -910,3 +894,5 @@ void compaction_unregister_node(struct node *node) return device_remove_file(&node->dev, &dev_attr_compact); } #endif /* CONFIG_SYSFS && CONFIG_NUMA */ + +#endif /* CONFIG_COMPACTION */ diff --git a/mm/internal.h b/mm/internal.h index 2189af4..aee4761 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -100,6 +100,39 @@ extern void prep_compound_page(struct page *page, unsigned long order); extern bool is_free_buddy_page(struct page *page); #endif +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + +/* + * in mm/compaction.c + */ +/* + * compact_control is used to track pages being migrated and the free pages + * they are being migrated to during memory compaction. The free_pfn starts + * at the end of a zone and migrate_pfn begins at the start. 
Movable pages + * are moved to the end of a zone during a compaction run and the run + * completes when free_pfn <= migrate_pfn + */ +struct compact_control { + struct list_head freepages; /* List of free pages to migrate to */ + struct list_head migratepages; /* List of pages being migrated */ + unsigned long nr_freepages; /* Number of isolated free pages */ + unsigned long nr_migratepages; /* Number of pages to migrate */ + unsigned long free_pfn; /* isolate_freepages search base */ + unsigned long migrate_pfn; /* isolate_migratepages search base */ + bool sync; /* Synchronous migration */ + + int order; /* order a direct compactor needs */ + int migratetype; /* MOVABLE, RECLAIMABLE etc */ + struct zone *zone; +}; + +unsigned long +isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn); +unsigned long +isolate_migratepages_range(struct zone *zone, struct compact_control *cc, + unsigned long low_pfn, unsigned long end_pfn); + +#endif /* * function for dealing with page's order in buddy system. -- cgit v0.10.2 From 041d3a8cdc18dc375a128d90bbb753949a81b1fb Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 29 Dec 2011 13:09:50 +0100 Subject: mm: page_alloc: introduce alloc_contig_range() This commit adds the alloc_contig_range() function which tries to allocate given range of pages. It tries to migrate all already allocated pages that fall in the range thus freeing them. Once all pages in the range are freed they are removed from the buddy system thus allocated for the caller to use. 
Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Acked-by: Mel Gorman Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 581e74b..052a5b6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -391,4 +391,12 @@ static inline bool pm_suspended_storage(void) } #endif /* CONFIG_PM_SLEEP */ +#ifdef CONFIG_CMA + +/* The below functions must be run on a range from a single zone. */ +extern int alloc_contig_range(unsigned long start, unsigned long end); +extern void free_contig_range(unsigned long pfn, unsigned nr_pages); + +#endif + #endif /* __LINUX_GFP_H */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6fb46c1..2c38a30 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -5550,6 +5551,193 @@ out: spin_unlock_irqrestore(&zone->lock, flags); } +#ifdef CONFIG_CMA + +static unsigned long pfn_max_align_down(unsigned long pfn) +{ + return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES, + pageblock_nr_pages) - 1); +} + +static unsigned long pfn_max_align_up(unsigned long pfn) +{ + return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES, + pageblock_nr_pages)); +} + +static struct page * +__alloc_contig_migrate_alloc(struct page *page, unsigned long private, + int **resultp) +{ + return alloc_page(GFP_HIGHUSER_MOVABLE); +} + +/* [start, end) must belong to a single zone. */ +static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) +{ + /* This function is based on compact_zone() from compaction.c. 
*/ + + unsigned long pfn = start; + unsigned int tries = 0; + int ret = 0; + + struct compact_control cc = { + .nr_migratepages = 0, + .order = -1, + .zone = page_zone(pfn_to_page(start)), + .sync = true, + }; + INIT_LIST_HEAD(&cc.migratepages); + + migrate_prep_local(); + + while (pfn < end || !list_empty(&cc.migratepages)) { + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } + + if (list_empty(&cc.migratepages)) { + cc.nr_migratepages = 0; + pfn = isolate_migratepages_range(cc.zone, &cc, + pfn, end); + if (!pfn) { + ret = -EINTR; + break; + } + tries = 0; + } else if (++tries == 5) { + ret = ret < 0 ? ret : -EBUSY; + break; + } + + ret = migrate_pages(&cc.migratepages, + __alloc_contig_migrate_alloc, + 0, false, true); + } + + putback_lru_pages(&cc.migratepages); + return ret > 0 ? 0 : ret; +} + +/** + * alloc_contig_range() -- tries to allocate given range of pages + * @start: start PFN to allocate + * @end: one-past-the-last PFN to allocate + * + * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES + * aligned, however it's the caller's responsibility to guarantee that + * we are the only thread that changes migrate type of pageblocks the + * pages fall in. + * + * The PFN range must belong to a single zone. + * + * Returns zero on success or negative error code. On success all + * pages which PFN is in [start, end) are allocated for the caller and + * need to be freed with free_contig_range(). + */ +int alloc_contig_range(unsigned long start, unsigned long end) +{ + struct zone *zone = page_zone(pfn_to_page(start)); + unsigned long outer_start, outer_end; + int ret = 0, order; + + /* + * What we do here is we mark all pageblocks in range as + * MIGRATE_ISOLATE. 
Because pageblock and max order pages may + * have different sizes, and due to the way page allocator + * work, we align the range to biggest of the two pages so + * that page allocator won't try to merge buddies from + * different pageblocks and change MIGRATE_ISOLATE to some + * other migration type. + * + * Once the pageblocks are marked as MIGRATE_ISOLATE, we + * migrate the pages from an unaligned range (ie. pages that + * we are interested in). This will put all the pages in + * range back to page allocator as MIGRATE_ISOLATE. + * + * When this is done, we take the pages in range from page + * allocator removing them from the buddy system. This way + * page allocator will never consider using them. + * + * This lets us mark the pageblocks back as + * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the + * aligned range but not in the unaligned, original range are + * put back to page allocator so that buddy can use them. + */ + + ret = start_isolate_page_range(pfn_max_align_down(start), + pfn_max_align_up(end)); + if (ret) + goto done; + + ret = __alloc_contig_migrate_range(start, end); + if (ret) + goto done; + + /* + * Pages from [start, end) are within a MAX_ORDER_NR_PAGES + * aligned blocks that are marked as MIGRATE_ISOLATE. What's + * more, all pages in [start, end) are free in page allocator. + * What we are going to do is to allocate all pages from + * [start, end) (that is remove them from page allocator). + * + * The only problem is that pages at the beginning and at the + * end of interesting range may be not aligned with pages that + * page allocator holds, ie. they can be part of higher order + * pages. Because of this, we reserve the bigger range and + * once this is done free the pages we are not interested in. + * + * We don't have to hold zone->lock here because the pages are + * isolated thus they won't get removed from buddy. 
+ */ + + lru_add_drain_all(); + drain_all_pages(); + + order = 0; + outer_start = start; + while (!PageBuddy(pfn_to_page(outer_start))) { + if (++order >= MAX_ORDER) { + ret = -EBUSY; + goto done; + } + outer_start &= ~0UL << order; + } + + /* Make sure the range is really isolated. */ + if (test_pages_isolated(outer_start, end)) { + pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", + outer_start, end); + ret = -EBUSY; + goto done; + } + + outer_end = isolate_freepages_range(outer_start, end); + if (!outer_end) { + ret = -EBUSY; + goto done; + } + + /* Free head and tail (if any) */ + if (start != outer_start) + free_contig_range(outer_start, start - outer_start); + if (end != outer_end) + free_contig_range(end, outer_end - end); + +done: + undo_isolate_page_range(pfn_max_align_down(start), + pfn_max_align_up(end)); + return ret; +} + +void free_contig_range(unsigned long pfn, unsigned nr_pages) +{ + for (; nr_pages--; ++pfn) + __free_page(pfn_to_page(pfn)); +} +#endif + #ifdef CONFIG_MEMORY_HOTREMOVE /* * All pages in the range must be isolated before calling this. -- cgit v0.10.2 From 6d4a49160de2c684fb59fa627bce80e200224331 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Wed, 11 Jan 2012 15:31:33 +0100 Subject: mm: page_alloc: change fallbacks array handling This commit adds a row for MIGRATE_ISOLATE type to the fallbacks array which was missing from it. It also changes the array traversal logic a little, making MIGRATE_RESERVE an end marker. The latter change removes the implicit MIGRATE_UNMOVABLE from the end of each row which was read by the __rmqueue_fallback() function.
Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Acked-by: Mel Gorman Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2c38a30..d6b580c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -875,11 +875,12 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, * This array describes the order lists are fallen back to when * the free lists for the desirable migrate type are depleted */ -static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { +static int fallbacks[MIGRATE_TYPES][3] = { [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ + [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ + [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ }; /* @@ -974,12 +975,12 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) /* Find the largest possible block of pages in the other list */ for (current_order = MAX_ORDER-1; current_order >= order; --current_order) { - for (i = 0; i < MIGRATE_TYPES - 1; i++) { + for (i = 0;; i++) { migratetype = fallbacks[start_migratetype][i]; /* MIGRATE_RESERVE handled later if necessary */ if (migratetype == MIGRATE_RESERVE) - continue; + break; area = &(zone->free_area[current_order]); if (list_empty(&area->free_list[migratetype])) -- cgit v0.10.2 From 47118af076f64844b4f423bc2f545b2da9dab50d Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 29 Dec 2011 13:09:50 +0100 Subject: mm: mmzone: MIGRATE_CMA migration type added The MIGRATE_CMA migration type has two main characteristics: (i) only movable pages can be allocated from 
MIGRATE_CMA pageblocks and (ii) page allocator will never change migration type of MIGRATE_CMA pageblocks. This guarantees (to some degree) that page in a MIGRATE_CMA page block can always be migrated somewhere else (unless there's no memory left in the system). It is designed to be used for allocating big chunks (eg. 10MiB) of physically contiguous memory. Once driver requests contiguous memory, pages from MIGRATE_CMA pageblocks may be migrated away to create a contiguous block. To minimise number of migrations, MIGRATE_CMA migration type is the last type tried when page allocator falls back to other migration types when requested. Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Acked-by: Mel Gorman Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 052a5b6..78d32a7 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -397,6 +397,9 @@ static inline bool pm_suspended_storage(void) extern int alloc_contig_range(unsigned long start, unsigned long end); extern void free_contig_range(unsigned long pfn, unsigned nr_pages); +/* CMA stuff */ +extern void init_cma_reserved_pageblock(struct page *page); + #endif #endif /* __LINUX_GFP_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index dff7115..8c1335f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -35,13 +35,37 @@ */ #define PAGE_ALLOC_COSTLY_ORDER 3 -#define MIGRATE_UNMOVABLE 0 -#define MIGRATE_RECLAIMABLE 1 -#define MIGRATE_MOVABLE 2 -#define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */ -#define MIGRATE_RESERVE 3 -#define MIGRATE_ISOLATE 4 /* can't allocate from here */ -#define MIGRATE_TYPES 5 +enum { + MIGRATE_UNMOVABLE, + MIGRATE_RECLAIMABLE, + MIGRATE_MOVABLE, + MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ + MIGRATE_RESERVE = 
MIGRATE_PCPTYPES, +#ifdef CONFIG_CMA + /* + * MIGRATE_CMA migration type is designed to mimic the way + * ZONE_MOVABLE works. Only movable pages can be allocated + * from MIGRATE_CMA pageblocks and page allocator never + * implicitly changes migration type of MIGRATE_CMA pageblock. + * + * The way to use it is to change migratetype of a range of + * pageblocks to MIGRATE_CMA which can be done by + * init_cma_reserved_pageblock() function. What is important though + * is that a range of pageblocks must be aligned to + * MAX_ORDER_NR_PAGES should the biggest page be bigger than + * a single pageblock. + */ + MIGRATE_CMA, +#endif + MIGRATE_ISOLATE, /* can't allocate from here */ + MIGRATE_TYPES +}; + +#ifdef CONFIG_CMA +# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) +#else +# define is_migrate_cma(migratetype) false +#endif #define for_each_migratetype_order(order, type) \ for (order = 0; order < MAX_ORDER; order++) \ diff --git a/mm/Kconfig b/mm/Kconfig index e338407..3922002 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -198,7 +198,7 @@ config COMPACTION config MIGRATION bool "Page migration" def_bool y - depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION + depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA help Allows the migration of the physical location of pages of processes while the virtual addresses are not changed. This is useful in diff --git a/mm/compaction.c b/mm/compaction.c index 7a92e41..da7d35e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -45,6 +45,11 @@ static void map_pages(struct list_head *list) } } +static inline bool migrate_async_suitable(int migratetype) +{ + return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; +} + /* * Isolate free pages onto a private freelist. Caller must hold zone->lock. 
* If @strict is true, will abort returning 0 on any invalid PFNs or non-free @@ -299,7 +304,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, */ pageblock_nr = low_pfn >> pageblock_order; if (!cc->sync && last_pageblock_nr != pageblock_nr && - get_pageblock_migratetype(page) != MIGRATE_MOVABLE) { + !migrate_async_suitable(get_pageblock_migratetype(page))) { low_pfn += pageblock_nr_pages; low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; last_pageblock_nr = pageblock_nr; @@ -367,8 +372,8 @@ static bool suitable_migration_target(struct page *page) if (PageBuddy(page) && page_order(page) >= pageblock_order) return true; - /* If the block is MIGRATE_MOVABLE, allow migration */ - if (migratetype == MIGRATE_MOVABLE) + /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ + if (migrate_async_suitable(migratetype)) return true; /* Otherwise skip the block */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d6b580c..0869eb1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -750,6 +750,24 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) __free_pages(page, order); } +#ifdef CONFIG_CMA +/* Free whole pageblock and set its migration type to MIGRATE_CMA. */ +void __init init_cma_reserved_pageblock(struct page *page) +{ + unsigned i = pageblock_nr_pages; + struct page *p = page; + + do { + __ClearPageReserved(p); + set_page_count(p, 0); + } while (++p, --i); + + set_page_refcounted(page); + set_pageblock_migratetype(page, MIGRATE_CMA); + __free_pages(page, pageblock_order); + totalram_pages += pageblock_nr_pages; +} +#endif /* * The order of subdivision here is critical for the IO subsystem. 
@@ -875,10 +893,15 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, * This array describes the order lists are fallen back to when * the free lists for the desirable migrate type are depleted */ -static int fallbacks[MIGRATE_TYPES][3] = { - [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, +static int fallbacks[MIGRATE_TYPES][4] = { + [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, + [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, +#ifdef CONFIG_CMA + [MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, + [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ +#else + [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, +#endif [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ }; @@ -995,11 +1018,18 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) * pages to the preferred allocation list. If falling * back for a reclaimable kernel allocation, be more * aggressive about taking ownership of free pages + * + * On the other hand, never change migration + * type of MIGRATE_CMA pageblocks nor move CMA + * pages on different free lists. We don't + * want unmovable pages to be allocated from + * MIGRATE_CMA areas. 
*/ - if (unlikely(current_order >= (pageblock_order >> 1)) || - start_migratetype == MIGRATE_RECLAIMABLE || - page_group_by_mobility_disabled) { - unsigned long pages; + if (!is_migrate_cma(migratetype) && + (unlikely(current_order >= pageblock_order / 2) || + start_migratetype == MIGRATE_RECLAIMABLE || + page_group_by_mobility_disabled)) { + int pages; pages = move_freepages_block(zone, page, start_migratetype); @@ -1017,11 +1047,14 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) rmv_page_order(page); /* Take ownership for orders >= pageblock_order */ - if (current_order >= pageblock_order) + if (current_order >= pageblock_order && + !is_migrate_cma(migratetype)) change_pageblock_range(page, current_order, start_migratetype); - expand(zone, page, order, current_order, area, migratetype); + expand(zone, page, order, current_order, area, + is_migrate_cma(migratetype) + ? migratetype : start_migratetype); trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, migratetype); @@ -1072,7 +1105,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, int cold) { - int i; + int mt = migratetype, i; spin_lock(&zone->lock); for (i = 0; i < count; ++i) { @@ -1093,7 +1126,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, list_add(&page->lru, list); else list_add_tail(&page->lru, list); - set_page_private(page, migratetype); + if (IS_ENABLED(CONFIG_CMA)) { + mt = get_pageblock_migratetype(page); + if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE) + mt = migratetype; + } + set_page_private(page, mt); list = &page->lru; } __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); @@ -1373,8 +1411,12 @@ int split_free_page(struct page *page) if (order >= pageblock_order - 1) { struct page *endpage = page + (1 << order) - 1; - for (; page < endpage; page += pageblock_nr_pages) - set_pageblock_migratetype(page, MIGRATE_MOVABLE); + for (; page < 
endpage; page += pageblock_nr_pages) { + int mt = get_pageblock_migratetype(page); + if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt)) + set_pageblock_migratetype(page, + MIGRATE_MOVABLE); + } } return 1 << order; @@ -5414,14 +5456,16 @@ static int __count_immobile_pages(struct zone *zone, struct page *page, int count) { unsigned long pfn, iter, found; + int mt; + /* * For avoiding noise data, lru_add_drain_all() should be called * If ZONE_MOVABLE, the zone never contains immobile pages */ if (zone_idx(zone) == ZONE_MOVABLE) return true; - - if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE) + mt = get_pageblock_migratetype(page); + if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt)) return true; pfn = page_to_pfn(page); diff --git a/mm/vmstat.c b/mm/vmstat.c index 7db1b9b..0dad31dc 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -613,6 +613,9 @@ static char * const migratetype_names[MIGRATE_TYPES] = { "Reclaimable", "Movable", "Reserve", +#ifdef CONFIG_CMA + "CMA", +#endif "Isolate", }; -- cgit v0.10.2 From 0815f3d81d76dfbf2abcfd93a85ff0a6008fe4c0 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Tue, 3 Apr 2012 15:06:15 +0200 Subject: mm: page_isolation: MIGRATE_CMA isolation functions added This commit changes various functions that change pages and pageblocks migrate type between MIGRATE_ISOLATE and MIGRATE_MOVABLE in such a way as to allow to work with MIGRATE_CMA migrate type. Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 78d32a7..1e49be4 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -394,7 +394,8 @@ static inline bool pm_suspended_storage(void) #ifdef CONFIG_CMA /* The below functions must be run on a range from a single zone. 
*/ -extern int alloc_contig_range(unsigned long start, unsigned long end); +extern int alloc_contig_range(unsigned long start, unsigned long end, + unsigned migratetype); extern void free_contig_range(unsigned long pfn, unsigned nr_pages); /* CMA stuff */ diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 051c1b1..3bdcab3 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -3,7 +3,7 @@ /* * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. - * If specified range includes migrate types other than MOVABLE, + * If specified range includes migrate types other than MOVABLE or CMA, * this will fail with -EBUSY. * * For isolating all pages in the range finally, the caller have to @@ -11,27 +11,27 @@ * test it. */ extern int -start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); +start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, + unsigned migratetype); /* * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. * target range is [start_pfn, end_pfn) */ extern int -undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); +undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, + unsigned migratetype); /* - * test all pages in [start_pfn, end_pfn)are isolated or not. + * Test all pages in [start_pfn, end_pfn) are isolated or not. */ -extern int -test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); +int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); /* - * Internal funcs.Changes pageblock's migrate type. - * Please use make_pagetype_isolated()/make_pagetype_movable(). + * Internal functions. Changes pageblock's migrate type. 
*/ extern int set_migratetype_isolate(struct page *page); -extern void unset_migratetype_isolate(struct page *page); +extern void unset_migratetype_isolate(struct page *page, unsigned migratetype); #endif diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 97cc273..c99ad4e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1404,7 +1404,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) /* Not a free page */ ret = 1; } - unset_migratetype_isolate(p); + unset_migratetype_isolate(p, MIGRATE_MOVABLE); unlock_memory_hotplug(); return ret; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6629faf..fc898cb 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -891,7 +891,7 @@ static int __ref offline_pages(unsigned long start_pfn, nr_pages = end_pfn - start_pfn; /* set above range as isolated */ - ret = start_isolate_page_range(start_pfn, end_pfn); + ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); if (ret) goto out; @@ -956,7 +956,7 @@ repeat: We cannot do rollback at this point. 
*/ offline_isolated_pages(start_pfn, end_pfn); /* reset pagetype flags and makes migrate type to be MOVABLE */ - undo_isolate_page_range(start_pfn, end_pfn); + undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); /* removal success */ zone->present_pages -= offlined_pages; zone->zone_pgdat->node_present_pages -= offlined_pages; @@ -981,7 +981,7 @@ failed_removal: start_pfn, end_pfn); memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ - undo_isolate_page_range(start_pfn, end_pfn); + undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); out: unlock_memory_hotplug(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0869eb1..116c087 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5582,7 +5582,7 @@ out: return ret; } -void unset_migratetype_isolate(struct page *page) +void unset_migratetype_isolate(struct page *page, unsigned migratetype) { struct zone *zone; unsigned long flags; @@ -5590,8 +5590,8 @@ void unset_migratetype_isolate(struct page *page) spin_lock_irqsave(&zone->lock, flags); if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) goto out; - set_pageblock_migratetype(page, MIGRATE_MOVABLE); - move_freepages_block(zone, page, MIGRATE_MOVABLE); + set_pageblock_migratetype(page, migratetype); + move_freepages_block(zone, page, migratetype); out: spin_unlock_irqrestore(&zone->lock, flags); } @@ -5669,6 +5669,10 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) * alloc_contig_range() -- tries to allocate given range of pages * @start: start PFN to allocate * @end: one-past-the-last PFN to allocate + * @migratetype: migratetype of the underlying pageblocks (either + * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks + * in range must have the same migratetype and it must + * be either of the two. 
* * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES * aligned, however it's the caller's responsibility to guarantee that @@ -5681,7 +5685,8 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) * pages which PFN is in [start, end) are allocated for the caller and * need to be freed with free_contig_range(). */ -int alloc_contig_range(unsigned long start, unsigned long end) +int alloc_contig_range(unsigned long start, unsigned long end, + unsigned migratetype) { struct zone *zone = page_zone(pfn_to_page(start)); unsigned long outer_start, outer_end; @@ -5712,7 +5717,7 @@ int alloc_contig_range(unsigned long start, unsigned long end) */ ret = start_isolate_page_range(pfn_max_align_down(start), - pfn_max_align_up(end)); + pfn_max_align_up(end), migratetype); if (ret) goto done; @@ -5772,7 +5777,7 @@ int alloc_contig_range(unsigned long start, unsigned long end) done: undo_isolate_page_range(pfn_max_align_down(start), - pfn_max_align_up(end)); + pfn_max_align_up(end), migratetype); return ret; } diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 4ae42bb..c9f0477 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -24,6 +24,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * to be MIGRATE_ISOLATE. * @start_pfn: The lower PFN of the range to be isolated. * @end_pfn: The upper PFN of the range to be isolated. + * @migratetype: migrate type to set in error recovery. * * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in * the range will never be allocated. Any free pages and pages freed in the @@ -32,8 +33,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * start_pfn/end_pfn must be aligned to pageblock_order. * Returns 0 on success and -EBUSY if any part of range cannot be isolated. 
*/ -int -start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) +int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, + unsigned migratetype) { unsigned long pfn; unsigned long undo_pfn; @@ -56,7 +57,7 @@ undo: for (pfn = start_pfn; pfn < undo_pfn; pfn += pageblock_nr_pages) - unset_migratetype_isolate(pfn_to_page(pfn)); + unset_migratetype_isolate(pfn_to_page(pfn), migratetype); return -EBUSY; } @@ -64,8 +65,8 @@ undo: /* * Make isolated pages available again. */ -int -undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) +int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, + unsigned migratetype) { unsigned long pfn; struct page *page; @@ -77,7 +78,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) page = __first_valid_page(pfn, pageblock_nr_pages); if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) continue; - unset_migratetype_isolate(page); + unset_migratetype_isolate(page, migratetype); } return 0; } @@ -86,7 +87,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) * all pages in [start_pfn...end_pfn) must be in the same zone. * zone->lock must be held before call this. * - * Returns 1 if all pages in the range is isolated. + * Returns 1 if all pages in the range are isolated. */ static int __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) -- cgit v0.10.2 From cfd3da1e49bb95c355c01c0f502d657deb3d34a4 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 25 Apr 2011 21:36:42 +0000 Subject: mm: Serialize access to min_free_kbytes There is a race between the min_free_kbytes sysctl, memory hotplug and transparent hugepage support enablement. Memory hotplug uses a zonelists_mutex to avoid a race when building zonelists. Reuse it to serialise watermark updates. 
[a.p.zijlstra@chello.nl: Older patch fixed the race with spinlock] Signed-off-by: Mel Gorman Signed-off-by: Marek Szyprowski Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Barry Song diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 116c087..8be37bc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5020,14 +5020,7 @@ static void setup_per_zone_lowmem_reserve(void) calculate_totalreserve_pages(); } -/** - * setup_per_zone_wmarks - called when min_free_kbytes changes - * or when memory is hot-{added|removed} - * - * Ensures that the watermark[min,low,high] values for each zone are set - * correctly with respect to min_free_kbytes. - */ -void setup_per_zone_wmarks(void) +static void __setup_per_zone_wmarks(void) { unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); unsigned long lowmem_pages = 0; @@ -5082,6 +5075,20 @@ void setup_per_zone_wmarks(void) calculate_totalreserve_pages(); } +/** + * setup_per_zone_wmarks - called when min_free_kbytes changes + * or when memory is hot-{added|removed} + * + * Ensures that the watermark[min,low,high] values for each zone are set + * correctly with respect to min_free_kbytes. + */ +void setup_per_zone_wmarks(void) +{ + mutex_lock(&zonelists_mutex); + __setup_per_zone_wmarks(); + mutex_unlock(&zonelists_mutex); +} + /* * The inactive anon list should be small enough that the VM never has to * do too much work, but large enough that each inactive page has a chance -- cgit v0.10.2 From bba9071087108d3de70bea274e35064cc480487b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 25 Jan 2012 12:09:52 +0100 Subject: mm: extract reclaim code from __alloc_pages_direct_reclaim() This patch extracts common reclaim code from __alloc_pages_direct_reclaim() function to separate function: __perform_reclaim() which can be later used by alloc_contig_range(). 
Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Cc: Michal Nazarewicz Acked-by: Mel Gorman Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8be37bc..4615531 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2130,16 +2130,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, } #endif /* CONFIG_COMPACTION */ -/* The really slow allocator path where we enter direct reclaim */ -static inline struct page * -__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, enum zone_type high_zoneidx, - nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, unsigned long *did_some_progress) +/* Perform direct synchronous page reclaim */ +static int +__perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, + nodemask_t *nodemask) { - struct page *page = NULL; struct reclaim_state reclaim_state; - bool drained = false; + int progress; cond_resched(); @@ -2150,7 +2147,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, reclaim_state.reclaimed_slab = 0; current->reclaim_state = &reclaim_state; - *did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); + progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); current->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); @@ -2158,6 +2155,21 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, cond_resched(); + return progress; +} + +/* The really slow allocator path where we enter direct reclaim */ +static inline struct page * +__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, enum zone_type high_zoneidx, + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, + int migratetype, unsigned long *did_some_progress) +{ + struct page *page = NULL; + bool drained 
= false; + + *did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, + nodemask); if (unlikely(!(*did_some_progress))) return NULL; -- cgit v0.10.2 From 49f223a9cd96c7293d7258ff88c2bdf83065f69c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 25 Jan 2012 12:49:24 +0100 Subject: mm: trigger page reclaim in alloc_contig_range() to stabilise watermarks alloc_contig_range() performs memory allocation so it also should keep track on keeping the correct level of memory watermarks. This commit adds a call to *_slowpath style reclaim to grab enough pages to make sure that the final collection of contiguous pages from freelists will not starve the system. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park CC: Michal Nazarewicz Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8c1335f..26f2040 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -63,8 +63,10 @@ enum { #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) +# define cma_wmark_pages(zone) zone->min_cma_pages #else # define is_migrate_cma(migratetype) false +# define cma_wmark_pages(zone) 0 #endif #define for_each_migratetype_order(order, type) \ @@ -371,6 +373,13 @@ struct zone { /* see spanned/present_pages for more description */ seqlock_t span_seqlock; #endif +#ifdef CONFIG_CMA + /* + * CMA needs to increase watermark levels during the allocation + * process to make sure that the system is not starved. 
+ */ + unsigned long min_cma_pages; +#endif struct free_area free_area[MAX_ORDER]; #ifndef CONFIG_SPARSEMEM diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4615531..22348ae 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5079,6 +5079,11 @@ static void __setup_per_zone_wmarks(void) zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); + + zone->watermark[WMARK_MIN] += cma_wmark_pages(zone); + zone->watermark[WMARK_LOW] += cma_wmark_pages(zone); + zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone); + setup_zone_migrate_reserve(zone); spin_unlock_irqrestore(&zone->lock, flags); } @@ -5684,6 +5689,54 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) return ret > 0 ? 0 : ret; } +/* + * Update zone's cma pages counter used for watermark level calculation. + */ +static inline void __update_cma_watermarks(struct zone *zone, int count) +{ + unsigned long flags; + spin_lock_irqsave(&zone->lock, flags); + zone->min_cma_pages += count; + spin_unlock_irqrestore(&zone->lock, flags); + setup_per_zone_wmarks(); +} + +/* + * Trigger memory pressure bump to reclaim some pages in order to be able to + * allocate 'count' pages in single page units. Does similar work to the + * __alloc_pages_slowpath() function. + */ +static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count) +{ + enum zone_type high_zoneidx = gfp_zone(gfp_mask); + struct zonelist *zonelist = node_zonelist(0, gfp_mask); + int did_some_progress = 0; + int order = 1; + + /* + * Increase level of watermarks to force kswapd to do its job + * to stabilise at new watermark level. 
+ */ + __update_cma_watermarks(zone, count); + + /* Obey watermarks as if the page was being allocated */ + while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) { + wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone)); + + did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, + NULL); + if (!did_some_progress) { + /* Exhausted what can be done so it's blamo time */ + out_of_memory(zonelist, gfp_mask, order, NULL, false); + } + } + + /* Restore original watermark levels. */ + __update_cma_watermarks(zone, -count); + + return count; +} + /** * alloc_contig_range() -- tries to allocate given range of pages * @start: start PFN to allocate @@ -5782,6 +5835,13 @@ int alloc_contig_range(unsigned long start, unsigned long end, goto done; } + /* + * Reclaim enough pages to make sure that contiguous allocation + * will not starve the system. + */ + __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start); + + /* Grab isolated pages from freelists. */ outer_end = isolate_freepages_range(outer_start, end); if (!outer_end) { ret = -EBUSY; -- cgit v0.10.2 From c64be2bb1c6eb43c838b2c6d57b074078be208dd Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 29 Dec 2011 13:09:51 +0100 Subject: drivers: add Contiguous Memory Allocator The Contiguous Memory Allocator is a set of helper functions for DMA mapping framework that improves allocations of contiguous memory chunks. CMA grabs memory on system boot, marks it with MIGRATE_CMA migrate type and gives back to the system. Kernel is allowed to allocate only movable pages within CMA's managed memory so that it can be used for example for page cache when DMA mapping do not use it. On dma_alloc_from_contiguous() request such pages are migrated out of CMA area to free required contiguous block and fulfill the request. This allows to allocate large contiguous chunks of memory at any time assuming that there is enough free memory available in the system. 
This code is heavily based on earlier works by Michal Nazarewicz. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Signed-off-by: Michal Nazarewicz Acked-by: Arnd Bergmann Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c1601e5..669e8bb 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -508,6 +508,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Also note the kernel might malfunction if you disable some critical bits. + cma=nn[MG] [ARM,KNL] + Sets the size of kernel global memory area for contiguous + memory allocations. For more information, see + include/linux/dma-contiguous.h + cmo_free_hint= [PPC] Format: { yes | no } Specify whether pages are marked as being inactive when they are freed. This is used in CMO environments diff --git a/arch/Kconfig b/arch/Kconfig index 684eb5a..0a3ffe4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -142,6 +142,9 @@ config HAVE_ARCH_TRACEHOOK config HAVE_DMA_ATTRS bool +config HAVE_DMA_CONTIGUOUS + bool + config USE_GENERIC_SMP_HELPERS bool diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 9aa618a..9b21469 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -192,4 +192,93 @@ config DMA_SHARED_BUFFER APIs extension; the file's descriptor can then be passed on to other driver. +config CMA + bool "Contiguous Memory Allocator (EXPERIMENTAL)" + depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK && EXPERIMENTAL + select MIGRATION + help + This enables the Contiguous Memory Allocator which allows drivers + to allocate big physically-contiguous blocks of memory for use with + hardware components that do not support I/O map nor scatter-gather. + + For more information see include/linux/dma-contiguous.h. + If unsure, say "n". 
+ +if CMA + +config CMA_DEBUG + bool "CMA debug messages (DEVELOPMENT)" + depends on DEBUG_KERNEL + help + Turns on debug messages in CMA. This produces KERN_DEBUG + messages for every CMA call as well as various messages while + processing calls such as dma_alloc_from_contiguous(). + This option does not affect warning and error messages. + +comment "Default contiguous memory area size:" + +config CMA_SIZE_MBYTES + int "Size in Mega Bytes" + depends on !CMA_SIZE_SEL_PERCENTAGE + default 16 + help + Defines the size (in MiB) of the default memory area for Contiguous + Memory Allocator. + +config CMA_SIZE_PERCENTAGE + int "Percentage of total memory" + depends on !CMA_SIZE_SEL_MBYTES + default 10 + help + Defines the size of the default memory area for Contiguous Memory + Allocator as a percentage of the total memory in the system. + +choice + prompt "Selected region size" + default CMA_SIZE_SEL_ABSOLUTE + +config CMA_SIZE_SEL_MBYTES + bool "Use mega bytes value only" + +config CMA_SIZE_SEL_PERCENTAGE + bool "Use percentage value only" + +config CMA_SIZE_SEL_MIN + bool "Use lower value (minimum)" + +config CMA_SIZE_SEL_MAX + bool "Use higher value (maximum)" + +endchoice + +config CMA_ALIGNMENT + int "Maximum PAGE_SIZE order of alignment for contiguous buffers" + range 4 9 + default 8 + help + DMA mapping framework by default aligns all buffers to the smallest + PAGE_SIZE order which is greater than or equal to the requested buffer + size. This works well for buffers up to a few hundred kilobytes, but + for larger buffers it is just a waste of memory. With this parameter you can + specify the maximum PAGE_SIZE order for contiguous buffers. Larger + buffers will be aligned only to this specified order. The order is + expressed as a power of two multiplied by the PAGE_SIZE. + + For example, if your system defaults to 4KiB pages, the order value + of 8 means that the buffers will be aligned up to 1MiB only. + + If unsure, leave the default value "8". 
+ +config CMA_AREAS + int "Maximum count of the CMA device-private areas" + default 7 + help + CMA allows creating CMA areas for particular devices. This parameter + sets the maximum number of such device-private CMA areas in the + system. + + If unsure, leave the default value "7". + +endif + endmenu diff --git a/drivers/base/Makefile b/drivers/base/Makefile index b6d1b9c..5aa2d70 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -6,6 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \ attribute_container.o transport_class.o \ topology.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o +obj-$(CONFIG_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c new file mode 100644 index 0000000..78efb03 --- /dev/null +++ b/drivers/base/dma-contiguous.c @@ -0,0 +1,401 @@ +/* + * Contiguous Memory Allocator for DMA mapping framework + * Copyright (c) 2010-2011 by Samsung Electronics. + * Written by: + * Marek Szyprowski + * Michal Nazarewicz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your option) any later version of the license. + */ + +#define pr_fmt(fmt) "cma: " fmt + +#ifdef CONFIG_CMA_DEBUG +#ifndef DEBUG +# define DEBUG +#endif +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef SZ_1M +#define SZ_1M (1 << 20) +#endif + +struct cma { + unsigned long base_pfn; + unsigned long count; + unsigned long *bitmap; +}; + +struct cma *dma_contiguous_default_area; + +#ifdef CONFIG_CMA_SIZE_MBYTES +#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES +#else +#define CMA_SIZE_MBYTES 0 +#endif + +/* + * Default global CMA area size can be defined in kernel's .config. 
+ * This is useful mainly for distro maintainers to create a kernel + * that works correctly for most supported systems. + * The size can be set in bytes or as a percentage of the total memory + * in the system. + * + * Users, who want to set the size of global CMA area for their system + * should use cma= kernel parameter. + */ +static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M; +static long size_cmdline = -1; + +static int __init early_cma(char *p) +{ + pr_debug("%s(%s)\n", __func__, p); + size_cmdline = memparse(p, &p); + return 0; +} +early_param("cma", early_cma); + +#ifdef CONFIG_CMA_SIZE_PERCENTAGE + +static unsigned long __init __maybe_unused cma_early_percent_memory(void) +{ + struct memblock_region *reg; + unsigned long total_pages = 0; + + /* + * We cannot use memblock_phys_mem_size() here, because + * memblock_analyze() has not been called yet. + */ + for_each_memblock(memory, reg) + total_pages += memblock_region_memory_end_pfn(reg) - + memblock_region_memory_base_pfn(reg); + + return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT; +} + +#else + +static inline __maybe_unused unsigned long cma_early_percent_memory(void) +{ + return 0; +} + +#endif + +/** + * dma_contiguous_reserve() - reserve area for contiguous memory handling + * @limit: End address of the reserved memory (optional, 0 for any). + * + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory.
+ */ +void __init dma_contiguous_reserve(phys_addr_t limit) +{ + unsigned long selected_size = 0; + + pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); + + if (size_cmdline != -1) { + selected_size = size_cmdline; + } else { +#ifdef CONFIG_CMA_SIZE_SEL_MBYTES + selected_size = size_bytes; +#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE) + selected_size = cma_early_percent_memory(); +#elif defined(CONFIG_CMA_SIZE_SEL_MIN) + selected_size = min(size_bytes, cma_early_percent_memory()); +#elif defined(CONFIG_CMA_SIZE_SEL_MAX) + selected_size = max(size_bytes, cma_early_percent_memory()); +#endif + } + + if (selected_size) { + pr_debug("%s: reserving %ld MiB for global area\n", __func__, + selected_size / SZ_1M); + + dma_declare_contiguous(NULL, selected_size, 0, limit); + } +}; + +static DEFINE_MUTEX(cma_mutex); + +static __init int cma_activate_area(unsigned long base_pfn, unsigned long count) +{ + unsigned long pfn = base_pfn; + unsigned i = count >> pageblock_order; + struct zone *zone; + + WARN_ON_ONCE(!pfn_valid(pfn)); + zone = page_zone(pfn_to_page(pfn)); + + do { + unsigned j; + base_pfn = pfn; + for (j = pageblock_nr_pages; j; --j, pfn++) { + WARN_ON_ONCE(!pfn_valid(pfn)); + if (page_zone(pfn_to_page(pfn)) != zone) + return -EINVAL; + } + init_cma_reserved_pageblock(pfn_to_page(base_pfn)); + } while (--i); + return 0; +} + +static __init struct cma *cma_create_area(unsigned long base_pfn, + unsigned long count) +{ + int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); + struct cma *cma; + int ret = -ENOMEM; + + pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count); + + cma = kmalloc(sizeof *cma, GFP_KERNEL); + if (!cma) + return ERR_PTR(-ENOMEM); + + cma->base_pfn = base_pfn; + cma->count = count; + cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + + if (!cma->bitmap) + goto no_mem; + + ret = cma_activate_area(base_pfn, count); + if (ret) + goto error; + + pr_debug("%s: returned %p\n", __func__, (void *)cma); + return cma; + +error: + 
kfree(cma->bitmap); +no_mem: + kfree(cma); + return ERR_PTR(ret); +} + +static struct cma_reserved { + phys_addr_t start; + unsigned long size; + struct device *dev; +} cma_reserved[MAX_CMA_AREAS] __initdata; +static unsigned cma_reserved_count __initdata; + +static int __init cma_init_reserved_areas(void) +{ + struct cma_reserved *r = cma_reserved; + unsigned i = cma_reserved_count; + + pr_debug("%s()\n", __func__); + + for (; i; --i, ++r) { + struct cma *cma; + cma = cma_create_area(PFN_DOWN(r->start), + r->size >> PAGE_SHIFT); + if (!IS_ERR(cma)) + dev_set_cma_area(r->dev, cma); + } + return 0; +} +core_initcall(cma_init_reserved_areas); + +/** + * dma_declare_contiguous() - reserve area for contiguous memory handling + * for particular device + * @dev: Pointer to device structure. + * @size: Size of the reserved memory. + * @base: Start address of the reserved memory (optional, 0 for any). + * @limit: End address of the reserved memory (optional, 0 for any). + * + * This function reserves memory for specified device. It should be + * called by board specific code when early allocator (memblock or bootmem) + * is still active.
+ */ +int __init dma_declare_contiguous(struct device *dev, unsigned long size, + phys_addr_t base, phys_addr_t limit) +{ + struct cma_reserved *r = &cma_reserved[cma_reserved_count]; + unsigned long alignment; + + pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__, + (unsigned long)size, (unsigned long)base, + (unsigned long)limit); + + /* Sanity checks */ + if (cma_reserved_count == ARRAY_SIZE(cma_reserved)) { + pr_err("Not enough slots for CMA reserved regions!\n"); + return -ENOSPC; + } + + if (!size) + return -EINVAL; + + /* Sanitise input arguments */ + alignment = PAGE_SIZE << max(MAX_ORDER, pageblock_order); + base = ALIGN(base, alignment); + size = ALIGN(size, alignment); + limit &= ~(alignment - 1); + + /* Reserve memory */ + if (base) { + if (memblock_is_region_reserved(base, size) || + memblock_reserve(base, size) < 0) { + base = -EBUSY; + goto err; + } + } else { + /* + * Use __memblock_alloc_base() since + * memblock_alloc_base() panic()s. + */ + phys_addr_t addr = __memblock_alloc_base(size, alignment, limit); + if (!addr) { + base = -ENOMEM; + goto err; + } else if (addr + size > ~(unsigned long)0) { + memblock_free(addr, size); + base = -EINVAL; + goto err; + } else { + base = addr; + } + } + + /* + * Each reserved area must be initialised later, when more kernel + * subsystems (like slab allocator) are available. + */ + r->start = base; + r->size = size; + r->dev = dev; + cma_reserved_count++; + pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M, + (unsigned long)base); + + /* Architecture specific contiguous memory fixup. */ + dma_contiguous_early_fixup(base, size); + return 0; +err: + pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M); + return base; +} + +/** + * dma_alloc_from_contiguous() - allocate pages from contiguous area + * @dev: Pointer to device for which the allocation is performed. + * @count: Requested number of pages. + * @align: Requested alignment of pages (in PAGE_SIZE order). 
+ * + * This function allocates memory buffer for specified device. It uses + * device specific contiguous memory area if available or the default + * global one. Requires architecture specific get_dev_cma_area() helper + * function. + */ +struct page *dma_alloc_from_contiguous(struct device *dev, int count, + unsigned int align) +{ + unsigned long mask, pfn, pageno, start = 0; + struct cma *cma = dev_get_cma_area(dev); + int ret; + + if (!cma || !cma->count) + return NULL; + + if (align > CONFIG_CMA_ALIGNMENT) + align = CONFIG_CMA_ALIGNMENT; + + pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, + count, align); + + if (!count) + return NULL; + + mask = (1 << align) - 1; + + mutex_lock(&cma_mutex); + + for (;;) { + pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count, + start, count, mask); + if (pageno >= cma->count) { + ret = -ENOMEM; + goto error; + } + + pfn = cma->base_pfn + pageno; + ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); + if (ret == 0) { + bitmap_set(cma->bitmap, pageno, count); + break; + } else if (ret != -EBUSY) { + goto error; + } + pr_debug("%s(): memory range at %p is busy, retrying\n", + __func__, pfn_to_page(pfn)); + /* try again with a bit different memory target */ + start = pageno + mask + 1; + } + + mutex_unlock(&cma_mutex); + + pr_debug("%s(): returned %p\n", __func__, pfn_to_page(pfn)); + return pfn_to_page(pfn); +error: + mutex_unlock(&cma_mutex); + return NULL; +} + +/** + * dma_release_from_contiguous() - release allocated pages + * @dev: Pointer to device for which the pages were allocated. + * @pages: Allocated pages. + * @count: Number of allocated pages. + * + * This function releases memory allocated by dma_alloc_from_contiguous(). + * It returns false when provided pages do not belong to contiguous area and + * true otherwise. 
+ */ +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count) +{ + struct cma *cma = dev_get_cma_area(dev); + unsigned long pfn; + + if (!cma || !pages) + return false; + + pr_debug("%s(page %p)\n", __func__, (void *)pages); + + pfn = page_to_pfn(pages); + + if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) + return false; + + VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); + + mutex_lock(&cma_mutex); + bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count); + free_contig_range(pfn, count); + mutex_unlock(&cma_mutex); + + return true; +} diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h new file mode 100644 index 0000000..c544356 --- /dev/null +++ b/include/asm-generic/dma-contiguous.h @@ -0,0 +1,28 @@ +#ifndef ASM_DMA_CONTIGUOUS_H +#define ASM_DMA_CONTIGUOUS_H + +#ifdef __KERNEL__ +#ifdef CONFIG_CMA + +#include +#include + +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + if (dev && dev->cma_area) + return dev->cma_area; + return dma_contiguous_default_area; +} + +static inline void dev_set_cma_area(struct device *dev, struct cma *cma) +{ + if (dev) + dev->cma_area = cma; + if (!dev || !dma_contiguous_default_area) + dma_contiguous_default_area = cma; +} + +#endif +#endif + +#endif diff --git a/include/linux/device.h b/include/linux/device.h index 5ad17cc..e339929 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -661,6 +661,10 @@ struct device { struct dma_coherent_mem *dma_mem; /* internal for coherent mem override */ +#ifdef CONFIG_CMA + struct cma *cma_area; /* contiguous memory area for dma + allocations */ +#endif /* arch specific additions */ struct dev_archdata archdata; diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h new file mode 100644 index 0000000..2f303e4 --- /dev/null +++ b/include/linux/dma-contiguous.h @@ -0,0 +1,110 @@ +#ifndef __LINUX_CMA_H +#define __LINUX_CMA_H + +/* + * Contiguous Memory 
Allocator for DMA mapping framework + * Copyright (c) 2010-2011 by Samsung Electronics. + * Written by: + * Marek Szyprowski + * Michal Nazarewicz + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your option) any later version of the license. + */ + +/* + * Contiguous Memory Allocator + * + * The Contiguous Memory Allocator (CMA) makes it possible to + * allocate big contiguous chunks of memory after the system has + * booted. + * + * Why is it needed? + * + * Various devices on embedded systems have no scatter-gather and/or + * IO map support and require contiguous blocks of memory to + * operate. They include devices such as cameras, hardware video + * coders, etc. + * + * Such devices often require big memory buffers (a full HD frame + * is, for instance, more than 2 mega pixels large, i.e. more than 6 + * MB of memory), which makes mechanisms such as kmalloc() or + * alloc_page() ineffective. + * + * At the same time, a solution where a big memory region is + * reserved for a device is suboptimal since often more memory is + * reserved than strictly required and, moreover, the memory is + * inaccessible to page system even if device drivers don't use it. + * + * CMA tries to solve this issue by operating on memory regions + * where only movable pages can be allocated from. This way, kernel + * can use the memory for pagecache and when device driver requests + * it, allocated pages can be migrated. + * + * Driver usage + * + * CMA should not be used by the device drivers directly. It is + * only a helper framework for dma-mapping subsystem.
+ * + * For more information, see kernel-docs in drivers/base/dma-contiguous.c + */ + +#ifdef __KERNEL__ + +struct cma; +struct page; +struct device; + +#ifdef CONFIG_CMA + +/* + * There is always at least global CMA area and a few optional device + * private areas configured in kernel .config. + */ +#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) + +extern struct cma *dma_contiguous_default_area; + +void dma_contiguous_reserve(phys_addr_t addr_limit); +int dma_declare_contiguous(struct device *dev, unsigned long size, + phys_addr_t base, phys_addr_t limit); + +struct page *dma_alloc_from_contiguous(struct device *dev, int count, + unsigned int order); +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count); + +#else + +#define MAX_CMA_AREAS (0) + +static inline void dma_contiguous_reserve(phys_addr_t limit) { } + +static inline +int dma_declare_contiguous(struct device *dev, unsigned long size, + phys_addr_t base, phys_addr_t limit) +{ + return -ENOSYS; +} + +static inline +struct page *dma_alloc_from_contiguous(struct device *dev, int count, + unsigned int order) +{ + return NULL; +} + +static inline +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count) +{ + return false; +} + +#endif + +#endif + +#endif -- cgit v0.10.2 From 0a2b9a6ea93650b8a00f9fd5ee8fdd25671e2df6 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 29 Dec 2011 13:09:51 +0100 Subject: X86: integrate CMA with DMA-mapping subsystem This patch adds support for CMA to dma-mapping subsystem for x86 architecture that uses common pci-dma/pci-nommu implementation. This allows to test CMA on KVM/QEMU and a lot of common x86 boxes. 
Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park CC: Michal Nazarewicz Acked-by: Arnd Bergmann diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c9866b0..7cbdfda 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -31,6 +31,7 @@ config X86 select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS select HAVE_DMA_ATTRS + select HAVE_DMA_CONTIGUOUS if !SWIOTLB select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h new file mode 100644 index 0000000..c092416 --- /dev/null +++ b/arch/x86/include/asm/dma-contiguous.h @@ -0,0 +1,13 @@ +#ifndef ASMX86_DMA_CONTIGUOUS_H +#define ASMX86_DMA_CONTIGUOUS_H + +#ifdef __KERNEL__ + +#include +#include + +static inline void +dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } + +#endif +#endif diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 4b4331d..7b9227b 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_ISA # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) @@ -62,6 +63,10 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, struct dma_attrs *attrs); +extern void dma_generic_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + struct dma_attrs *attrs); + static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { if (!dev->dma_mask) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 3003250..62c9457 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -100,14 +100,18 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, struct dma_attrs *attrs) { unsigned long dma_mask; - struct page *page; + struct page *page = NULL; + unsigned int count = PAGE_ALIGN(size) >> 
PAGE_SHIFT; dma_addr_t addr; dma_mask = dma_alloc_coherent_mask(dev, flag); flag |= __GFP_ZERO; again: - page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); + if (!(flag & GFP_ATOMIC)) + page = dma_alloc_from_contiguous(dev, count, get_order(size)); + if (!page) + page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); if (!page) return NULL; @@ -127,6 +131,16 @@ again: return page_address(page); } +void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, struct dma_attrs *attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page *page = virt_to_page(vaddr); + + if (!dma_release_from_contiguous(dev, page, count)) + free_pages((unsigned long)vaddr, get_order(size)); +} + /* * See for the iommu kernel * parameter documentation. diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index f960506..871be4a 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -74,12 +74,6 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, struct dma_attrs *attrs) -{ - free_pages((unsigned long)vaddr, get_order(size)); -} - static void nommu_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) @@ -97,7 +91,7 @@ static void nommu_sync_sg_for_device(struct device *dev, struct dma_map_ops nommu_dma_ops = { .alloc = dma_generic_alloc_coherent, - .free = nommu_free_coherent, + .free = dma_generic_free_coherent, .map_sg = nommu_map_sg, .map_page = nommu_map_page, .sync_single_for_device = nommu_sync_single_for_device, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1a29015..d6c956e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -934,6 +935,7 @@ void __init 
setup_arch(char **cmdline_p) } #endif memblock.current_limit = get_max_mapped(); + dma_contiguous_reserve(0); /* * NOTE: On x86-32, only from this point on, fixmaps are ready for use. -- cgit v0.10.2 From c79095092834a18ae74cfc08def1a5a101dc106c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 29 Dec 2011 13:09:51 +0100 Subject: ARM: integrate CMA with DMA-mapping subsystem This patch adds support for CMA to dma-mapping subsystem for ARM architecture. By default a global CMA area is used, but specific devices are allowed to have their private memory areas if required (they can be created with dma_declare_contiguous() function during board initialisation). Contiguous memory areas reserved for DMA are remapped with 2-level page tables on boot. Once a buffer is requested, a low memory kernel mapping is updated to match requested memory access type. GFP_ATOMIC allocations are performed from special pool which is created early during boot. This way remapping page attributes is not needed on allocation time. CMA has been enabled unconditionally for ARMv6+ systems. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park CC: Michal Nazarewicz Acked-by: Arnd Bergmann Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 669e8bb..41996c6 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -520,6 +520,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a hypervisor. Default: yes + coherent_pool=nn[KMG] [ARM,KNL] + Sets the size of memory pool for coherent, atomic dma + allocations if Contiguous Memory Allocator (CMA) is used. + code_bytes [X86] How many bytes of object code to print in an oops report.
Range: 0 - 8192 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 36586dba..cbbbc45 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -4,6 +4,8 @@ config ARM select HAVE_AOUT select HAVE_DMA_API_DEBUG select HAVE_IDE if PCI || ISA || PCMCIA + select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7) + select CMA if (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_MEMBLOCK select RTC_LIB select SYS_SUPPORTS_APM_EMULATION diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h new file mode 100644 index 0000000..3ed37b4 --- /dev/null +++ b/arch/arm/include/asm/dma-contiguous.h @@ -0,0 +1,15 @@ +#ifndef ASMARM_DMA_CONTIGUOUS_H +#define ASMARM_DMA_CONTIGUOUS_H + +#ifdef __KERNEL__ +#ifdef CONFIG_CMA + +#include +#include + +void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); + +#endif +#endif + +#endif diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index b36f365..a6efcdd 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -30,6 +30,7 @@ struct map_desc { #define MT_MEMORY_DTCM 12 #define MT_MEMORY_ITCM 13 #define MT_MEMORY_SO 14 +#define MT_MEMORY_DMA_READY 15 #ifdef CONFIG_MMU extern void iotable_init(struct map_desc *, int); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index ebfac78..1b3096d 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -81,6 +81,7 @@ __setup("fpe=", fpe_setup); extern void paging_init(struct machine_desc *desc); extern void sanity_check_meminfo(void); extern void reboot_setup(char *str); +extern void setup_dma_zone(struct machine_desc *desc); unsigned int processor_id; EXPORT_SYMBOL(processor_id); @@ -939,12 +940,8 @@ void __init setup_arch(char **cmdline_p) machine_desc = mdesc; machine_name = mdesc->name; -#ifdef CONFIG_ZONE_DMA - if (mdesc->dma_zone_size) { - extern unsigned long arm_dma_zone_size; - arm_dma_zone_size = mdesc->dma_zone_size; - } -#endif + setup_dma_zone(mdesc); + 
if (mdesc->restart_mode) reboot_setup(&mdesc->restart_mode); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index db23ae4..302f5bf 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -17,7 +17,9 @@ #include #include #include +#include #include +#include #include #include @@ -26,6 +28,9 @@ #include #include #include +#include +#include +#include #include "mm.h" @@ -56,6 +61,19 @@ static u64 get_coherent_dma_mask(struct device *dev) return mask; } +static void __dma_clear_buffer(struct page *page, size_t size) +{ + void *ptr; + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. + */ + ptr = page_address(page); + memset(ptr, 0, size); + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); +} + /* * Allocate a DMA buffer for 'dev' of size 'size' using the * specified gfp mask. Note that 'size' must be page aligned. @@ -64,23 +82,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf { unsigned long order = get_order(size); struct page *page, *p, *e; - void *ptr; - u64 mask = get_coherent_dma_mask(dev); - -#ifdef CONFIG_DMA_API_DEBUG - u64 limit = (mask + 1) & ~mask; - if (limit && size >= limit) { - dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", - size, mask); - return NULL; - } -#endif - - if (!mask) - return NULL; - - if (mask < 0xffffffffULL) - gfp |= GFP_DMA; page = alloc_pages(gfp, order); if (!page) @@ -93,14 +94,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) __free_page(p); - /* - * Ensure that the allocated pages are zeroed, and that any data - * lurking in the kernel direct-mapped region is invalidated. 
- */ - ptr = page_address(page); - memset(ptr, 0, size); - dmac_flush_range(ptr, ptr + size); - outer_flush_range(__pa(ptr), __pa(ptr) + size); + __dma_clear_buffer(page, size); return page; } @@ -170,6 +164,9 @@ static int __init consistent_init(void) unsigned long base = consistent_base; unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; + if (cpu_architecture() >= CPU_ARCH_ARMv6) + return 0; + consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); if (!consistent_pte) { pr_err("%s: no memory\n", __func__); @@ -210,9 +207,101 @@ static int __init consistent_init(void) return ret; } - core_initcall(consistent_init); +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page); + +static struct arm_vmregion_head coherent_head = { + .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock), + .vm_list = LIST_HEAD_INIT(coherent_head.vm_list), +}; + +size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8; + +static int __init early_coherent_pool(char *p) +{ + coherent_pool_size = memparse(p, &p); + return 0; +} +early_param("coherent_pool", early_coherent_pool); + +/* + * Initialise the coherent pool for atomic allocations. + */ +static int __init coherent_init(void) +{ + pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); + size_t size = coherent_pool_size; + struct page *page; + void *ptr; + + if (cpu_architecture() < CPU_ARCH_ARMv6) + return 0; + + ptr = __alloc_from_contiguous(NULL, size, prot, &page); + if (ptr) { + coherent_head.vm_start = (unsigned long) ptr; + coherent_head.vm_end = (unsigned long) ptr + size; + printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n", + (unsigned)size / 1024); + return 0; + } + printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", + (unsigned)size / 1024); + return -ENOMEM; +} +/* + * CMA is activated by core_initcall, so we must be called after it. 
+ */ +postcore_initcall(coherent_init); + +struct dma_contig_early_reserve { + phys_addr_t base; + unsigned long size; +}; + +static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; + +static int dma_mmu_remap_num __initdata; + +void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ + dma_mmu_remap[dma_mmu_remap_num].base = base; + dma_mmu_remap[dma_mmu_remap_num].size = size; + dma_mmu_remap_num++; +} + +void __init dma_contiguous_remap(void) +{ + int i; + for (i = 0; i < dma_mmu_remap_num; i++) { + phys_addr_t start = dma_mmu_remap[i].base; + phys_addr_t end = start + dma_mmu_remap[i].size; + struct map_desc map; + unsigned long addr; + + if (end > arm_lowmem_limit) + end = arm_lowmem_limit; + if (start >= end) + return; + + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY_DMA_READY; + + /* + * Clear previous low-memory mapping + */ + for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); + addr += PGDIR_SIZE) + pmd_clear(pmd_off_k(addr)); + + iotable_init(&map, 1); + } +} + static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) @@ -319,20 +408,173 @@ static void __dma_free_remap(void *cpu_addr, size_t size) arm_vmregion_free(&consistent_head, c); } +static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, + void *data) +{ + struct page *page = virt_to_page(addr); + pgprot_t prot = *(pgprot_t *)data; + + set_pte_ext(pte, mk_pte(page, prot), 0); + return 0; +} + +static void __dma_remap(struct page *page, size_t size, pgprot_t prot) +{ + unsigned long start = (unsigned long) page_address(page); + unsigned end = start + size; + + apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); + dsb(); + flush_tlb_kernel_range(start, end); +} + +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, + pgprot_t prot, struct page 
**ret_page, + const void *caller) +{ + struct page *page; + void *ptr; + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; + + ptr = __dma_alloc_remap(page, size, gfp, prot, caller); + if (!ptr) { + __dma_free_buffer(page, size); + return NULL; + } + + *ret_page = page; + return ptr; +} + +static void *__alloc_from_pool(struct device *dev, size_t size, + struct page **ret_page, const void *caller) +{ + struct arm_vmregion *c; + size_t align; + + if (!coherent_head.vm_start) { + printk(KERN_ERR "%s: coherent pool not initialised!\n", + __func__); + dump_stack(); + return NULL; + } + + /* + * Align the region allocation - allocations from pool are rather + * small, so align them to their order in pages, minimum is a page + * size. This helps reduce fragmentation of the DMA space. + */ + align = PAGE_SIZE << get_order(size); + c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller); + if (c) { + void *ptr = (void *)c->vm_start; + struct page *page = virt_to_page(ptr); + *ret_page = page; + return ptr; + } + return NULL; +} + +static int __free_from_pool(void *cpu_addr, size_t size) +{ + unsigned long start = (unsigned long)cpu_addr; + unsigned long end = start + size; + struct arm_vmregion *c; + + if (start < coherent_head.vm_start || end > coherent_head.vm_end) + return 0; + + c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start); + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + arm_vmregion_free(&coherent_head, c); + return 1; +} + +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page) +{ + unsigned long order = get_order(size); + size_t count = size >> PAGE_SHIFT; + struct page *page; + + page = dma_alloc_from_contiguous(dev, count, order); + if (!page) + return NULL; + + __dma_clear_buffer(page, size); + 
__dma_remap(page, size, prot); + + *ret_page = page; + return page_address(page); +} + +static void __free_from_contiguous(struct device *dev, struct page *page, + size_t size) +{ + __dma_remap(page, size, pgprot_kernel); + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); +} + +#define nommu() 0 + #else /* !CONFIG_MMU */ -#define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) -#define __dma_free_remap(addr, size) do { } while (0) +#define nommu() 1 + +#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL +#define __alloc_from_pool(dev, size, ret_page, c) NULL +#define __alloc_from_contiguous(dev, size, prot, ret) NULL +#define __free_from_pool(cpu_addr, size) 0 +#define __free_from_contiguous(dev, page, size) do { } while (0) +#define __dma_free_remap(cpu_addr, size) do { } while (0) #endif /* CONFIG_MMU */ -static void * -__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, - pgprot_t prot, const void *caller) +static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, + struct page **ret_page) { struct page *page; + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; + + *ret_page = page; + return page_address(page); +} + + + +static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, pgprot_t prot, const void *caller) +{ + u64 mask = get_coherent_dma_mask(dev); + struct page *page; void *addr; +#ifdef CONFIG_DMA_API_DEBUG + u64 limit = (mask + 1) & ~mask; + if (limit && size >= limit) { + dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", + size, mask); + return NULL; + } +#endif + + if (!mask) + return NULL; + + if (mask < 0xffffffffULL) + gfp |= GFP_DMA; + /* * Following is a work-around (a.k.a. 
hack) to prevent pages * with __GFP_COMP being passed to split_page() which cannot @@ -345,19 +587,17 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, *handle = ~0; size = PAGE_ALIGN(size); - page = __dma_alloc_buffer(dev, size, gfp); - if (!page) - return NULL; - - if (!arch_is_coherent()) - addr = __dma_alloc_remap(page, size, gfp, prot, caller); + if (arch_is_coherent() || nommu()) + addr = __alloc_simple_buffer(dev, size, gfp, &page); + else if (cpu_architecture() < CPU_ARCH_ARMv6) + addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); + else if (gfp & GFP_ATOMIC) + addr = __alloc_from_pool(dev, size, &page, caller); else - addr = page_address(page); + addr = __alloc_from_contiguous(dev, size, prot, &page); if (addr) *handle = pfn_to_dma(dev, page_to_pfn(page)); - else - __dma_free_buffer(page, size); return addr; } @@ -366,8 +606,8 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. 
*/ -void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) +void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp) { void *memory; @@ -398,25 +638,11 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, { int ret = -ENXIO; #ifdef CONFIG_MMU - unsigned long user_size, kern_size; - struct arm_vmregion *c; - - user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); - if (c) { - unsigned long off = vma->vm_pgoff; - - kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; - - if (off < kern_size && - user_size <= (kern_size - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - page_to_pfn(c->vm_pages) + off, - user_size << PAGE_SHIFT, - vma->vm_page_prot); - } - } + unsigned long pfn = dma_to_pfn(dev, dma_addr); + ret = remap_pfn_range(vma, vma->vm_start, + pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); #endif /* CONFIG_MMU */ return ret; @@ -438,23 +664,33 @@ int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, } EXPORT_SYMBOL(dma_mmap_writecombine); + /* - * free a page as defined by the above mapping. - * Must not be called with IRQs disabled. + * Free a buffer as defined by the above mapping. 
*/ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) { - WARN_ON(irqs_disabled()); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return; size = PAGE_ALIGN(size); - if (!arch_is_coherent()) + if (arch_is_coherent() || nommu()) { + __dma_free_buffer(page, size); + } else if (cpu_architecture() < CPU_ARCH_ARMv6) { __dma_free_remap(cpu_addr, size); - - __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size); + __dma_free_buffer(page, size); + } else { + if (__free_from_pool(cpu_addr, size)) + return; + /* + * Non-atomic allocations cannot be freed with IRQs disabled + */ + WARN_ON(irqs_disabled()); + __free_from_contiguous(dev, page, size); + } } EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 8f5813b..c21d06c 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -226,6 +227,17 @@ static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole, } #endif +void __init setup_dma_zone(struct machine_desc *mdesc) +{ +#ifdef CONFIG_ZONE_DMA + if (mdesc->dma_zone_size) { + arm_dma_zone_size = mdesc->dma_zone_size; + arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; + } else + arm_dma_limit = 0xffffffff; +#endif +} + static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, unsigned long max_high) { @@ -273,12 +285,9 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, * Adjust the sizes according to any special requirements for * this machine type. 
*/ - if (arm_dma_zone_size) { + if (arm_dma_zone_size) arm_adjust_dma_zone(zone_size, zhole_size, arm_dma_zone_size >> PAGE_SHIFT); - arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; - } else - arm_dma_limit = 0xffffffff; #endif free_area_init_node(0, zone_size, min, zhole_size); @@ -364,6 +373,12 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); + /* + * reserve memory for DMA contiguous allocations, + * must come from DMA area inside low memory + */ + dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit)); + arm_memblock_steal_permitted = false; memblock_allow_resize(); memblock_dump_all(); diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 27f4a61..93dc0c1 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -67,5 +67,8 @@ extern u32 arm_dma_limit; #define arm_dma_limit ((u32)~0) #endif +extern phys_addr_t arm_lowmem_limit; + void __init bootmem_init(void); void arm_mm_memblock_reserve(void); +void dma_contiguous_remap(void); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index aa78de8..e5dad60 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -288,6 +288,11 @@ static struct mem_type mem_types[] = { PMD_SECT_UNCACHED | PMD_SECT_XN, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_DMA_READY] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_KERNEL, + }, }; const struct mem_type *get_mem_type(unsigned int type) @@ -429,6 +434,7 @@ static void __init build_mem_type_table(void) if (arch_is_coherent() && cpu_is_xsc3()) { mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; } @@ -460,6 +466,7 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; 
mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; } @@ -512,6 +519,7 @@ static void __init build_mem_type_table(void) mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -596,7 +604,7 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr, * L1 entries, whereas PGDs refer to a group of L1 entries making * up one logical pointer to an L2 table. */ - if (((addr | end | phys) & ~SECTION_MASK) == 0) { + if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) { pmd_t *p = pmd; #ifndef CONFIG_ARM_LPAE @@ -814,7 +822,7 @@ static int __init early_vmalloc(char *arg) } early_param("vmalloc", early_vmalloc); -static phys_addr_t lowmem_limit __initdata = 0; +phys_addr_t arm_lowmem_limit __initdata = 0; void __init sanity_check_meminfo(void) { @@ -897,8 +905,8 @@ void __init sanity_check_meminfo(void) bank->size = newsize; } #endif - if (!bank->highmem && bank->start + bank->size > lowmem_limit) - lowmem_limit = bank->start + bank->size; + if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit) + arm_lowmem_limit = bank->start + bank->size; j++; } @@ -923,8 +931,8 @@ void __init sanity_check_meminfo(void) } #endif meminfo.nr_banks = j; - high_memory = __va(lowmem_limit - 1) + 1; - memblock_set_current_limit(lowmem_limit); + high_memory = __va(arm_lowmem_limit - 1) + 1; + memblock_set_current_limit(arm_lowmem_limit); } static inline void prepare_page_table(void) @@ -949,8 +957,8 @@ static inline void prepare_page_table(void) * Find the end of the first block of lowmem. 
*/ end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; - if (end >= lowmem_limit) - end = lowmem_limit; + if (end >= arm_lowmem_limit) + end = arm_lowmem_limit; /* * Clear out all the kernel space mappings, except for the first @@ -1093,8 +1101,8 @@ static void __init map_lowmem(void) phys_addr_t end = start + reg->size; struct map_desc map; - if (end > lowmem_limit) - end = lowmem_limit; + if (end > arm_lowmem_limit) + end = arm_lowmem_limit; if (start >= end) break; @@ -1115,11 +1123,12 @@ void __init paging_init(struct machine_desc *mdesc) { void *zero_page; - memblock_set_current_limit(lowmem_limit); + memblock_set_current_limit(arm_lowmem_limit); build_mem_type_table(); prepare_page_table(); map_lowmem(); + dma_contiguous_remap(); devicemaps_init(mdesc); kmap_init(); -- cgit v0.10.2 From 58f42fd54144346898e6dc6d6ae3acd4c591b42f Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 11 May 2012 09:37:13 +0200 Subject: cma: fix migration mode __alloc_contig_migrate_range calls migrate_pages with wrong argument for migrate_mode. Fix it. Cc: Marek Szyprowski Signed-off-by: Minchan Kim Acked-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 22348ae..ed85c02 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5682,7 +5682,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) ret = migrate_pages(&cc.migratepages, __alloc_contig_migrate_alloc, - 0, false, true); + 0, false, MIGRATE_SYNC); } putback_lru_pages(&cc.migratepages); -- cgit v0.10.2 From 61f6c7a47a2f84b7ba4b65240ffe9247df772b06 Mon Sep 17 00:00:00 2001 From: Vitaly Andrianov Date: Mon, 14 May 2012 13:49:56 -0400 Subject: ARM: dma-mapping: use PMD size for section unmap The dma_contiguous_remap() function clears existing section maps using the wrong size (PGDIR_SIZE instead of PMD_SIZE). This is a bug which does not affect non-LPAE systems, where PGDIR_SIZE and PMD_SIZE are the same. 
On LPAE systems, however, this bug causes the kernel to hang at this point. This fix has been tested on both LPAE and non-LPAE kernel builds. Signed-off-by: Vitaly Andrianov Signed-off-by: Marek Szyprowski diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 302f5bf..153f555 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -295,7 +295,7 @@ void __init dma_contiguous_remap(void) * Clear previous low-memory mapping */ for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); - addr += PGDIR_SIZE) + addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); iotable_init(&map, 1); -- cgit v0.10.2