From 836bfa0d2930b7ec236aa488f01678c92eb5f0de Mon Sep 17 00:00:00 2001 From: YoungJun Cho Date: Mon, 17 Jun 2013 13:18:52 +0900 Subject: ARM: dma-mapping: Get pages if the cpu_addr is out of atomic_pool In __iommu_get_pages(), the cpu_addr is checked wheather in atomic_pool range or not. So if the cpu_addr is in atomic_pool range, it does not need to check twice. Signed-off-by: YoungJun Cho Signed-off-by: Kyungmin Park Signed-off-by: Marek Szyprowski diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ef3e0f3..1d158c2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1372,16 +1372,17 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) { - struct page **pages = __iommu_get_pages(cpu_addr, attrs); + struct page **pages; size = PAGE_ALIGN(size); - if (!pages) { - WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + if (__in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, cpu_addr, handle, size); return; } - if (__in_atomic_pool(cpu_addr, size)) { - __iommu_free_atomic(dev, cpu_addr, handle, size); + pages = __iommu_get_pages(cpu_addr, attrs); + if (!pages) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); return; } -- cgit v0.10.2 From 13987d68bcf476a93fa864ea4e58755f1bc4bd30 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 10 Jun 2013 19:34:39 +0100 Subject: ARM: dma-mapping: convert DMA direction into IOMMU protection attributes IOMMU mappings take a prot parameter, identifying the protection bits to enforce on the newly created mapping (READ or WRITE). The ARM dma-mapping framework currently just passes 0 as the prot argument, resulting in faulting mappings. This patch infers the protection attributes based on the direction of the DMA transfer. Cc: Marek Szyprowski Signed-off-by: Will Deacon Signed-off-by: Marek Szyprowski diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1d158c2..282aacd 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1637,13 +1637,27 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p { struct dma_iommu_mapping *mapping = dev->archdata.mapping; dma_addr_t dma_addr; - int ret, len = PAGE_ALIGN(size + offset); + int ret, prot, len = PAGE_ALIGN(size + offset); dma_addr = __alloc_iova(mapping, len); if (dma_addr == DMA_ERROR_CODE) return dma_addr; - ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0); + switch (dir) { + case DMA_BIDIRECTIONAL: + prot = IOMMU_READ | IOMMU_WRITE; + break; + case DMA_TO_DEVICE: + prot = IOMMU_READ; + break; + case DMA_FROM_DEVICE: + prot = IOMMU_WRITE; + break; + default: + prot = 0; + } + + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); if (ret < 0) goto fail; -- cgit v0.10.2 From 9e4b259d4fbf8a9c822117e734356e94b7f30927 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 10 Jun 2013 19:34:40 +0100 Subject: ARM: dma-mapping: NULLify dev->archdata.mapping pointer on detach The current code only clobbers a local variable, so the device is left with a stale mapping pointer. Cc: Hiroshi Doyu Signed-off-by: Will Deacon Acked-by: Hiroshi Doyu Signed-off-by: Marek Szyprowski diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 282aacd..26a5833 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1922,7 +1922,7 @@ void arm_iommu_detach_device(struct device *dev) iommu_detach_device(mapping->domain, dev); kref_put(&mapping->kref, release_iommu_mapping); - mapping = NULL; + dev->archdata.mapping = NULL; set_dma_ops(dev, NULL); pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); -- cgit v0.10.2 From 63c181922f25371b7d24ef63d4e15887ff23a088 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 15 May 2013 12:09:49 +0800 Subject: ARM: DMA-mapping: mark all !DMA_TO_DEVICE pages in unmapping as clean It is common for one sg to include many pages, so mark all these pages as clean to avoid unnecessary flushing on them in set_pte_at() or update_mmu_cache(). The patch might improve loading performance of applciation code a bit. On the below test code to read file(~1GByte size) from usb mass storage disk to buffer created with mmap(PROT_READ | PROT_EXEC) on Pandaboard, average ~1% improvement can be observed with the patch on 10 times test. unsigned int sum = 0; static unsigned long tv_diff(struct timeval *tv1, struct timeval *tv2) { return (tv2->tv_sec - tv1->tv_sec) * 1000000 + (tv2->tv_usec - tv1->tv_usec); } int main(int argc, char *argv[]) { char *mbuffer; int fd; int i; unsigned long page_size, size; struct stat stat; struct timeval t1, t2; page_size = getpagesize(); fd = open(argv[1], O_RDONLY); assert(fd >= 0); fstat(fd, &stat); size = stat.st_size; printf("%s: file %s, file size %lu, page size %lu\n", argv[0], read_filename, size, page_size); gettimeofday(&t1, NULL); mbuffer = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); for (i = 0 ; i < size ; i += page_size) sum += mbuffer[i]; munmap(mbuffer, page_size); gettimeofday(&t2, NULL); printf("\tread mmaped time: %luus\n", tv_diff(&t1, &t2)); close(fd); } Acked-by: Nicolas Pitre Cc: Catalin Marinas Cc: Marek Szyprowski Cc: Russell King Signed-off-by: Ming Lei Signed-off-by: Marek Szyprowski diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 26a5833..97926b1 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -880,10 +880,24 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); /* - * Mark the D-cache clean for this page to avoid extra flushing. + * Mark the D-cache clean for these pages to avoid extra flushing. */ - if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) - set_bit(PG_dcache_clean, &page->flags); + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + off / PAGE_SIZE; + off %= PAGE_SIZE; + if (off) { + pfn++; + left -= PAGE_SIZE - off; + } + while (left >= PAGE_SIZE) { + page = pfn_to_page(pfn++); + set_bit(PG_dcache_clean, &page->flags); + left -= PAGE_SIZE; + } + } } /** -- cgit v0.10.2 From 5b91a98c61abe914e6a80d7dc15e435c47ea0004 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Thu, 20 Jun 2013 20:31:00 +0800 Subject: ARM: dma: Drop __GFP_COMP for iommu dma memory allocations __iommu_alloc_buffer wants to split pages after allocation in order to reduce the memory footprint. This does not work well with __GFP_COMP pages, so drop this flag before allocation One failure example is snd_malloc_dev_pages call dma_alloc_coherent with __GFP_COMP. Signed-off-by: Richard Zhao Signed-off-by: Marek Szyprowski diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 97926b1..22c7d7e 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1328,6 +1328,15 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, if (gfp & GFP_ATOMIC) return __iommu_alloc_atomic(dev, size, handle); + /* + * Following is a work-around (a.k.a. hack) to prevent pages + * with __GFP_COMP being passed to split_page() which cannot + * handle them. The real problem is that this flag probably + * should be 0 on ARM as it is not supported on this + * platform; see CONFIG_HUGETLBFS. + */ + gfp &= ~(__GFP_COMP); + pages = __iommu_alloc_buffer(dev, size, gfp, attrs); if (!pages) return NULL; -- cgit v0.10.2