summaryrefslogtreecommitdiff
path: root/arch/arm/mm
AgeCommit message (Collapse)Author
2016-05-21lib/GCD.c: use binary GCD algorithm instead of EuclideanZhaoxiu Zeng
The binary GCD algorithm is based on the following facts: 1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2) 2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b) 3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b) Even on x86 machines with reasonable division hardware, the binary algorithm runs about 25% faster (80% the execution time) than the division-based Euclidian algorithm. On platforms like Alpha and ARMv6 where division is a function call to emulation code, it's even more significant. There are two variants of the code here, depending on whether a fast __ffs (find least significant set bit) instruction is available. This allows the unpredictable branches in the bit-at-a-time shifting loop to be eliminated. If fast __ffs is not available, the "even/odd" GCD variant is used. I use the following code to benchmark: #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <string.h> #include <time.h> #include <unistd.h> #define swap(a, b) \ do { \ a ^= b; \ b ^= a; \ a ^= b; \ } while (0) unsigned long gcd0(unsigned long a, unsigned long b) { unsigned long r; if (a < b) { swap(a, b); } if (b == 0) return a; while ((r = a % b) != 0) { a = b; b = r; } return b; } unsigned long gcd1(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; b >>= __builtin_ctzl(b); for (;;) { a >>= __builtin_ctzl(a); if (a == b) return a << __builtin_ctzl(r); if (a < b) swap(a, b); a -= b; } } unsigned long gcd2(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; r &= -r; while (!(b & r)) b >>= 1; for (;;) { while (!(a & r)) a >>= 1; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } unsigned long gcd3(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; b >>= __builtin_ctzl(b); if (b == 1) return r & -r; for (;;) { a >>= __builtin_ctzl(a); if (a == 1) return r & -r; if (a == b) return a << __builtin_ctzl(r); if (a < b) swap(a, b); a -= b; } } unsigned long gcd4(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; r &= -r; while (!(b & r)) b >>= 1; if (b == r) return r; for (;;) { while (!(a & r)) a >>= 1; if (a == r) return r; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = { gcd0, gcd1, gcd2, gcd3, gcd4, }; #define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0])) #if defined(__x86_64__) #define rdtscll(val) do { \ unsigned long __a,__d; \ __asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \ (val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \ } while(0) static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long), unsigned long a, unsigned long b, unsigned long *res) { unsigned long long start, end; unsigned long long ret; unsigned long gcd_res; rdtscll(start); gcd_res = gcd(a, b); rdtscll(end); if (end >= start) ret = end - start; else ret = ~0ULL - start + 1 + end; *res = gcd_res; return ret; } #else static inline struct timespec read_time(void) { struct timespec time; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time); return time; } static inline unsigned long long diff_time(struct timespec start, struct timespec end) { struct timespec temp; if ((end.tv_nsec - start.tv_nsec) < 0) { temp.tv_sec = end.tv_sec - start.tv_sec - 1; temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec; } else { temp.tv_sec = end.tv_sec - start.tv_sec; temp.tv_nsec = end.tv_nsec - start.tv_nsec; } return temp.tv_sec * 1000000000ULL + temp.tv_nsec; } static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long), unsigned long a, unsigned long b, unsigned long *res) { struct timespec start, end; unsigned long gcd_res; start = read_time(); gcd_res = gcd(a, b); end = read_time(); *res = gcd_res; return diff_time(start, end); } #endif static inline unsigned long get_rand() { if (sizeof(long) == 8) return (unsigned long)rand() << 32 | rand(); else return rand(); } int main(int argc, char **argv) { unsigned int seed = time(0); int loops = 100; int repeats = 1000; unsigned long (*res)[TEST_ENTRIES]; unsigned long long elapsed[TEST_ENTRIES]; int i, j, k; for (;;) { int opt = getopt(argc, argv, "n:r:s:"); /* End condition always first */ if (opt == -1) break; switch (opt) { case 'n': loops = atoi(optarg); break; case 'r': repeats = atoi(optarg); break; case 's': seed = strtoul(optarg, NULL, 10); break; default: /* You won't actually get here. */ break; } } res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops); memset(elapsed, 0, sizeof(elapsed)); srand(seed); for (j = 0; j < loops; j++) { unsigned long a = get_rand(); /* Do we have args? */ unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand(); unsigned long long min_elapsed[TEST_ENTRIES]; for (k = 0; k < repeats; k++) { for (i = 0; i < TEST_ENTRIES; i++) { unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]); if (k == 0 || min_elapsed[i] > tmp) min_elapsed[i] = tmp; } } for (i = 0; i < TEST_ENTRIES; i++) elapsed[i] += min_elapsed[i]; } for (i = 0; i < TEST_ENTRIES; i++) printf("gcd%d: elapsed %llu\n", i, elapsed[i]); k = 0; srand(seed); for (j = 0; j < loops; j++) { unsigned long a = get_rand(); unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand(); for (i = 1; i < TEST_ENTRIES; i++) { if (res[j][i] != res[j][0]) break; } if (i < TEST_ENTRIES) { if (k == 0) { k = 1; fprintf(stderr, "Error:\n"); } fprintf(stderr, "gcd(%lu, %lu): ", a, b); for (i = 0; i < TEST_ENTRIES; i++) fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n"); } } if (k == 0) fprintf(stderr, "PASS\n"); free(res); return 0; } Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got: zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 10174 gcd1: elapsed 2120 gcd2: elapsed 2902 gcd3: elapsed 2039 gcd4: elapsed 2812 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9309 gcd1: elapsed 2280 gcd2: elapsed 2822 gcd3: elapsed 2217 gcd4: elapsed 2710 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9589 gcd1: elapsed 2098 gcd2: elapsed 2815 gcd3: elapsed 2030 gcd4: elapsed 2718 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9914 gcd1: elapsed 2309 gcd2: elapsed 2779 gcd3: elapsed 2228 gcd4: elapsed 2709 PASS [akpm@linux-foundation.org: avoid #defining a CONFIG_ variable] Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com> Signed-off-by: George Spelvin <linux@horizon.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-05-20Merge branch 'for-linus' of git://git.armlinux.org.uk/~rmk/linux-armLinus Torvalds
Pull ARM updates from Russell King: "Changes included in this pull request: - revert pxa2xx-flash back to using ioremap_cached() and switch memremap() to use arch_memremap_wb() - remove pci=firmware command line argument handling - remove unnecessary arm_dma_set_mask() implementation, the generic implementation will do for ARM - removal of the ARM kallsyms "hack" to work around mode switching veneers and vectors located below PAGE_OFFSET - tidy up build system output a little - add L2 cache power management DT bindings - remove duplicated local_irq_disable() in reboot paths - handle AMBA primecell devices better at registration time with PM domains (needed for Samsung SoCs) - ARM specific preparation to support Keystone II kexec" * 'for-linus' of git://git.armlinux.org.uk/~rmk/linux-arm: ARM: 8567/1: cache-uniphier: activate ways for secondary CPUs ARM: 8570/2: Documentation: devicetree: Add PL310 PM bindings ARM: 8569/1: pl2x0: Add OF control of cache power management ARM: 8568/1: reboot: remove duplicated local_irq_disable() ARM: 8566/1: drivers: amba: properly handle devices with power domains ARM: provide arm_has_idmap_alias() helper ARM: kexec: remove 512MB restriction on kexec crashdump ARM: provide improved virt_to_idmap() functionality ARM: kexec: fix crashkernel= handling ARM: 8557/1: specify install, zinstall, and uinstall as PHONY targets ARM: 8562/1: suppress "include/generated/mach-types.h is up to date." ARM: 8553/1: kallsyms: remove --page-offset command line option ARM: 8552/1: kallsyms: remove special lower address limit for CONFIG_ARM ARM: 8555/1: kallsyms: ignore ARM mode switching veneers ARM: 8548/1: dma-mapping: remove arm_dma_set_mask() ARM: 8554/1: kernel: pci: remove pci=firmware command line parameter handling ARM: memremap: implement arch_memremap_wb() memremap: add arch specific hook for MEMREMAP_WB mappings mtd: pxa2xx-flash: switch back from memremap to ioremap_cached ARM: reintroduce ioremap_cached() for creating cached I/O mappings
2016-05-19Merge branches 'amba', 'devel-stable', 'kexec-for-next' and 'misc' into ↵Russell King
for-linus
2016-05-09iommu: of: enforce const-ness of struct iommu_opsRobin Murphy
As a set of driver-provided callbacks and static data, there is no compelling reason for struct iommu_ops to be mutable in core code, so enforce const-ness throughout. Acked-by: Thierry Reding <treding@nvidia.com> Signed-off-by: Robin Murphy <robin.murphy@arm.com> Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
2016-05-07Merge branch 'fixes' of git://git.armlinux.org.uk/~rmk/linux-armLinus Torvalds
Pull ARM fixes from Russell King: "These are a number of updates to fix a few problems found in the ARM nommu code over the last couple of years, caused mostly by changes on the mmu side" * 'fixes' of git://git.armlinux.org.uk/~rmk/linux-arm: ARM: 8573/1: domain: move {set,get}_domain under config guard ARM: 8572/1: nommu: change memory reserve for the vectors ARM: 8571/1: nommu: fix PMSAv7 setup
2016-05-05ARM: 8567/1: cache-uniphier: activate ways for secondary CPUsMasahiro Yamada
This outer cache allows to control active ways independently for each CPU, but currently nothing is done for secondary CPUs. In other words, all the ways are locked for secondary CPUs by default. This commit fixes it to fully bring out the performance of this outer cache. There would be two possible ways to achieve this: [1] Each CPU initializes active ways for itself. This can be done via the SSCLPDAWCR register. This is a banked register, so each CPU sees a different instance of the register for its own. [2] The master CPU initializes active ways for all the CPUs. This is available via SSCDAWCARMR(N) registers, where all instances of SSCLPDAWCR are mirrored. They are mapped at the address SSCDAWCARMR + 4 * N, where N is the CPU number. The outer cache frame work does not support a per-CPU init callback. So this commit adopts [2]; the master CPU iterates over possible CPUs setting up SSCDAWCARMR(N) registers. Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-05-05ARM: 8572/1: nommu: change memory reserve for the vectorsJean-Philippe Brucker
Commit 19accfd3 (ARM: move vector stubs) moved the vector stubs in an additional page above the base vector one. This change wasn't taken into account by the nommu memreserve. This patch ensures that the kernel won't overwrite any vector stub on nommu. [changed the MPU side too] Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com> Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-05-05ARM: 8571/1: nommu: fix PMSAv7 setupJean-Philippe Brucker
Commit 1c2f87c (ARM: 8025/1: Get rid of meminfo) broke the support for MPU on ARMv7-R. This patch adapts the code inside CONFIG_ARM_MPU to use memblocks appropriately. MPU initialisation only uses the first memory region, and removes all subsequent ones. Because looping over all regions that need removal is inefficient, and memblock_remove already handles memory ranges, we can flatten the 'for_each_memblock' part. Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com> Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-05-05ARM: 8569/1: pl2x0: Add OF control of cache power managementBrad Mouring
Add ability to override power management bits of 310 controllers (dynamic clock gating and standby mode) through OF entries. As the saved register is only applied when working on a supported controller, it is safe to save the settings. In order to maintain existing behavior, if the settings are not found in the DT, the corresponding feature will be enabled. Signed-off-by: Brad Mouring <brad.mouring@ni.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-05-03ARM: provide improved virt_to_idmap() functionalityRussell King
For kexec, we need more functionality from the IDMAP system. We need to be able to convert physical addresses to their identity mappped versions as well as virtual addresses. Convert the existing arch_virt_to_idmap() to deal with physical addresses instead. Acked-by: Santosh Shilimkar <ssantosh@kernel.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-04-21Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-armLinus Torvalds
Pull ARM fixes from Russell King: "Three further fixes for ARM. Alexandre Courbot was having problems with DMA allocations with the GFP flags affecting where the tracking data was being allocated from. Vladimir Murzin noticed that the CPU feature code was not entirely correct, which can cause some features to be misreported" * 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: ARM: 8564/1: fix cpu feature extracting helper ARM: 8563/1: fix demoting HWCAP_SWP ARM: 8551/2: DMA: Fix kzalloc flags in __dma_alloc
2016-04-20Merge tag 'arm-memremap-for-v4.7' of ↵Russell King
git://git.linaro.org/people/ard.biesheuvel/linux-arm into devel-stable This series wires up the generic memremap() function for ARM in a way that allows it to be used as intended, i.e., without regard for whether the region being mapped is covered by a struct page and/or the linear mapping (lowmem)
2016-04-15ARM: 8551/2: DMA: Fix kzalloc flags in __dma_allocAlexandre Courbot
Commit 19e6e5e5392b ("ARM: 8547/1: dma-mapping: store buffer information") allocates a structure meant for internal buffer management with the GFP flags of the buffer itself. This can trigger the following safeguard in the slab/slub allocator: if (unlikely(flags & GFP_SLAB_BUG_MASK)) { pr_emerg("gfp: %un", flags & GFP_SLAB_BUG_MASK); BUG(); } Fix this by filtering the flags that make the slab allocator unhappy. Signed-off-by: Alexandre Courbot <acourbot@nvidia.com> Acked-by: Rabin Vincent <rabin@rab.in> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-04-11Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-armLinus Torvalds
Pull ARM fixes from Russell King: "A couple of small fixes, and wiring up the new syscalls which appeared during the merge window" * 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: ARM: 8550/1: protect idiv patching against undefined gcc behavior ARM: wire up preadv2 and pwritev2 syscalls ARM: SMP enable of cache maintanence broadcast
2016-04-07ARM: 8548/1: dma-mapping: remove arm_dma_set_mask()Alexandre Courbot
arm_dma_set_mask() implements exactly the same behavior as the fallback that dma_set_mask() takes if the set_dma_mask op is not set. Remove it and use that fallback instead like what is already done for dma_get_mask(). Signed-off-by: Alexandre Courbot <acourbot@nvidia.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-04-04mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macrosKirill A. Shutemov
PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time ago with promise that one day it will be possible to implement page cache with bigger chunks than PAGE_SIZE. This promise never materialized. And unlikely will. We have many places where PAGE_CACHE_SIZE assumed to be equal to PAGE_SIZE. And it's constant source of confusion on whether PAGE_CACHE_* or PAGE_* constant should be used in a particular case, especially on the border between fs and mm. Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much breakage to be doable. Let's stop pretending that pages in page cache are special. They are not. The changes are pretty straight-forward: - <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using script below. For some reason, coccinelle doesn't patch header files. I've called spatch for them manually. The only adjustment after coccinelle is revert of changes to PAGE_CAHCE_ALIGN definition: we are going to drop it later. There are few places in the code where coccinelle didn't reach. I'll fix them manually in a separate patch. Comments and documentation also will be addressed with the separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-04ARM: memremap: implement arch_memremap_wb()Ard Biesheuvel
The generic memremap() falls back to using ioremap_cache() to create MEMREMAP_WB mappings if the requested region is not already covered by the linear mapping, unless the architecture provides an implementation of arch_memremap_wb(). Since ioremap_cache() is not appropriate on ARM to map memory with the same attributes used for the linear mapping, implement arch_memremap_wb() which does exactly that. Also, relax the WARN() check to allow MT_MEMORY_RW mappings of pfn_valid() pages. Cc: Russell King <rmk+kernel@arm.linux.org.uk> Acked-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
2016-04-04ARM: reintroduce ioremap_cached() for creating cached I/O mappingsArd Biesheuvel
The original ARM-only ioremap flavor 'ioremap_cached' has been renamed to 'ioremap_cache' to align with other architectures, and subsequently abused in generic code to map things like firmware tables in memory. For that reason, there is currently an effort underway to deprecate ioremap_cache, whose semantics are poorly defined, and which is typed with an __iomem annotation that is inappropriate for mappings of ordinary memory. However, original users of ioremap_cached() used it in a context where the I/O connotation is appropriate, and replacing those instances with memremap() does not make sense. So let's revive ioremap_cached(), so that we can change back those original users before we drop ioremap_cache entirely in favor of memremap. Cc: Russell King <rmk+kernel@arm.linux.org.uk> Acked-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
2016-04-01ARM: SMP enable of cache maintanence broadcastRussell King
Masahiro Yamada reports that we can fail to set the FW bit in the auxiliary control register, which enables broadcasting the cache maintanence operations. This occurs because we only check that the SMP/nAMP bit is set, rather than checking whether all the bits we want to be set are set. Rearrange the code to ensure that all desired bits are set, and only update the register if we discover some required bits are not set. Tested-by: Masahiro Yamada <yamada.masahiro@socionext.com>
2016-03-19Merge branch 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-armLinus Torvalds
Pull ARM updates from Russell King: "Another mixture of changes this time around: - Split XIP linker file from main linker file to make it more maintainable, and various XIP fixes, and clean up a resulting macro. - Decompressor cleanups from Masahiro Yamada - Avoid printing an error for a missing L2 cache - Remove some duplicated symbols in System.map, and move vectors/stubs back into kernel VMA - Various low priority fixes from Arnd - Updates to allow bus match functions to return negative errno values, touching some drivers and the driver core. Greg has acked these changes. - Virtualisation platform udpates form Jean-Philippe Brucker. - Security enhancements from Kees Cook - Rework some Kconfig dependencies and move PSCI idle management code out of arch/arm into drivers/firmware/psci.c - ARM DMA mapping updates, touching media, acked by Mauro. - Fix places in ARM code which should be using virt_to_idmap() so that Keystone2 can work. - Fix Marvell Tauros2 to work again with non-DT boots. - Provide a delay timer for ARM Orion platforms" * 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: (45 commits) ARM: 8546/1: dma-mapping: refactor to fix coherent+cma+gfp=0 ARM: 8547/1: dma-mapping: store buffer information ARM: 8543/1: decompressor: rename suffix_y to compress-y ARM: 8542/1: decompressor: merge piggy.*.S and simplify Makefile ARM: 8541/1: decompressor: drop redundant FORCE in Makefile ARM: 8540/1: decompressor: use clean-files instead of extra-y to clean files ARM: 8539/1: decompressor: drop more unneeded assignments to "targets" ARM: 8538/1: decompressor: drop unneeded assignments to "targets" ARM: 8532/1: uncompress: mark putc as inline ARM: 8531/1: turn init_new_context into an inline function ARM: 8530/1: remove VIRT_TO_BUS ARM: 8537/1: drop unused DEBUG_RODATA from XIP_KERNEL ARM: 8536/1: mm: hide __start_rodata_section_aligned for non-debug builds ARM: 8535/1: mm: DEBUG_RODATA makes no sense with XIP_KERNEL ARM: 8534/1: virt: fix hyp-stub build for pre-ARMv7 CPUs ARM: make the physical-relative calculation more obvious ARM: 8512/1: proc-v7.S: Adjust stack address when XIP_KERNEL ARM: 8411/1: Add default SPARSEMEM settings ARM: 8503/1: clk_register_clkdev: remove format string interface ARM: 8529/1: remove 'i' and 'zi' targets ...
2016-03-17mm: remove VM_FAULT_MINORJan Kara
The define has a comment from Nick Piggin from 2007: /* For backwards compat. Remove me quickly. */ I guess 9 years should not be too hurried sense of 'quickly' even for kernel measures. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-17mm: cleanup *pte_alloc* interfacesKirill A. Shutemov
There are few things about *pte_alloc*() helpers worth cleaning up: - 'vma' argument is unused, let's drop it; - most __pte_alloc() callers do speculative check for pmd_none(), before taking ptl: let's introduce pte_alloc() macro which does the check. The only direct user of __pte_alloc left is userfaultfd, which has different expectation about atomicity wrt pmd. - pte_alloc_map() and pte_alloc_map_lock() are redefined using pte_alloc(). [sudeep.holla@arm.com: fix build for arm64 hugetlbpage] [sfr@canb.auug.org.au: fix arch/arm/mm/mmu.c some more] Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Dave Hansen <dave.hansen@intel.com> Signed-off-by: Sudeep Holla <sudeep.holla@arm.com> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-06Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-armLinus Torvalds
Pull ARM fixes from Russell King: "Just two ARM fixes this time: one to fix the hyp-stub for older ARM CPUs, and another to fix the set_memory_xx() permission functions to deal with zero sizes correctly" * 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: ARM: 8544/1: set_memory_xx fixes ARM: 8534/1: virt: fix hyp-stub build for pre-ARMv7 CPUs
2016-03-04Merge branches 'amba', 'fixes', 'misc' and 'tauros2' into for-nextRussell King
2016-03-04ARM: 8546/1: dma-mapping: refactor to fix coherent+cma+gfp=0Rabin Vincent
Given a device which uses arm_coherent_dma_ops and on which dev_get_cma_area(dev) returns non-NULL, the following usage of the DMA API with gfp=0 results in memory corruption and a memory leak. p = dma_alloc_coherent(dev, sz, &dma, 0); if (p) dma_free_coherent(dev, sz, p, dma); The memory leak is because the alloc allocates using __alloc_simple_buffer() but the free attempts dma_release_from_contiguous() which does not do free anything since the page is not in the CMA area. The memory corruption is because the free calls __dma_remap() on a page which is backed by only first level page tables. The apply_to_page_range() + __dma_update_pte() loop ends up interpreting the section mapping as an addresses to a second level page table and writing the new PTE to memory which is not used by page tables. We don't have access to the GFP flags used for allocation in the free function. Fix this by adding allocator backends and using this information in the free function so that we always use the correct release routine. Fixes: 21caf3a7 ("ARM: 8398/1: arm DMA: Fix allocation from CMA for coherent DMA") Signed-off-by: Rabin Vincent <rabin.vincent@axis.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-03-04ARM: 8547/1: dma-mapping: store buffer informationRabin Vincent
Keep a list of allocated DMA buffers so that we can store metadata in alloc() which we later need in free(). Signed-off-by: Rabin Vincent <rabin.vincent@axis.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-03-04ARM: 8544/1: set_memory_xx fixesMika Penttilä
Allow zero size updates. This makes set_memory_xx() consistent with x86, s390 and arm64 and makes apply_to_page_range() not to BUG() when loading modules. Signed-off-by: Mika Penttilä mika.penttila@nextfour.com Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-02-27mm: ASLR: use get_random_long()Daniel Cashman
Replace calls to get_random_int() followed by a cast to (unsigned long) with calls to get_random_long(). Also address shifting bug which, in case of x86 removed entropy mask for mmap_rnd_bits values > 31 bits. Signed-off-by: Daniel Cashman <dcashman@android.com> Acked-by: Kees Cook <keescook@chromium.org> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: David S. Miller <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Nick Kralevich <nnk@google.com> Cc: Jeff Vander Stoep <jeffv@google.com> Cc: Mark Salyzyn <salyzyn@android.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-02-22ARM: 8535/1: mm: DEBUG_RODATA makes no sense with XIP_KERNELArnd Bergmann
When CONFIG_DEBUG_ALIGN_RODATA is set, we get a link error: arch/arm/mm/built-in.o:(.data+0x4bc): undefined reference to `__start_rodata_section_aligned' However, this combination is useless, as XIP_KERNEL implies that all the RODATA is already marked readonly, so both CONFIG_DEBUG_RODATA and CONFIG_DEBUG_ALIGN_RODATA (which depends on the other) are not needed with XIP_KERNEL, and this patches enforces that using a Kconfig dependency. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Fixes: 25362dc496ed ("ARM: 8501/1: mm: flip priority of CONFIG_DEBUG_RODATA") Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-02-17ARM: make the physical-relative calculation more obviousRussell King
The physical-relative calculation between the XIP text and data sections introduced by the previous patch was far from obvious. Let's simplify it by turning it into a macro which takes the two (virtual) addresses. This allows us to arrange the calculation in a more obvious manner - we can make it two sub-expressions which calculate the physical address for each symbol, and then takes the difference of those physical addresses. Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-02-16ARM: 8512/1: proc-v7.S: Adjust stack address when XIP_KERNELNicolas Pitre
When XIP_KERNEL is enabled, the virt to phys address translation for RAM is not the same as the virt to phys address translation for .text. The only way to know where physical RAM is located is to use PLAT_PHYS_OFFSET. The MACRO will be useful for other places where there is a similar problem. Signed-off-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Chris Brandt <chris.brandt@renesas.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-02-11ARM: 8502/1: mm: mark section-aligned portion of rodata NXKees Cook
When rodata is large enough that it crosses a section boundary after the kernel text, mark the rest NX. This is as close to full NX of rodata as we can get without splitting page tables or doing section alignment via CONFIG_DEBUG_ALIGN_RODATA. When the config is: CONFIG_DEBUG_RODATA=y # CONFIG_DEBUG_ALIGN_RODATA is not set Before: ---[ Kernel Mapping ]--- 0x80000000-0x80100000 1M RW NX SHD 0x80100000-0x80a00000 9M ro x SHD 0x80a00000-0xa0000000 502M RW NX SHD After: ---[ Kernel Mapping ]--- 0x80000000-0x80100000 1M RW NX SHD 0x80100000-0x80700000 6M ro x SHD 0x80700000-0x80a00000 3M ro NX SHD 0x80a00000-0xa0000000 502M RW NX SHD Signed-off-by: Kees Cook <keescook@chromium.org> Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-02-11ARM: 8518/1: Use correct symbols for XIP_KERNELChris Brandt
For an XIP build, _etext does not represent the end of the binary image that needs to stay mapped into the MODULES_VADDR area. Years ago, data came before text in the memory map. However, now that the order is text/init/data, an XIP_KERNEL needs to map up to the data location in order to keep from cutting off parts of the kernel that are needed. We only map up to the beginning of data because data has already been copied, so there's no reason to keep it around anymore. A new symbol is created to make it clear what it is we are referring to. This fixes the bug where you might lose the end of your kernel area after page table setup is complete. Signed-off-by: Chris Brandt <chris.brandt@renesas.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-02-11ARM: 8507/1: dma-mapping: Use DMA_ATTR_ALLOC_SINGLE_PAGES hint to optimize allocDoug Anderson
If we know that TLB efficiency will not be an issue when memory is accessed then it's not terribly important to allocate big chunks of memory. The whole point of allocating the big chunks was that it would make TLB usage efficient. As Marek Szyprowski indicated: Please note that mapping memory with larger pages significantly improves performance, especially when IOMMU has a little TLB cache. This can be easily observed when multimedia devices do processing of RGB data with 90/270 degree rotation Image rotation is distinctly an operation that needs to bounce around through memory, so it makes sense that TLB efficiency is important there. Video decoding, on the other hand, is a fairly sequential operation. During video decoding it's not expected that we'll be jumping all over memory. Decoding video is also pretty heavy and the TLB misses aren't a huge deal. Presumably most HW video acceleration users of dma-mapping will not care about huge pages and will set DMA_ATTR_ALLOC_SINGLE_PAGES. Allocating big chunks of memory is quite expensive, especially if we're doing it repeadly and memory is full. In one (out of tree) usage model it is common that arm_iommu_alloc_attrs() is called 16 times in a row, each one trying to allocate 4 MB of memory. This is called whenever the system encounters a new video, which could easily happen while the memory system is stressed out. In fact, on certain social media websites that auto-play video and have infinite scrolling, it's quite common to see not just one of these 16x4MB allocations but 2 or 3 right after another. Asking the system even to do a small amount of extra work to give us big chunks in this case is just not a good use of time. Allocating big chunks of memory is also expensive indirectly. Even if we ask the system not to do ANY extra work to allocate _our_ memory, we're still potentially eating up all big chunks in the system. Presumably there are other users in the system that aren't quite as flexible and that actually need these big chunks. By eating all the big chunks we're causing extra work for the rest of the system. We also may start making other memory allocations fail. While the system may be robust to such failures (as is the case with dwc2 USB trying to allocate buffers for Ethernet data and with WiFi trying to allocate buffers for WiFi data), it is yet another big performance hit. Signed-off-by: Douglas Anderson <dianders@chromium.org> Acked-by: Marek Szyprowski <m.szyprowski@samsung.com> Tested-by: Javier Martinez Canillas <javier@osg.samsung.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-02-11ARM: 8505/1: dma-mapping: Optimize allocationDoug Anderson
The __iommu_alloc_buffer() is expected to be called to allocate pretty sizeable buffers. Upon simple tests of video I saw it trying to allocate 4,194,304 bytes. The function tries to allocate large chunks in order to optimize IOMMU TLB usage. The current function is very, very slow. One problem is the way it keeps trying and trying to allocate big chunks. Imagine a very fragmented memory that has 4M free but no contiguous pages at all. Further imagine allocating 4M (1024 pages). We'll do the following memory allocations: - For page 1: - Try to allocate order 10 (no retry) - Try to allocate order 9 (no retry) - ... - Try to allocate order 0 (with retry, but not needed) - For page 2: - Try to allocate order 9 (no retry) - Try to allocate order 8 (no retry) - ... - Try to allocate order 0 (with retry, but not needed) - ... - ... Total number of calls to alloc() calls for this case is: sum(int(math.log(i, 2)) + 1 for i in range(1, 1025)) => 9228 The above is obviously worse case, but given how slow alloc can be we really want to try to avoid even somewhat bad cases. I timed the old code with a device under memory pressure and it wasn't hard to see it take more than 120 seconds to allocate 4 megs of memory! (NOTE: testing was done on kernel 3.14, so possibly mainline would behave differently). A second problem is that allocating big chunks under memory pressure when we don't need them is just not a great idea anyway unless we really need them. We can make due pretty well with smaller chunks so it's probably wise to leave bigger chunks for other users once memory pressure is on. Let's adjust the allocation like this: 1. If a big chunk fails, stop trying to hard and bump down to lower order allocations. 2. Don't try useless orders. The whole point of big chunks is to optimize the TLB and it can really only make use of 2M, 1M, 64K and 4K sizes. We'll still tend to eat up a bunch of big chunks, but that might be the right answer for some users. A future patch could possibly add a new DMA_ATTR that would let the caller decide that TLB optimization isn't important and that we should use smaller chunks. Presumably this would be a sane strategy for some callers. Signed-off-by: Douglas Anderson <dianders@chromium.org> Acked-by: Marek Szyprowski <m.szyprowski@samsung.com> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Reviewed-by: Tomasz Figa <tfiga@chromium.org> Tested-by: Javier Martinez Canillas <javier@osg.samsung.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-02-08ARM: 8501/1: mm: flip priority of CONFIG_DEBUG_RODATAKees Cook
The use of CONFIG_DEBUG_RODATA is generally seen as an essential part of kernel self-protection: http://www.openwall.com/lists/kernel-hardening/2015/11/30/13 Additionally, its name has grown to mean things beyond just rodata. To get ARM closer to this, we ought to rearrange the names of the configs that control how the kernel protects its memory. What was called CONFIG_ARM_KERNMEM_PERMS is realy doing the work that other architectures call CONFIG_DEBUG_RODATA. This redefines CONFIG_DEBUG_RODATA to actually do the bulk of the ROing (and NXing). In the place of the old CONFIG_DEBUG_RODATA, use CONFIG_DEBUG_ALIGN_RODATA, since that's what the option does: adds section alignment for making rodata explicitly NX, as arm does not split the page tables like arm64 does without _ALIGN_RODATA. Also adds human readable names to the sections so I could more easily debug my typos, and makes CONFIG_DEBUG_RODATA default "y" for CPU_V7. Results in /sys/kernel/debug/kernel_page_tables for each config state: # CONFIG_DEBUG_RODATA is not set # CONFIG_DEBUG_ALIGN_RODATA is not set ---[ Kernel Mapping ]--- 0x80000000-0x80900000 9M RW x SHD 0x80900000-0xa0000000 503M RW NX SHD CONFIG_DEBUG_RODATA=y CONFIG_DEBUG_ALIGN_RODATA=y ---[ Kernel Mapping ]--- 0x80000000-0x80100000 1M RW NX SHD 0x80100000-0x80700000 6M ro x SHD 0x80700000-0x80a00000 3M ro NX SHD 0x80a00000-0xa0000000 502M RW NX SHD CONFIG_DEBUG_RODATA=y # CONFIG_DEBUG_ALIGN_RODATA is not set ---[ Kernel Mapping ]--- 0x80000000-0x80100000 1M RW NX SHD 0x80100000-0x80a00000 9M ro x SHD 0x80a00000-0xa0000000 502M RW NX SHD Signed-off-by: Kees Cook <keescook@chromium.org> Reviewed-by: Laura Abbott <labbott@fedoraproject.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-02-08ARM: make virt_to_idmap() return unsigned longRussell King
Make virt_to_idmap() return an unsigned long rather than phys_addr_t. Returning phys_addr_t here makes no sense, because the definition of virt_to_idmap() is that it shall return a physical address which maps identically with the virtual address. Since virtual addresses are limited to 32-bit, identity mapped physical addresses are as well. Almost all users already had an implicit narrowing cast to unsigned long so let's make this official and part of this interface. Tested-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2016-01-23tree wide: use kvfree() than conditional kfree()/vfree()Tetsuo Handa
There are many locations that do if (memory_was_allocated_by_vmalloc) vfree(ptr); else kfree(ptr); but kvfree() can handle both kmalloc()ed memory and vmalloc()ed memory using is_vmalloc_addr(). Unless callers have special reasons, we can replace this branch with kvfree(). Please check and reply if you found problems. Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Jan Kara <jack@suse.com> Acked-by: Russell King <rmk+kernel@arm.linux.org.uk> Reviewed-by: Andreas Dilger <andreas.dilger@intel.com> Acked-by: "Rafael J. Wysocki" <rjw@rjwysocki.net> Acked-by: David Rientjes <rientjes@google.com> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Oleg Drokin <oleg.drokin@intel.com> Cc: Boris Petkov <bp@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-21Merge tag 'armsoc-multiplatform' of ↵Linus Torvalds
git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc Pull ARM SoC multiplatform code updates from Arnd Bergmann: "This branch is the culmination of 5 years of effort to bring the ARMv6 and ARMv7 platforms together such that they can all be enabled and boot the same kernel. It has been a tremendous amount of cleanup and refactoring by a huge number of people, and creation of several new (and major) subsystems to better abstract out all the platform details in an appropriate manner. The bulk of this branch is a large patchset from Arnd that brings several of the more minor and older platforms we have closer to multiplatform support. Among these are MMP, S3C64xx, Orion5x, mv78xx0 and realview Much of this is moving around header files from old mach directories, but there are also some cleanup patches of debug_ll (lowlevel debug per-platform options) and other parts. Linus Walleij also has some patchs to clean up the older ARM Realview platforms by finally introducing DT support, and Rob Herring has some for ARM Versatile which is now DT-only. Both of these platforms are now multiplatform. Finally, a couple of patches from Russell for Dove PMU, and a fix from Valentin Rothberg for Exynos ADC, which were rebased on top of the series to avoid conflicts" * tag 'armsoc-multiplatform' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc: (75 commits) ARM: realview: don't select SMP_ON_UP for UP builds ARM: s3c: simplify s3c_irqwake_{e,}intallow definition ARM: s3c64xx: fix pm-debug compilation iio: exynos-adc: fix irqf_oneshot.cocci warnings ARM: realview: build realview-dt SMP support only when used ARM: realview: select apropriate targets ARM: realview: clean up header files ARM: realview: make all header files local ARM: no longer make CPU targets visible separately ARM: integrator: use explicit core module options ARM: realview: enable multiplatform ARM: make default platform work for NOMMU ARM: debug-ll: move DEBUG_LL_UART_EFM32 to correct Kconfig location ARM: defconfig: use correct debug_ll settings ARM: versatile: convert to multi-platform ARM: versatile: merge mach code into a single file ARM: versatile: switch to DT only booting and remove legacy code ARM: versatile: add DT based PCI detection ARM: pxa: mark ezx structures as __maybe_unused ARM: pxa: mark raumfeld init functions as __maybe_unused ...
2016-01-16mm: differentiate page_mapped() from page_mapcount() for compound pagesKirill A. Shutemov
Let's define page_mapped() to be true for compound pages if any sub-pages of the compound page is mapped (with PMD or PTE). On other hand page_mapcount() return mapcount for this particular small page. This will make cases like page_get_anon_vma() behave correctly once we allow huge pages to be mapped with PTE. Most users outside core-mm should use page_mapcount() instead of page_mapped(). Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Tested-by: Sasha Levin <sasha.levin@oracle.com> Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Acked-by: Jerome Marchand <jmarchan@redhat.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Rik van Riel <riel@redhat.com> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Steve Capper <steve.capper@linaro.org> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@suse.cz> Cc: Christoph Lameter <cl@linux.com> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-16arm, thp: remove infrastructure for handling splitting PMDsKirill A. Shutemov
With new refcounting we don't need to mark PMDs splitting. Let's drop code to handle this. pmdp_splitting_flush() is not needed too: on splitting PMD we will do pmdp_clear_flush() + set_pte_at(). pmdp_clear_flush() will do IPI as needed for fast_gup. [arnd@arndb.de: fix unterminated ifdef in header file] Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Cc: Jerome Marchand <jmarchan@redhat.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Rik van Riel <riel@redhat.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Steve Capper <steve.capper@linaro.org> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@suse.cz> Cc: Christoph Lameter <cl@linux.com> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-15arm: mm: support ARCH_MMAP_RND_BITSDaniel Cashman
arm: arch_mmap_rnd() uses a hard-code value of 8 to generate the random offset for the mmap base address. This value represents a compromise between increased ASLR effectiveness and avoiding address-space fragmentation. Replace it with a Kconfig option, which is sensibly bounded, so that platform developers may choose where to place this compromise. Keep 8 as the minimum acceptable value. [arnd@arndb.de: ARM: avoid ARCH_MMAP_RND_BITS for NOMMU] Signed-off-by: Daniel Cashman <dcashman@google.com> Cc: Russell King <linux@arm.linux.org.uk> Acked-by: Kees Cook <keescook@chromium.org> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Don Zickus <dzickus@redhat.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Heinrich Schuchardt <xypron.glpk@gmx.de> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: David Rientjes <rientjes@google.com> Cc: Mark Salyzyn <salyzyn@android.com> Cc: Jeff Vander Stoep <jeffv@google.com> Cc: Nick Kralevich <nnk@google.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Hector Marco-Gisbert <hecmargi@upv.es> Cc: Borislav Petkov <bp@suse.de> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-12Merge branch 'devel-stable' into for-linusRussell King
2016-01-05Merge branches 'misc' and 'misc-rc6' into for-linusRussell King
2016-01-04ARM: 8494/1: mm: Enable PXN when running non-LPAE kernel on LPAE processorJungseung Lee
The VMSA field of MMFR0 (bottom 4 bits) is incremented for each added feature. PXN is supported if the value is >= 4 and LPAE is supported if it is >= 5. In case a kernel with CONFIG_ARM_LPAE disabled is used on a processor that supports LPAE, we can still use PXN in short descriptors. So check for >= 4 not == 4. Signed-off-by: Jungseung Lee <js07.lee@samsung.com> Acked-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Ben Hutchings <ben@decadent.org.uk> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2015-12-22ARM: 8482/1: l2x0: make it possible to disable outer sync from DTLinus Walleij
According to commit 2503a5ecd86c002506001eba432c524ea009fe7f "ARM: 6201/1: RealView: Do not use outer_sync() on ARM11MPCore boards with L220" Some PB11MPCore RealView core tiles have broken outer_sync. We got rid of the custom barriers from the machine by disabling outer sync, but that was just for the boardfile case. We have to be able to do the same in the device tree case. Since __l2c_init() is cloning and copying the L2C vtable, we pass an argument to this function to optionally numb the outer sync operation if desired, before initializing the cache. After this we can set up the cache correctly on the RealView PB11MPCore. This was tested on a PB11MPCore known to have the issue. Before this, spurious crashes would occur if we try to set up the cache properly, after this it boots rock solid. Cc: Arnd Bergmann <arnd@arndb.de> Cc: devicetree@vger.kernel.org Acked-by: Rob Herring <robh@kernel.org> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2015-12-18Merge tag 'realview-multiplatform-tag' of ↵Arnd Bergmann
git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-integrator into next/multiplatform Pull "Multiplatform support for the RealView" from Linus Walleij: Here is the result of my application of the second part of Arnds patchset, actually enabling multiplatform and getting the RealView off the ground as a multiplatform target. It is dependent on an outstanding patch to the irqchips tree bumping the number of GICs to 2 for the RealView platform. I cannot say I will be sleepless if these go in side by side: each branch will compile but will not boot until both trees have been pulled hurting bisectability a bit. - Tested on the ARM PB11MPCore - Tested with boardfile boot - Tested with DeviceTree boot * tag 'realview-multiplatform-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-integrator: ARM: realview: select apropriate targets ARM: realview: clean up header files ARM: realview: make all header files local ARM: no longer make CPU targets visible separately ARM: integrator: use explicit core module options ARM: realview: enable multiplatform
2015-12-18ARM: no longer make CPU targets visible separatelyArnd Bergmann
Now that realview and integrator always select the correct CPU type themselves based on the core tiles, there is no need to still have them user-visible in arch/arm/mm/Kconfig. The ARM925T symbol has been selected by the only user for many years, so that can be removed along with the realview and integrator specific ones. This also solves randconfig build problems on realview. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Cc: Russell King <linux@arm.linux.org.uk> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
2015-12-17Merge branch 'libnvdimm-fixes' of ↵Linus Torvalds
git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm Pull libnvdimm fixes from Dan Williams: - Two bug fixes for misuse of PAGE_MASK in scatterlist and dma-debug. These are tagged for -stable. The scatterlist impact is potentially corrupted dma addresses on HIGHMEM enabled platforms. - A minor locking fix for the NFIT hot-add implementation that is new in 4.4-rc. This would only trigger in the case a hot-add raced driver removal. * 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: dma-debug: Fix dma_debug_entry offset calculation Revert "scatterlist: use sg_phys()" nfit: acpi_nfit_notify(): Do not leave device locked
2015-12-17ARM: 8453/2: proc-v7.S: don't locate temporary stack space in .text sectionNicolas Pitre
The proc-v7.S code uses a small temporary stack to preserve register content in its setup code. This stack is located in the .text section which is normally meant to be read-only. Move that temporary stack to the .bss section and get its address in a position independent way, similarly to what we do in other parts of the kernel. While at it, one comments was updated to reflect reality, and the list of saved registers in the proc-v7.S case is updated to match the comment next to it for coherency. Signed-off-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>