From 2b8514d0a792857b0826fe6b7c3b941cdb59a9c3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 24 Nov 2014 16:45:12 +0100 Subject: ARM: 8219/1: handle interworking and out-of-range relocations separately Currently, interworking calls on module boundaries are not supported, and are handled by the same error handling code path as non-interworking calls whose targets are simply out of range. Before modifying the handling of those out-of-range jump and call relocations in a subsequent patch, move the handling of interworking restrictions out of it. Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 2e11961..af791f4 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -98,14 +98,19 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_PC24: case R_ARM_CALL: case R_ARM_JUMP24: + if (sym->st_value & 3) { + pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (ARM -> Thumb)\n", + module->name, relindex, i, symname); + return -ENOEXEC; + } + offset = __mem_to_opcode_arm(*(u32 *)loc); offset = (offset & 0x00ffffff) << 2; if (offset & 0x02000000) offset -= 0x04000000; offset += sym->st_value - loc; - if (offset & 3 || - offset <= (s32)0xfe000000 || + if (offset <= (s32)0xfe000000 || offset >= (s32)0x02000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", module->name, relindex, i, symname, @@ -155,6 +160,22 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, #ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: + /* + * For function symbols, only Thumb addresses are + * allowed (no interworking). + * + * For non-function symbols, the destination + * has no specific ARM/Thumb disposition, so + * the branch is resolved under the assumption + * that interworking is not required. + */ + if (ELF32_ST_TYPE(sym->st_info) == STT_FUNC && + !(sym->st_value & 1)) { + pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (Thumb -> ARM)\n", + module->name, relindex, i, symname); + return -ENOEXEC; + } + upper = __mem_to_opcode_thumb16(*(u16 *)loc); lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); @@ -182,18 +203,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset -= 0x02000000; offset += sym->st_value - loc; - /* - * For function symbols, only Thumb addresses are - * allowed (no interworking). - * - * For non-function symbols, the destination - * has no specific ARM/Thumb disposition, so - * the branch is resolved under the assumption - * that interworking is not required. - */ - if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC && - !(offset & 1)) || - offset <= (s32)0xff000000 || + if (offset <= (s32)0xff000000 || offset >= (s32)0x01000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", module->name, relindex, i, symname, -- cgit v0.10.2 From 415ae101caf9fbf6746a88126494eda333174e90 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 27 Jan 2015 10:43:13 +0100 Subject: ARM: 8293/1: kernel: fix pci_mmap_page_range() offset calculation The pci_mmap_page_range() API should be written to expect offset values representing PCI memory resource addresses as seen by user space, through the pci_resource_to_user() API. ARM relies on the standard implementation of pci_resource_to_user() which actually is an identity map and exports to user space PCI memory resources as they are stored in PCI devices resources structures, which represent CPU physical addresses (fixed-up using BUS to CPU address conversions) not PCI bus addresses. Therefore, on ARM platforms where the mapping between CPU and BUS address is not a 1:1 the current pci_mmap_page_range() implementation is erroneous, in that an additional shift is applied to an already fixed-up offset passed from userspace. Hence, this patch removes the mem_offset from the pgoff calculation since the offset as passed from user space already represents the CPU physical address corresponding to the resource to be mapped, ie no additional offset should be applied. Cc: Arnd Bergmann Signed-off-by: Lorenzo Pieralisi Acked-by: Bjorn Helgaas Signed-off-by: Russell King diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index ab19b7c0..fcbbbb1 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -618,21 +618,15 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { - struct pci_sys_data *root = dev->sysdata; - unsigned long phys; - - if (mmap_state == pci_mmap_io) { + if (mmap_state == pci_mmap_io) return -EINVAL; - } else { - phys = vma->vm_pgoff + (root->mem_offset >> PAGE_SHIFT); - } /* * Mark this as IO */ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (remap_pfn_range(vma, vma->vm_start, phys, + if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, vma->vm_end - vma->vm_start, vma->vm_page_prot)) return -EAGAIN; -- cgit v0.10.2 From 6e8266e3333bd01700decf9866725e254d84f21a Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Mon, 9 Feb 2015 10:38:35 +0100 Subject: ARM: 8304/1: Respect NO_KERNEL_MAPPING when we don't have an IOMMU Even without an iommu, NO_KERNEL_MAPPING is still convenient to save on kernel address space in places where we don't need a kernel mapping. Implement support for it in the two places where we're creating an expensive mapping. __alloc_from_pool uses an internal pool from which we already have virtual addresses, so it's not relevant, and __alloc_simple_buffer uses alloc_pages, which will always return a lowmem page, which is already mapped into kernel space, so we can't prevent a mapping for it in that case. Signed-off-by: Jasper St. Pierre Signed-off-by: Carlo Caione Reviewed-by: Rob Clark Reviewed-by: Daniel Drake Acked-by: Marek Szyprowski Signed-off-by: Russell King diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 170a116..7137616 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -289,11 +289,11 @@ static void __dma_free_buffer(struct page *page, size_t size) static void *__alloc_from_contiguous(struct device *dev, size_t size, pgprot_t prot, struct page **ret_page, - const void *caller); + const void *caller, bool want_vaddr); static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, pgprot_t prot, struct page **ret_page, - const void *caller); + const void *caller, bool want_vaddr); static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, @@ -357,10 +357,10 @@ static int __init atomic_pool_init(void) if (dev_get_cma_area(NULL)) ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot, - &page, atomic_pool_init); + &page, atomic_pool_init, true); else ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot, - &page, atomic_pool_init); + &page, atomic_pool_init, true); if (ptr) { int ret; @@ -467,13 +467,15 @@ static void __dma_remap(struct page *page, size_t size, pgprot_t prot) static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, pgprot_t prot, struct page **ret_page, - const void *caller) + const void *caller, bool want_vaddr) { struct page *page; - void *ptr; + void *ptr = NULL; page = __dma_alloc_buffer(dev, size, gfp); if (!page) return NULL; + if (!want_vaddr) + goto out; ptr = __dma_alloc_remap(page, size, gfp, prot, caller); if (!ptr) { @@ -481,6 +483,7 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, return NULL; } + out: *ret_page = page; return ptr; } @@ -523,12 +526,12 @@ static int __free_from_pool(void *start, size_t size) static void *__alloc_from_contiguous(struct device *dev, size_t size, pgprot_t prot, struct page **ret_page, - const void *caller) + const void *caller, bool want_vaddr) { unsigned long order = get_order(size); size_t count = size >> PAGE_SHIFT; struct page *page; - void *ptr; + void *ptr = NULL; page = dma_alloc_from_contiguous(dev, count, order); if (!page) @@ -536,6 +539,9 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size, __dma_clear_buffer(page, size); + if (!want_vaddr) + goto out; + if (PageHighMem(page)) { ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller); if (!ptr) { @@ -546,17 +552,21 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size, __dma_remap(page, size, prot); ptr = page_address(page); } + + out: *ret_page = page; return ptr; } static void __free_from_contiguous(struct device *dev, struct page *page, - void *cpu_addr, size_t size) + void *cpu_addr, size_t size, bool want_vaddr) { - if (PageHighMem(page)) - __dma_free_remap(cpu_addr, size); - else - __dma_remap(page, size, PAGE_KERNEL); + if (want_vaddr) { + if (PageHighMem(page)) + __dma_free_remap(cpu_addr, size); + else + __dma_remap(page, size, PAGE_KERNEL); + } dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); } @@ -574,12 +584,12 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) #define nommu() 1 -#define __get_dma_pgprot(attrs, prot) __pgprot(0) -#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL +#define __get_dma_pgprot(attrs, prot) __pgprot(0) +#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL #define __alloc_from_pool(size, ret_page) NULL -#define __alloc_from_contiguous(dev, size, prot, ret, c) NULL +#define __alloc_from_contiguous(dev, size, prot, ret, c, wv) NULL #define __free_from_pool(cpu_addr, size) 0 -#define __free_from_contiguous(dev, page, cpu_addr, size) do { } while (0) +#define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0) #define __dma_free_remap(cpu_addr, size) do { } while (0) #endif /* CONFIG_MMU */ @@ -599,11 +609,13 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller) + gfp_t gfp, pgprot_t prot, bool is_coherent, + struct dma_attrs *attrs, const void *caller) { u64 mask = get_coherent_dma_mask(dev); struct page *page = NULL; void *addr; + bool want_vaddr; #ifdef CONFIG_DMA_API_DEBUG u64 limit = (mask + 1) & ~mask; @@ -631,20 +643,21 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); + want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); if (is_coherent || nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (!(gfp & __GFP_WAIT)) addr = __alloc_from_pool(size, &page); else if (!dev_get_cma_area(dev)) - addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); + addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller, want_vaddr); else - addr = __alloc_from_contiguous(dev, size, prot, &page, caller); + addr = __alloc_from_contiguous(dev, size, prot, &page, caller, want_vaddr); - if (addr) + if (page) *handle = pfn_to_dma(dev, page_to_pfn(page)); - return addr; + return want_vaddr ? addr : page; } /* @@ -661,7 +674,7 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, return memory; return __dma_alloc(dev, size, handle, gfp, prot, false, - __builtin_return_address(0)); + attrs, __builtin_return_address(0)); } static void *arm_coherent_dma_alloc(struct device *dev, size_t size, @@ -674,7 +687,7 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size, return memory; return __dma_alloc(dev, size, handle, gfp, prot, true, - __builtin_return_address(0)); + attrs, __builtin_return_address(0)); } /* @@ -715,6 +728,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, bool is_coherent) { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); + bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return; @@ -726,14 +740,15 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, } else if (__free_from_pool(cpu_addr, size)) { return; } else if (!dev_get_cma_area(dev)) { - __dma_free_remap(cpu_addr, size); + if (want_vaddr) + __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { /* * Non-atomic allocations cannot be freed with IRQs disabled */ WARN_ON(irqs_disabled()); - __free_from_contiguous(dev, page, cpu_addr, size); + __free_from_contiguous(dev, page, cpu_addr, size, want_vaddr); } } -- cgit v0.10.2 From 5744ff43c2c737055c65b9594b0783c1a2832a65 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 13 Feb 2015 11:04:21 +0000 Subject: ARM: drop experimental status of SMP_ON_UP SMP_ON_UP has been around for a while, and seems to be well-proven now. Drop the EXPERIMENTAL tag from the option. Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9f1f09a..d7d7b27 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1343,7 +1343,7 @@ config SMP If you don't know what to do here, say N. config SMP_ON_UP - bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)" + bool "Allow booting SMP kernel on uniprocessor systems" depends on SMP && !XIP_KERNEL && MMU default y help -- cgit v0.10.2 From 1b4bd608763e063ea87e20030e05db005e70177f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 9 Mar 2015 18:54:32 +0100 Subject: ARM: 8309/1: l2c: enforce use of cache-level property Make sure that we can read the "cache-level" property from the L2 cache controller node, and ensure its value is 2. Signed-off-by: Florian Fainelli Signed-off-by: Russell King diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c6c7696..8b933dc 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -1648,6 +1648,7 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) struct device_node *np; struct resource res; u32 cache_id, old_aux; + u32 cache_level = 2; np = of_find_matching_node(NULL, l2x0_ids); if (!np) @@ -1680,6 +1681,12 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) if (!of_property_read_bool(np, "cache-unified")) pr_err("L2C: device tree omits to specify unified cache\n"); + if (of_property_read_u32(np, "cache-level", &cache_level)) + pr_err("L2C: device tree omits to specify cache-level\n"); + + if (cache_level != 2) + pr_err("L2C: device tree specifies invalid cache level\n"); + /* Read back current (default) hardware configuration */ if (data->save) data->save(l2x0_base); -- cgit v0.10.2 From 65bab45113a2c5e9f13bc8cc3f6fea92f467d417 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 28 Feb 2015 00:11:33 +0000 Subject: ARM: perf: Preparatory work for Scorpion PMU support Do some things to make the Krait PMU support code generic enough to be used by the Scorpion PMU support code. * Rename the venum register functions to be venum instead of krait specific because the same registers exist on Scorpion * Add some macros to decode our Krait specific event encoding that's the same on Scorpion (modulo an extra region). * Drop 'krait' from krait_clear_pmresrn_group() so it can be used by Scorpion code Signed-off-by: Stephen Boyd Signed-off-by: Will Deacon diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 8993770..97a7eda 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1103,6 +1103,12 @@ static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) #define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT) #define PMRESRn_EN BIT(31) +#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */ +#define EVENT_GROUP(event) ((event) & 0xf) /* G */ +#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */ +#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */ +#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */ + static u32 krait_read_pmresrn(int n) { u32 val; @@ -1141,19 +1147,19 @@ static void krait_write_pmresrn(int n, u32 val) } } -static u32 krait_read_vpmresr0(void) +static u32 venum_read_pmresr(void) { u32 val; asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); return val; } -static void krait_write_vpmresr0(u32 val) +static void venum_write_pmresr(u32 val) { asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); } -static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val) +static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val) { u32 venum_new_val; u32 fp_new_val; @@ -1170,7 +1176,7 @@ static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val) fmxr(FPEXC, fp_new_val); } -static void krait_post_vpmresr0(u32 venum_orig_val, u32 fp_orig_val) +static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val) { BUG_ON(preemptible()); /* Restore FPEXC */ @@ -1193,16 +1199,11 @@ static void krait_evt_setup(int idx, u32 config_base) u32 val; u32 mask; u32 vval, fval; - unsigned int region; - unsigned int group; - unsigned int code; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + unsigned int code = EVENT_CODE(config_base); unsigned int group_shift; - bool venum_event; - - venum_event = !!(config_base & VENUM_EVENT); - region = (config_base >> 12) & 0xf; - code = (config_base >> 4) & 0xff; - group = (config_base >> 0) & 0xf; + bool venum_event = EVENT_VENUM(config_base); group_shift = group * 8; mask = 0xff << group_shift; @@ -1220,13 +1221,13 @@ static void krait_evt_setup(int idx, u32 config_base) asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); if (venum_event) { - krait_pre_vpmresr0(&vval, &fval); - val = krait_read_vpmresr0(); + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); val &= ~mask; val |= code << group_shift; val |= PMRESRn_EN; - krait_write_vpmresr0(val); - krait_post_vpmresr0(vval, fval); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); } else { val = krait_read_pmresrn(region); val &= ~mask; @@ -1236,7 +1237,7 @@ static void krait_evt_setup(int idx, u32 config_base) } } -static u32 krait_clear_pmresrn_group(u32 val, int group) +static u32 clear_pmresrn_group(u32 val, int group) { u32 mask; int group_shift; @@ -1256,23 +1257,19 @@ static void krait_clearpmu(u32 config_base) { u32 val; u32 vval, fval; - unsigned int region; - unsigned int group; - bool venum_event; - - venum_event = !!(config_base & VENUM_EVENT); - region = (config_base >> 12) & 0xf; - group = (config_base >> 0) & 0xf; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + bool venum_event = EVENT_VENUM(config_base); if (venum_event) { - krait_pre_vpmresr0(&vval, &fval); - val = krait_read_vpmresr0(); - val = krait_clear_pmresrn_group(val, group); - krait_write_vpmresr0(val); - krait_post_vpmresr0(vval, fval); + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val = clear_pmresrn_group(val, group); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); } else { val = krait_read_pmresrn(region); - val = krait_clear_pmresrn_group(val, group); + val = clear_pmresrn_group(val, group); krait_write_pmresrn(region, val); } } @@ -1350,9 +1347,9 @@ static void krait_pmu_reset(void *info) krait_write_pmresrn(1, 0); krait_write_pmresrn(2, 0); - krait_pre_vpmresr0(&vval, &fval); - krait_write_vpmresr0(0); - krait_post_vpmresr0(vval, fval); + venum_pre_pmresr(&vval, &fval); + venum_write_pmresr(0); + venum_post_pmresr(vval, fval); } static int krait_event_to_bit(struct perf_event *event, unsigned int region, @@ -1386,26 +1383,18 @@ static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc, { int idx; int bit = -1; - unsigned int prefix; - unsigned int region; - unsigned int code; - unsigned int group; - bool krait_event; struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int code = EVENT_CODE(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); - region = (hwc->config_base >> 12) & 0xf; - code = (hwc->config_base >> 4) & 0xff; - group = (hwc->config_base >> 0) & 0xf; - krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); - - if (krait_event) { + if (venum_event || krait_event) { /* Ignore invalid events */ if (group > 3 || region > 2) return -EINVAL; - prefix = hwc->config_base & KRAIT_EVENT_MASK; - if (prefix != KRAIT_EVENT && prefix != VENUM_EVENT) - return -EINVAL; - if (prefix == VENUM_EVENT && (code & 0xe0)) + if (venum_event && (code & 0xe0)) return -EINVAL; bit = krait_event_to_bit(event, region, group); @@ -1425,15 +1414,12 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, { int bit; struct hw_perf_event *hwc = &event->hw; - unsigned int region; - unsigned int group; - bool krait_event; - - region = (hwc->config_base >> 12) & 0xf; - group = (hwc->config_base >> 0) & 0xf; - krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); - if (krait_event) { + if (venum_event || krait_event) { bit = krait_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); } -- cgit v0.10.2 From 934999185edd613ca80916d238ba7393b84ae53c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 28 Feb 2015 00:11:34 +0000 Subject: ARM: perf: Only reset PMxEVCNTCR registers on reset The Krait specific PMxEVCNTCR register is unpredictable upon reset. Currently we clear the register before we setup an event, but we don't need to do that. Instead, we can iterate through all the events and clear them once when we reset the PMU, saving a write in the event setup path. Cc: Neil Leeder Cc: Ashwin Chaugule Cc: Sheetal Sahasrabudhe Signed-off-by: Stephen Boyd Signed-off-by: Will Deacon diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 97a7eda..fae6c4e 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1218,8 +1218,6 @@ static void krait_evt_setup(int idx, u32 config_base) val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); armv7_pmnc_write_evtsel(idx, val); - asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); - if (venum_event) { venum_pre_pmresr(&vval, &fval); val = venum_read_pmresr(); @@ -1339,6 +1337,8 @@ static void krait_pmu_enable_event(struct perf_event *event) static void krait_pmu_reset(void *info) { u32 vval, fval; + struct arm_pmu *cpu_pmu = info; + u32 idx, nb_cnt = cpu_pmu->num_events; armv7pmu_reset(info); @@ -1350,6 +1350,13 @@ static void krait_pmu_reset(void *info) venum_pre_pmresr(&vval, &fval); venum_write_pmresr(0); venum_post_pmresr(vval, fval); + + /* Reset PMxEVNCTCR to sane default */ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + } + } static int krait_event_to_bit(struct perf_event *event, unsigned int region, -- cgit v0.10.2 From 341e42c4e3f97af9bbeada64c3e1a41f65ce086a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 27 Feb 2015 16:11:35 -0800 Subject: ARM: perf: Add support for Scorpion PMUs Scorpion supports a set of local performance monitor event selection registers (LPM) sitting behind a cp15 based interface that extend the architected PMU events to include Scorpion CPU and Venum VFP specific events. To use these events the user is expected to program the lpm register with the event code shifted into the group they care about and then point the PMNx event at that region+group combo by writing a LPMn_GROUPx event. Add support for this hardware. Note: the raw event number is a pure software construct that allows us to map the multi-dimensional number space of regions, groups, and event codes into a flat event number space suitable for use by the perf framework. This is based on code originally written by Sheetal Sahasrabudhe, Ashwin Chaugule, and Neil Leeder [1]. [1] https://www.codeaurora.org/cgit/quic/la/kernel/msm/tree/arch/arm/kernel/perf_event_msm.c?h=msm-3.4 Cc: Mark Rutland Cc: Neil Leeder Cc: Ashwin Chaugule Cc: Sheetal Sahasrabudhe Cc: Signed-off-by: Stephen Boyd Signed-off-by: Will Deacon diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 75ef91d..6e54a9d 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -18,6 +18,8 @@ Required properties: "arm,arm11mpcore-pmu" "arm,arm1176-pmu" "arm,arm1136-pmu" + "qcom,scorpion-pmu" + "qcom,scorpion-mp-pmu" "qcom,krait-pmu" - interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu interrupt (PPI) then 1 interrupt should be specified. diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 61b53c4..7eb86e2 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -243,6 +243,8 @@ static const struct of_device_id cpu_pmu_of_device_ids[] = { {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, + {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, + {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, {}, }; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index fae6c4e..f4207a4 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -140,6 +140,23 @@ enum krait_perf_types { KRAIT_PERFCTR_L1_DTLB_ACCESS = 0x12210, }; +/* ARMv7 Scorpion specific event types */ +enum scorpion_perf_types { + SCORPION_LPM0_GROUP0 = 0x4c, + SCORPION_LPM1_GROUP0 = 0x50, + SCORPION_LPM2_GROUP0 = 0x54, + SCORPION_L2LPM_GROUP0 = 0x58, + SCORPION_VLPM_GROUP0 = 0x5c, + + SCORPION_ICACHE_ACCESS = 0x10053, + SCORPION_ICACHE_MISS = 0x10052, + + SCORPION_DTLB_ACCESS = 0x12013, + SCORPION_DTLB_MISS = 0x12012, + + SCORPION_ITLB_MISS = 0x12021, +}; + /* * Cortex-A8 HW events mapping * @@ -482,6 +499,49 @@ static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }; /* + * Scorpion HW events mapping + */ +static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS, + /* + * Only ITLB misses and DTLB refills are supported. If users want the + * DTLB refills misses a raw counter must be used. + */ + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* * Perf Events' indices */ #define ARMV7_IDX_CYCLE_COUNTER 0 @@ -976,6 +1036,12 @@ static int krait_map_event_no_branch(struct perf_event *event) &krait_perf_cache_map, 0xFFFFF); } +static int scorpion_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &scorpion_perf_map, + &scorpion_perf_cache_map, 0xFFFFF); +} + static void armv7pmu_init(struct arm_pmu *cpu_pmu) { cpu_pmu->handle_irq = armv7pmu_handle_irq; @@ -1451,6 +1517,344 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; return 0; } + +/* + * Scorpion Local Performance Monitor Register (LPMn) + * + * 31 30 24 16 8 0 + * +--------------------------------+ + * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0 + * +--------------------------------+ + * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1 + * +--------------------------------+ + * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2 + * +--------------------------------+ + * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3 + * +--------------------------------+ + * VLPM | EN | CC | CC | CC | CC | N = 2, R = ? + * +--------------------------------+ + * EN | G=3 | G=2 | G=1 | G=0 + * + * + * Event Encoding: + * + * hwc->config_base = 0xNRCCG + * + * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM) + * R = region register + * CC = class of events the group G is choosing from + * G = group or particular event + * + * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2 + * + * A region (R) corresponds to a piece of the CPU (execution unit, instruction + * unit, etc.) while the event code (CC) corresponds to a particular class of + * events (interrupts for example). An event code is broken down into + * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for + * example). + */ + +static u32 scorpion_read_pmresrn(int n) +{ + u32 val; + + switch (n) { + case 0: + asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val)); + break; + case 1: + asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val)); + break; + case 2: + asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val)); + break; + case 3: + asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val)); + break; + default: + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ + } + + return val; +} + +static void scorpion_write_pmresrn(int n, u32 val) +{ + switch (n) { + case 0: + asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val)); + break; + case 1: + asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val)); + break; + case 2: + asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val)); + break; + case 3: + asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val)); + break; + default: + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ + } +} + +static u32 scorpion_get_pmresrn_event(unsigned int region) +{ + static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0, + SCORPION_LPM1_GROUP0, + SCORPION_LPM2_GROUP0, + SCORPION_L2LPM_GROUP0 }; + return pmresrn_table[region]; +} + +static void scorpion_evt_setup(int idx, u32 config_base) +{ + u32 val; + u32 mask; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + unsigned int code = EVENT_CODE(config_base); + unsigned int group_shift; + bool venum_event = EVENT_VENUM(config_base); + + group_shift = group * 8; + mask = 0xff << group_shift; + + /* Configure evtsel for the region and group */ + if (venum_event) + val = SCORPION_VLPM_GROUP0; + else + val = scorpion_get_pmresrn_event(region); + val += group; + /* Mix in mode-exclusion bits */ + val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); + armv7_pmnc_write_evtsel(idx, val); + + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = scorpion_read_pmresrn(region); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + scorpion_write_pmresrn(region, val); + } +} + +static void scorpion_clearpmu(u32 config_base) +{ + u32 val; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + bool venum_event = EVENT_VENUM(config_base); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val = clear_pmresrn_group(val, group); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = scorpion_read_pmresrn(region); + val = clear_pmresrn_group(val, group); + scorpion_write_pmresrn(region, val); + } +} + +static void scorpion_pmu_disable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + /* Disable counter and interrupt */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Clear pmresr code (if destined for PMNx counters) + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + scorpion_clearpmu(hwc->config_base); + + /* Disable interrupt for this counter */ + armv7_pmnc_disable_intens(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void scorpion_pmu_enable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We don't set the event for the cycle counter because we + * don't have the ability to perform event filtering. + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + scorpion_evt_setup(idx, hwc->config_base); + else if (idx != ARMV7_IDX_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* Enable interrupt for this counter */ + armv7_pmnc_enable_intens(idx); + + /* Enable counter */ + armv7_pmnc_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void scorpion_pmu_reset(void *info) +{ + u32 vval, fval; + struct arm_pmu *cpu_pmu = info; + u32 idx, nb_cnt = cpu_pmu->num_events; + + armv7pmu_reset(info); + + /* Clear all pmresrs */ + scorpion_write_pmresrn(0, 0); + scorpion_write_pmresrn(1, 0); + scorpion_write_pmresrn(2, 0); + scorpion_write_pmresrn(3, 0); + + venum_pre_pmresr(&vval, &fval); + venum_write_pmresr(0); + venum_post_pmresr(vval, fval); + + /* Reset PMxEVNCTCR to sane default */ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + } +} + +static int scorpion_event_to_bit(struct perf_event *event, unsigned int region, + unsigned int group) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + + if (hwc->config_base & VENUM_EVENT) + bit = SCORPION_VLPM_GROUP0; + else + bit = scorpion_get_pmresrn_event(region); + bit -= scorpion_get_pmresrn_event(0); + bit += group; + /* + * Lower bits are reserved for use by the counters (see + * armv7pmu_get_event_idx() for more info) + */ + bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1; + + return bit; +} + +/* + * We check for column exclusion constraints here. + * Two events cant use the same group within a pmresr register. + */ +static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + int bit = -1; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool scorpion_event = EVENT_CPU(hwc->config_base); + + if (venum_event || scorpion_event) { + /* Ignore invalid events */ + if (group > 3 || region > 3) + return -EINVAL; + + bit = scorpion_event_to_bit(event, region, group); + if (test_and_set_bit(bit, cpuc->used_mask)) + return -EAGAIN; + } + + idx = armv7pmu_get_event_idx(cpuc, event); + if (idx < 0 && bit >= 0) + clear_bit(bit, cpuc->used_mask); + + return idx; +} + +static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool scorpion_event = EVENT_CPU(hwc->config_base); + + if (venum_event || scorpion_event) { + bit = scorpion_event_to_bit(event, region, group); + clear_bit(bit, cpuc->used_mask); + } +} + +static int scorpion_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_scorpion"; + cpu_pmu->map_event = scorpion_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->reset = scorpion_pmu_reset; + cpu_pmu->enable = scorpion_pmu_enable_event; + cpu_pmu->disable = scorpion_pmu_disable_event; + cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; + cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; + return 0; +} + +static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_scorpion_mp"; + cpu_pmu->map_event = scorpion_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->reset = scorpion_pmu_reset; + cpu_pmu->enable = scorpion_pmu_enable_event; + cpu_pmu->disable = scorpion_pmu_disable_event; + cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; + cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; + return 0; +} #else static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) { @@ -1491,4 +1895,14 @@ static inline int krait_pmu_init(struct arm_pmu *cpu_pmu) { return -ENODEV; } + +static inline int scorpion_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static inline int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} #endif /* CONFIG_CPU_V7 */ -- cgit v0.10.2 From 89cfdb19a88872088a8cf69621e55d41c379f02f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 16 Jan 2015 18:02:15 +0100 Subject: ARM: 8289/1: dma-mapping: use to_dma_iommu_mapping instead of accessing archdata When using the IOMMU-backed DMA ops for a device, we store a pointer to the dma_iommu_mapping structure (used to keep track of the address space) in the archdata.mapping field of the struct device. Rather than access this field directly, use the to_dma_iommu_mapping helper in dma-mapping, so that we don't really care where the mapping information is held. Cc: Laurent Pinchart Signed-off-by: Will Deacon Signed-off-by: Russell King diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7137616..b8fe720 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1221,7 +1221,7 @@ __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, static dma_addr_t __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; dma_addr_t dma_addr, iova; int i, ret = DMA_ERROR_CODE; @@ -1257,7 +1257,7 @@ fail: static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); /* * add optional in-page offset from iova to size and align @@ -1472,7 +1472,7 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, enum dma_data_direction dir, struct dma_attrs *attrs, bool is_coherent) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova, iova_base; int ret = 0; unsigned int count; @@ -1693,7 +1693,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t dma_addr; int ret, prot, len = PAGE_ALIGN(size + offset); @@ -1746,7 +1746,7 @@ static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova = handle & PAGE_MASK; int offset = handle & ~PAGE_MASK; int len = PAGE_ALIGN(size + offset); @@ -1771,7 +1771,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova = handle & PAGE_MASK; struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); int offset = handle & ~PAGE_MASK; @@ -1790,7 +1790,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, static void arm_iommu_sync_single_for_cpu(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova = handle & PAGE_MASK; struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); unsigned int offset = handle & ~PAGE_MASK; @@ -1804,7 +1804,7 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev, static void arm_iommu_sync_single_for_device(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova = handle & PAGE_MASK; struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); unsigned int offset = handle & ~PAGE_MASK; @@ -1965,7 +1965,7 @@ static int __arm_iommu_attach_device(struct device *dev, return err; kref_get(&mapping->kref); - dev->archdata.mapping = mapping; + to_dma_iommu_mapping(dev) = mapping; pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); return 0; @@ -2010,7 +2010,7 @@ static void __arm_iommu_detach_device(struct device *dev) iommu_detach_device(mapping->domain, dev); kref_put(&mapping->kref, release_iommu_mapping); - dev->archdata.mapping = NULL; + to_dma_iommu_mapping(dev) = NULL; pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); } @@ -2061,7 +2061,7 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, static void arm_teardown_iommu_dma_ops(struct device *dev) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); if (!mapping) return; -- cgit v0.10.2 From e429817b401f095ac483fcb02524b01faf45dad6 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Tue, 17 Mar 2015 18:14:58 +0000 Subject: ARM: perf: reject groups spanning multiple hardware PMUs The perf core implicitly rejects events spanning multiple HW PMUs, as in these cases the event->ctx will differ. However this validation is performed after pmu::event_init() is called in perf_init_event(), and thus pmu::event_init() may be called with a group leader from a different HW PMU. The ARM PMU driver does not take this fact into account, and when validating groups assumes that it can call to_arm_pmu(event->pmu) for any HW event. When the event in question is from another HW PMU this is wrong, and results in dereferencing garbage. This patch updates the ARM PMU driver to first test for and reject events from other PMUs, moving the to_arm_pmu and related logic after this test. Fixes a crash triggered by perf_fuzzer on Linux-4.0-rc2, with a CCI PMU present: --- CPU: 0 PID: 1527 Comm: perf_fuzzer Not tainted 4.0.0-rc2 #57 Hardware name: ARM-Versatile Express task: bd8484c0 ti: be676000 task.ti: be676000 PC is at 0xbf1bbc90 LR is at validate_event+0x34/0x5c pc : [] lr : [<80016060>] psr: 00000013 ... [<80016060>] (validate_event) from [<80016198>] (validate_group+0x28/0x90) [<80016198>] (validate_group) from [<80016398>] (armpmu_event_init+0x150/0x218) [<80016398>] (armpmu_event_init) from [<800882e4>] (perf_try_init_event+0x30/0x48) [<800882e4>] (perf_try_init_event) from [<8008f544>] (perf_init_event+0x5c/0xf4) [<8008f544>] (perf_init_event) from [<8008f8a8>] (perf_event_alloc+0x2cc/0x35c) [<8008f8a8>] (perf_event_alloc) from [<8009015c>] (SyS_perf_event_open+0x498/0xa70) [<8009015c>] (SyS_perf_event_open) from [<8000e420>] (ret_fast_syscall+0x0/0x34) Code: bf1be000 bf1bb380 802a2664 00000000 (00000002) ---[ end trace 01aff0ff00926a0a ]--- Also cleans up the code to use the arm_pmu only when we know that we are dealing with an arm pmu event. Cc: Will Deacon Acked-by: Mark Rutland Acked-by: Peter Ziljstra (Intel) Signed-off-by: Suzuki K. Poulose Signed-off-by: Will Deacon diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 557e128..4a86a01 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -259,20 +259,29 @@ out: } static int -validate_event(struct pmu_hw_events *hw_events, - struct perf_event *event) +validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, + struct perf_event *event) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct arm_pmu *armpmu; if (is_software_event(event)) return 1; + /* + * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The + * core perf code won't check that the pmu->ctx == leader->ctx + * until after pmu->event_init(event). + */ + if (event->pmu != pmu) + return 0; + if (event->state < PERF_EVENT_STATE_OFF) return 1; if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; + armpmu = to_arm_pmu(event->pmu); return armpmu->get_event_idx(hw_events, event) >= 0; } @@ -288,15 +297,15 @@ validate_group(struct perf_event *event) */ memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask)); - if (!validate_event(&fake_pmu, leader)) + if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(&fake_pmu, sibling)) + if (!validate_event(event->pmu, &fake_pmu, sibling)) return -EINVAL; } - if (!validate_event(&fake_pmu, event)) + if (!validate_event(event->pmu, &fake_pmu, event)) return -EINVAL; return 0; -- cgit v0.10.2 From 9fd85eb502a78bd812db58bd1f668b2a06ee30a5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Mar 2015 11:54:09 +0000 Subject: ARM: pmu: add support for interrupt-affinity property Historically, the PMU devicetree bindings have expected SPIs to be listed in order of *logical* CPU number. This is problematic for bootloaders, especially when the boot CPU (logical ID 0) isn't listed first in the devicetree. This patch adds a new optional property, interrupt-affinity, to the PMU node which allows the interrupt affinity to be described using a list of phandled to CPU nodes, with each entry in the list corresponding to the SPI at the same index in the interrupts property. Cc: Mark Rutland Signed-off-by: Will Deacon diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index b1596bd..675e4ab 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -92,6 +92,7 @@ struct pmu_hw_events { struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; + int *irq_affinity; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct perf_event *event); diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 7eb86e2..91c7ba1 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -92,11 +92,16 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) free_percpu_irq(irq, &hw_events->percpu_pmu); } else { for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) + int cpu = i; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) continue; irq = platform_get_irq(pmu_device, i); if (irq >= 0) - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i)); + free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); } } } @@ -128,32 +133,37 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); } else { for (i = 0; i < irqs; ++i) { + int cpu = i; + err = 0; irq = platform_get_irq(pmu_device, i); if (irq < 0) continue; + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + /* * If we have a single PMU interrupt that we can't shift, * assume that we're running on a uniprocessor machine and * continue. Otherwise, continue without this interrupt. */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); + irq, cpu); continue; } err = request_irq(irq, handler, IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, i)); + per_cpu_ptr(&hw_events->percpu_pmu, cpu)); if (err) { pr_err("unable to request IRQ%d for ARM PMU counters\n", irq); return err; } - cpumask_set_cpu(i, &cpu_pmu->active_irqs); + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); } } @@ -291,6 +301,48 @@ static int probe_current_pmu(struct arm_pmu *pmu) return ret; } +static int of_pmu_irq_cfg(struct platform_device *pdev) +{ + int i; + int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + + if (!irqs) + return -ENOMEM; + + for (i = 0; i < pdev->num_resources; ++i) { + struct device_node *dn; + int cpu; + + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", + i); + if (!dn) { + pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", + of_node_full_name(dn), i); + break; + } + + for_each_possible_cpu(cpu) + if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) + break; + + of_node_put(dn); + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + break; + } + + irqs[i] = cpu; + } + + if (i == pdev->num_resources) + cpu_pmu->irq_affinity = irqs; + else + kfree(irqs); + + return 0; +} + static int cpu_pmu_device_probe(struct platform_device *pdev) { const struct of_device_id *of_id; @@ -315,7 +367,10 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { init_fn = of_id->data; - ret = init_fn(pmu); + + ret = of_pmu_irq_cfg(pdev); + if (!ret) + ret = init_fn(pmu); } else { ret = probe_current_pmu(pmu); } -- cgit v0.10.2 From 1713ce7c43755fe8b0f31ea317513129bf784909 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 25 Mar 2015 19:13:16 +0100 Subject: ARM: 8329/1: miscellaneous vdso infrastructure, preparation Define the layout of the data structure shared between kernel and userspace. Track the vdso address in the mm_context; needed for communicating AT_SYSINFO_EHDR to the ELF loader. Add declarations for arm_install_vdso; implementation is in a following patch. Define AT_SYSINFO_EHDR, and, if CONFIG_VDSO=y, report the vdso shared object address via the ELF auxiliary vector. Note - this adds the AT_SYSINFO_EHDR in a new user-visible header asm/auxvec.h; this is consistent with other architectures. Signed-off-by: Nathan Lynch Signed-off-by: Russell King diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index fe74c0d..eb0f43f 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -1,6 +1,5 @@ -generic-y += auxvec.h generic-y += bitsperlong.h generic-y += cputime.h generic-y += current.h diff --git a/arch/arm/include/asm/auxvec.h b/arch/arm/include/asm/auxvec.h new file mode 100644 index 0000000..fbd388c --- /dev/null +++ b/arch/arm/include/asm/auxvec.h @@ -0,0 +1 @@ +#include diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index afb9caf..ac3f17f 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -1,7 +1,9 @@ #ifndef __ASMARM_ELF_H #define __ASMARM_ELF_H +#include #include +#include /* * ELF register definitions.. @@ -130,6 +132,13 @@ extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk #ifdef CONFIG_MMU +#ifdef CONFIG_VDSO +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (elf_addr_t)current->mm->context.vdso); \ +} while (0) +#endif #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; int arch_setup_additional_pages(struct linux_binprm *, int); diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 64fd151..a5b47421 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -11,6 +11,9 @@ typedef struct { #endif unsigned int vmalloc_seq; unsigned long sigpage; +#ifdef CONFIG_VDSO + unsigned long vdso; +#endif } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h new file mode 100644 index 0000000..d0295f1 --- /dev/null +++ b/arch/arm/include/asm/vdso.h @@ -0,0 +1,32 @@ +#ifndef __ASM_VDSO_H +#define __ASM_VDSO_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +struct mm_struct; + +#ifdef CONFIG_VDSO + +void arm_install_vdso(struct mm_struct *mm, unsigned long addr); + +extern char vdso_start, vdso_end; + +extern unsigned int vdso_total_pages; + +#else /* CONFIG_VDSO */ + +static inline void arm_install_vdso(struct mm_struct *mm, unsigned long addr) +{ +} + +#define vdso_total_pages 0 + +#endif /* CONFIG_VDSO */ + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* __ASM_VDSO_H */ diff --git a/arch/arm/include/asm/vdso_datapage.h b/arch/arm/include/asm/vdso_datapage.h new file mode 100644 index 0000000..9be2594 --- /dev/null +++ b/arch/arm/include/asm/vdso_datapage.h @@ -0,0 +1,60 @@ +/* + * Adapted from arm64 version. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_VDSO_DATAPAGE_H +#define __ASM_VDSO_DATAPAGE_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +#include + +/* Try to be cache-friendly on systems that don't implement the + * generic timer: fit the unconditionally updated fields in the first + * 32 bytes. + */ +struct vdso_data { + u32 seq_count; /* sequence count - odd during updates */ + u16 tk_is_cntvct; /* fall back to syscall if false */ + u16 cs_shift; /* clocksource shift */ + u32 xtime_coarse_sec; /* coarse time */ + u32 xtime_coarse_nsec; + + u32 wtm_clock_sec; /* wall to monotonic offset */ + u32 wtm_clock_nsec; + u32 xtime_clock_sec; /* CLOCK_REALTIME - seconds */ + u32 cs_mult; /* clocksource multiplier */ + + u64 cs_cycle_last; /* last cycle value */ + u64 cs_mask; /* clocksource mask */ + + u64 xtime_clock_snsec; /* CLOCK_REALTIME sub-ns base */ + u32 tz_minuteswest; /* timezone info for gettimeofday(2) */ + u32 tz_dsttime; +}; + +union vdso_data_store { + struct vdso_data data; + u8 page[PAGE_SIZE]; +}; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* __ASM_VDSO_DATAPAGE_H */ diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 70a1c9d..a1c05f9 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +header-y += auxvec.h header-y += byteorder.h header-y += fcntl.h header-y += hwcap.h diff --git a/arch/arm/include/uapi/asm/auxvec.h b/arch/arm/include/uapi/asm/auxvec.h new file mode 100644 index 0000000..cb02a76 --- /dev/null +++ b/arch/arm/include/uapi/asm/auxvec.h @@ -0,0 +1,7 @@ +#ifndef __ASM_AUXVEC_H +#define __ASM_AUXVEC_H + +/* VDSO location */ +#define AT_SYSINFO_EHDR 33 + +#endif -- cgit v0.10.2 From 8512287a8165592466cb9cb347ba94892e9c56a5 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 25 Mar 2015 19:14:22 +0100 Subject: ARM: 8330/1: add VDSO user-space code Place VDSO-related user-space code in arch/arm/kernel/vdso/. It is almost completely written in C with some assembly helpers to load the data page address, sample the counter, and fall back to system calls when necessary. The VDSO can service gettimeofday and clock_gettime when CONFIG_ARM_ARCH_TIMER is enabled and the architected timer is present (and correctly configured). It reads the CP15-based virtual counter to compute high-resolution timestamps. Of particular note is that a post-processing step ("vdsomunge") is necessary to produce a shared object which is architecturally allowed to be used by both soft- and hard-float EABI programs. The 2012 edition of the ARM ABI defines Tag_ABI_VFP_args = 3 "Code is compatible with both the base and VFP variants; the user did not permit non-variadic functions to pass FP parameters/results." Unfortunately current toolchains do not support this tag, which is ideally what we would use. The best available option is to ensure that both EF_ARM_ABI_FLOAT_SOFT and EF_ARM_ABI_FLOAT_HARD are unset in the ELF header's e_flags, indicating that the shared object is "old" and should be accepted for backward compatibility's sake. While binutils < 2.24 appear to produce a vdso.so with both flags clear, 2.24 always sets EF_ARM_ABI_FLOAT_SOFT, with no way to inhibit this behavior. So we have to fix things up with a custom post-processing step. In fact, the VDSO code in glibc does much less validation (including checking these flags) than the code for handling conventional file-backed shared libraries, so this is a bit moot unless glibc's VDSO code becomes more strict. Signed-off-by: Nathan Lynch Signed-off-by: Russell King diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 2d2d608..9147008 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -210,5 +211,9 @@ int main(void) #endif DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); #endif + BLANK(); +#ifdef CONFIG_VDSO + DEFINE(VDSO_DATA_SIZE, sizeof(union vdso_data_store)); +#endif return 0; } diff --git a/arch/arm/vdso/.gitignore b/arch/arm/vdso/.gitignore new file mode 100644 index 0000000..f8b69d8 --- /dev/null +++ b/arch/arm/vdso/.gitignore @@ -0,0 +1 @@ +vdso.lds diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile new file mode 100644 index 0000000..bab0a8b --- /dev/null +++ b/arch/arm/vdso/Makefile @@ -0,0 +1,74 @@ +hostprogs-y := vdsomunge + +obj-vdso := vgettimeofday.o datapage.o + +# Build rules +targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds +obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) + +ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector +ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING +ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + +obj-y += vdso.o +extra-y += vdso.lds +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +CFLAGS_REMOVE_vdso.o = -pg + +# Force -O2 to avoid libgcc dependencies +CFLAGS_REMOVE_vgettimeofday.o = -pg -Os +CFLAGS_vgettimeofday.o = -O2 + +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + +# Force dependency +$(obj)/vdso.o : $(obj)/vdso.so + +# Link rule for the .so file +$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE + $(call if_changed,vdsold) + +$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/vdsomunge FORCE + $(call if_changed,vdsomunge) + +# Strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# Actual build commands +quiet_cmd_vdsold = VDSO $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \ + $(call cc-ldoption, -Wl$(comma)--build-id) \ + -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \ + -Wl,-z,common-page-size=4096 -o $@ + +quiet_cmd_vdsomunge = MUNGE $@ + cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@ + +# +# Install the unstripped copy of vdso.so.dbg. If our toolchain +# supports build-id, install .build-id links as well. +# +# Cribbed from arch/x86/vdso/Makefile. +# +quiet_cmd_vdso_install = INSTALL $< +define cmd_vdso_install + cp $< "$(MODLIB)/vdso/vdso.so"; \ + if readelf -n $< | grep -q 'Build ID'; then \ + buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \ + first=`echo $$buildid | cut -b-2`; \ + last=`echo $$buildid | cut -b3-`; \ + mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \ + ln -sf "../../vdso.so" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \ + fi +endef + +$(MODLIB)/vdso: FORCE + @mkdir -p $(MODLIB)/vdso + +PHONY += vdso_install +vdso_install: $(obj)/vdso.so.dbg $(MODLIB)/vdso FORCE + $(call cmd,vdso_install) diff --git a/arch/arm/vdso/datapage.S b/arch/arm/vdso/datapage.S new file mode 100644 index 0000000..a2e6036 --- /dev/null +++ b/arch/arm/vdso/datapage.S @@ -0,0 +1,15 @@ +#include +#include + + .align 2 +.L_vdso_data_ptr: + .long _start - . - VDSO_DATA_SIZE + +ENTRY(__get_datapage) + .fnstart + adr r0, .L_vdso_data_ptr + ldr r1, [r0] + add r0, r0, r1 + bx lr + .fnend +ENDPROC(__get_datapage) diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S new file mode 100644 index 0000000..b2b97e3 --- /dev/null +++ b/arch/arm/vdso/vdso.S @@ -0,0 +1,35 @@ +/* + * Adapted from arm64 version. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Author: Will Deacon + */ + +#include +#include +#include +#include + + __PAGE_ALIGNED_DATA + + .globl vdso_start, vdso_end + .balign PAGE_SIZE +vdso_start: + .incbin "arch/arm/vdso/vdso.so" + .balign PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/arm/vdso/vdso.lds.S b/arch/arm/vdso/vdso.lds.S new file mode 100644 index 0000000..89ca89f --- /dev/null +++ b/arch/arm/vdso/vdso.lds.S @@ -0,0 +1,87 @@ +/* + * Adapted from arm64 version. + * + * GNU linker script for the VDSO library. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Author: Will Deacon + * Heavily based on the vDSO linker scripts for other archs. + */ + +#include +#include +#include + +OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") +OUTPUT_ARCH(arm) + +SECTIONS +{ + PROVIDE(_start = .); + + . = SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + .text : { *(.text*) } :text =0xe7f001f2 + + .got : { *(.got) } + .rel.plt : { *(.rel.plt) } + + /DISCARD/ : { + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +VERSION +{ + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + local: *; + }; +} diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c new file mode 100644 index 0000000..9005b07 --- /dev/null +++ b/arch/arm/vdso/vdsomunge.c @@ -0,0 +1,201 @@ +/* + * Copyright 2015 Mentor Graphics Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * + * vdsomunge - Host program which produces a shared object + * architecturally specified to be usable by both soft- and hard-float + * programs. + * + * The Procedure Call Standard for the ARM Architecture (ARM IHI + * 0042E) says: + * + * 6.4.1 VFP and Base Standard Compatibility + * + * Code compiled for the VFP calling standard is compatible with + * the base standard (and vice-versa) if no floating-point or + * containerized vector arguments or results are used. + * + * And ELF for the ARM Architecture (ARM IHI 0044E) (Table 4-2) says: + * + * If both EF_ARM_ABI_FLOAT_XXXX bits are clear, conformance to the + * base procedure-call standard is implied. + * + * The VDSO is built with -msoft-float, as with the rest of the ARM + * kernel, and uses no floating point arguments or results. The build + * process will produce a shared object that may or may not have the + * EF_ARM_ABI_FLOAT_SOFT flag set (it seems to depend on the binutils + * version; binutils starting with 2.24 appears to set it). The + * EF_ARM_ABI_FLOAT_HARD flag should definitely not be set, and this + * program will error out if it is. + * + * If the soft-float flag is set, this program clears it. That's all + * it does. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define HOST_ORDER ELFDATA2LSB +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define HOST_ORDER ELFDATA2MSB +#endif + +/* Some of the ELF constants we'd like to use were added to + * relatively recently. + */ +#ifndef EF_ARM_EABI_VER5 +#define EF_ARM_EABI_VER5 0x05000000 +#endif + +#ifndef EF_ARM_ABI_FLOAT_SOFT +#define EF_ARM_ABI_FLOAT_SOFT 0x200 +#endif + +#ifndef EF_ARM_ABI_FLOAT_HARD +#define EF_ARM_ABI_FLOAT_HARD 0x400 +#endif + +static const char *outfile; + +static void cleanup(void) +{ + if (error_message_count > 0 && outfile != NULL) + unlink(outfile); +} + +static Elf32_Word read_elf_word(Elf32_Word word, bool swap) +{ + return swap ? bswap_32(word) : word; +} + +static Elf32_Half read_elf_half(Elf32_Half half, bool swap) +{ + return swap ? bswap_16(half) : half; +} + +static void write_elf_word(Elf32_Word val, Elf32_Word *dst, bool swap) +{ + *dst = swap ? bswap_32(val) : val; +} + +int main(int argc, char **argv) +{ + const Elf32_Ehdr *inhdr; + bool clear_soft_float; + const char *infile; + Elf32_Word e_flags; + const void *inbuf; + struct stat stat; + void *outbuf; + bool swap; + int outfd; + int infd; + + atexit(cleanup); + + if (argc != 3) + error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]); + + infile = argv[1]; + outfile = argv[2]; + + infd = open(infile, O_RDONLY); + if (infd < 0) + error(EXIT_FAILURE, errno, "Cannot open %s", infile); + + if (fstat(infd, &stat) != 0) + error(EXIT_FAILURE, errno, "Failed stat for %s", infile); + + inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0); + if (inbuf == MAP_FAILED) + error(EXIT_FAILURE, errno, "Failed to map %s", infile); + + close(infd); + + inhdr = inbuf; + + if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0) + error(EXIT_FAILURE, 0, "Not an ELF file"); + + if (inhdr->e_ident[EI_CLASS] != ELFCLASS32) + error(EXIT_FAILURE, 0, "Unsupported ELF class"); + + swap = inhdr->e_ident[EI_DATA] != HOST_ORDER; + + if (read_elf_half(inhdr->e_type, swap) != ET_DYN) + error(EXIT_FAILURE, 0, "Not a shared object"); + + if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) { + error(EXIT_FAILURE, 0, "Unsupported architecture %#x", + inhdr->e_machine); + } + + e_flags = read_elf_word(inhdr->e_flags, swap); + + if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) { + error(EXIT_FAILURE, 0, "Unsupported EABI version %#x", + EF_ARM_EABI_VERSION(e_flags)); + } + + if (e_flags & EF_ARM_ABI_FLOAT_HARD) + error(EXIT_FAILURE, 0, + "Unexpected hard-float flag set in e_flags"); + + clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT); + + outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); + if (outfd < 0) + error(EXIT_FAILURE, errno, "Cannot open %s", outfile); + + if (ftruncate(outfd, stat.st_size) != 0) + error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile); + + outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, + outfd, 0); + if (outbuf == MAP_FAILED) + error(EXIT_FAILURE, errno, "Failed to map %s", outfile); + + close(outfd); + + memcpy(outbuf, inbuf, stat.st_size); + + if (clear_soft_float) { + Elf32_Ehdr *outhdr; + + outhdr = outbuf; + e_flags &= ~EF_ARM_ABI_FLOAT_SOFT; + write_elf_word(e_flags, &outhdr->e_flags, swap); + } + + if (msync(outbuf, stat.st_size, MS_SYNC) != 0) + error(EXIT_FAILURE, errno, "Failed to sync %s", outfile); + + return EXIT_SUCCESS; +} diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c new file mode 100644 index 0000000..79214d5 --- /dev/null +++ b/arch/arm/vdso/vgettimeofday.c @@ -0,0 +1,282 @@ +/* + * Copyright 2015 Mentor Graphics Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CONFIG_AEABI +#error This code depends on AEABI system call conventions +#endif + +extern struct vdso_data *__get_datapage(void); + +static notrace u32 __vdso_read_begin(const struct vdso_data *vdata) +{ + u32 seq; +repeat: + seq = ACCESS_ONCE(vdata->seq_count); + if (seq & 1) { + cpu_relax(); + goto repeat; + } + return seq; +} + +static notrace u32 vdso_read_begin(const struct vdso_data *vdata) +{ + u32 seq; + + seq = __vdso_read_begin(vdata); + + smp_rmb(); /* Pairs with smp_wmb in vdso_write_end */ + return seq; +} + +static notrace int vdso_read_retry(const struct vdso_data *vdata, u32 start) +{ + smp_rmb(); /* Pairs with smp_wmb in vdso_write_begin */ + return vdata->seq_count != start; +} + +static notrace long clock_gettime_fallback(clockid_t _clkid, + struct timespec *_ts) +{ + register struct timespec *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_clock_gettime; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static notrace int do_realtime_coarse(struct timespec *ts, + struct vdso_data *vdata) +{ + u32 seq; + + do { + seq = vdso_read_begin(vdata); + + ts->tv_sec = vdata->xtime_coarse_sec; + ts->tv_nsec = vdata->xtime_coarse_nsec; + + } while (vdso_read_retry(vdata, seq)); + + return 0; +} + +static notrace int do_monotonic_coarse(struct timespec *ts, + struct vdso_data *vdata) +{ + struct timespec tomono; + u32 seq; + + do { + seq = vdso_read_begin(vdata); + + ts->tv_sec = vdata->xtime_coarse_sec; + ts->tv_nsec = vdata->xtime_coarse_nsec; + + tomono.tv_sec = vdata->wtm_clock_sec; + tomono.tv_nsec = vdata->wtm_clock_nsec; + + } while (vdso_read_retry(vdata, seq)); + + ts->tv_sec += tomono.tv_sec; + timespec_add_ns(ts, tomono.tv_nsec); + + return 0; +} + +#ifdef CONFIG_ARM_ARCH_TIMER + +static notrace u64 get_ns(struct vdso_data *vdata) +{ + u64 cycle_delta; + u64 cycle_now; + u64 nsec; + + cycle_now = arch_counter_get_cntvct(); + + cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask; + + nsec = (cycle_delta * vdata->cs_mult) + vdata->xtime_clock_snsec; + nsec >>= vdata->cs_shift; + + return nsec; +} + +static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata) +{ + u64 nsecs; + u32 seq; + + do { + seq = vdso_read_begin(vdata); + + if (!vdata->tk_is_cntvct) + return -1; + + ts->tv_sec = vdata->xtime_clock_sec; + nsecs = get_ns(vdata); + + } while (vdso_read_retry(vdata, seq)); + + ts->tv_nsec = 0; + timespec_add_ns(ts, nsecs); + + return 0; +} + +static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata) +{ + struct timespec tomono; + u64 nsecs; + u32 seq; + + do { + seq = vdso_read_begin(vdata); + + if (!vdata->tk_is_cntvct) + return -1; + + ts->tv_sec = vdata->xtime_clock_sec; + nsecs = get_ns(vdata); + + tomono.tv_sec = vdata->wtm_clock_sec; + tomono.tv_nsec = vdata->wtm_clock_nsec; + + } while (vdso_read_retry(vdata, seq)); + + ts->tv_sec += tomono.tv_sec; + ts->tv_nsec = 0; + timespec_add_ns(ts, nsecs + tomono.tv_nsec); + + return 0; +} + +#else /* CONFIG_ARM_ARCH_TIMER */ + +static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata) +{ + return -1; +} + +static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata) +{ + return -1; +} + +#endif /* CONFIG_ARM_ARCH_TIMER */ + +notrace int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts) +{ + struct vdso_data *vdata; + int ret = -1; + + vdata = __get_datapage(); + + switch (clkid) { + case CLOCK_REALTIME_COARSE: + ret = do_realtime_coarse(ts, vdata); + break; + case CLOCK_MONOTONIC_COARSE: + ret = do_monotonic_coarse(ts, vdata); + break; + case CLOCK_REALTIME: + ret = do_realtime(ts, vdata); + break; + case CLOCK_MONOTONIC: + ret = do_monotonic(ts, vdata); + break; + default: + break; + } + + if (ret) + ret = clock_gettime_fallback(clkid, ts); + + return ret; +} + +static notrace long gettimeofday_fallback(struct timeval *_tv, + struct timezone *_tz) +{ + register struct timezone *tz asm("r1") = _tz; + register struct timeval *tv asm("r0") = _tv; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_gettimeofday; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (tv), "r" (tz), "r" (nr) + : "memory"); + + return ret; +} + +notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + struct timespec ts; + struct vdso_data *vdata; + int ret; + + vdata = __get_datapage(); + + ret = do_realtime(&ts, vdata); + if (ret) + return gettimeofday_fallback(tv, tz); + + if (tv) { + tv->tv_sec = ts.tv_sec; + tv->tv_usec = ts.tv_nsec / 1000; + } + if (tz) { + tz->tz_minuteswest = vdata->tz_minuteswest; + tz->tz_dsttime = vdata->tz_dsttime; + } + + return ret; +} + +/* Avoid unresolved references emitted by GCC */ + +void __aeabi_unwind_cpp_pr0(void) +{ +} + +void __aeabi_unwind_cpp_pr1(void) +{ +} + +void __aeabi_unwind_cpp_pr2(void) +{ +} -- cgit v0.10.2 From ecf99a439105ebd0a507af1a9cd901a2e166bf9a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 25 Mar 2015 19:15:08 +0100 Subject: ARM: 8331/1: VDSO initialization, mapping, and synchronization Initialize the VDSO page list at boot, install the VDSO mapping at exec time, and update the data page during timer ticks. This code is not built if CONFIG_VDSO is not enabled. Account for the VDSO length when randomizing the offset from the stack. The [vdso] and [vvar] pages are placed immediately following the sigpage with separate _install_special_mapping calls. We want to "penalize" systems lacking the arch timer as little as possible. Previous versions of this code installed the VDSO unconditionally and unmodified, making it a measurably slower way for glibc to invoke the real syscalls on such systems. E.g. calling gettimeofday via glibc goes from ~560ns to ~630ns on i.MX6Q. If we can indicate to glibc that the time-related APIs in the VDSO are not accelerated, glibc can continue to invoke the syscalls directly instead of dispatching through the VDSO only to fall back to the slow path. Thus, if the architected timer is unusable for whatever reason, patch the VDSO at boot time so that symbol lookups for gettimeofday and clock_gettime return NULL. (This is similar to what powerpc does and borrows code from there.) This allows glibc to perform the syscall directly instead of passing control to the VDSO, which minimizes the penalty. In my measurements the time taken for a gettimeofday call via glibc goes from ~560ns to ~580ns (again on i.MX6Q), and this is solely due to adding a test and branch to glibc's gettimeofday syscall wrapper. An alternative to patching the VDSO at boot would be to not install the VDSO at all when the arch timer isn't usable. Another alternative is to include a separate "dummy" vdso.so without gettimeofday and clock_gettime, which would be selected at boot time. Either of these would get cumbersome if the VDSO were to gain support for an API such as getcpu which is unrelated to arch timer support. Signed-off-by: Nathan Lynch Signed-off-by: Russell King diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index fdfa3a7..c50fe21 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -41,6 +41,7 @@ #include #include #include +#include #ifdef CONFIG_CC_STACKPROTECTOR #include @@ -475,7 +476,7 @@ const char *arch_vma_name(struct vm_area_struct *vma) } /* If possible, provide a placement hint at a random offset from the - * stack for the signal page. + * stack for the sigpage and vdso pages. */ static unsigned long sigpage_addr(const struct mm_struct *mm, unsigned int npages) @@ -519,6 +520,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + unsigned long npages; unsigned long addr; unsigned long hint; int ret = 0; @@ -528,9 +530,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!signal_page) return -ENOMEM; + npages = 1; /* for sigpage */ + npages += vdso_total_pages; + down_write(&mm->mmap_sem); - hint = sigpage_addr(mm, 1); - addr = get_unmapped_area(NULL, hint, PAGE_SIZE, 0, 0); + hint = sigpage_addr(mm, npages); + addr = get_unmapped_area(NULL, hint, npages << PAGE_SHIFT, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; @@ -547,6 +552,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) mm->context.sigpage = addr; + /* Unlike the sigpage, failure to install the vdso is unlikely + * to be fatal to the process, so no error check needed + * here. + */ + arm_install_vdso(mm, addr + PAGE_SIZE); + up_fail: up_write(&mm->mmap_sem); return ret; diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c new file mode 100644 index 0000000..0d31d3c --- /dev/null +++ b/arch/arm/kernel/vdso.c @@ -0,0 +1,337 @@ +/* + * Adapted from arm64 version. + * + * Copyright (C) 2012 ARM Limited + * Copyright (C) 2015 Mentor Graphics Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_SYMNAME 64 + +static struct page **vdso_text_pagelist; + +/* Total number of pages needed for the data and text portions of the VDSO. */ +unsigned int vdso_total_pages __read_mostly; + +/* + * The VDSO data page. + */ +static union vdso_data_store vdso_data_store __page_aligned_data; +static struct vdso_data *vdso_data = &vdso_data_store.data; + +static struct page *vdso_data_page; +static struct vm_special_mapping vdso_data_mapping = { + .name = "[vvar]", + .pages = &vdso_data_page, +}; + +static struct vm_special_mapping vdso_text_mapping = { + .name = "[vdso]", +}; + +struct elfinfo { + Elf32_Ehdr *hdr; /* ptr to ELF */ + Elf32_Sym *dynsym; /* ptr to .dynsym section */ + unsigned long dynsymsize; /* size of .dynsym section */ + char *dynstr; /* ptr to .dynstr section */ +}; + +/* Cached result of boot-time check for whether the arch timer exists, + * and if so, whether the virtual counter is useable. + */ +static bool cntvct_ok __read_mostly; + +static bool __init cntvct_functional(void) +{ + struct device_node *np; + bool ret = false; + + if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) + goto out; + + /* The arm_arch_timer core should export + * arch_timer_use_virtual or similar so we don't have to do + * this. + */ + np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (!np) + goto out_put; + + if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) + goto out_put; + + ret = true; + +out_put: + of_node_put(np); +out: + return ret; +} + +static void * __init find_section(Elf32_Ehdr *ehdr, const char *name, + unsigned long *size) +{ + Elf32_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames + sechdrs[i].sh_name, name) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + + if (size) + *size = 0; + return NULL; +} + +static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname) +{ + unsigned int i; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { + char name[MAX_SYMNAME], *c; + + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, + MAX_SYMNAME); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +static void __init vdso_nullpatch_one(struct elfinfo *lib, const char *symname) +{ + Elf32_Sym *sym; + + sym = find_symbol(lib, symname); + if (!sym) + return; + + sym->st_name = 0; +} + +static void __init patch_vdso(void *ehdr) +{ + struct elfinfo einfo; + + einfo = (struct elfinfo) { + .hdr = ehdr, + }; + + einfo.dynsym = find_section(einfo.hdr, ".dynsym", &einfo.dynsymsize); + einfo.dynstr = find_section(einfo.hdr, ".dynstr", NULL); + + /* If the virtual counter is absent or non-functional we don't + * want programs to incur the slight additional overhead of + * dispatching through the VDSO only to fall back to syscalls. + */ + if (!cntvct_ok) { + vdso_nullpatch_one(&einfo, "__vdso_gettimeofday"); + vdso_nullpatch_one(&einfo, "__vdso_clock_gettime"); + } +} + +static int __init vdso_init(void) +{ + unsigned int text_pages; + int i; + + if (memcmp(&vdso_start, "\177ELF", 4)) { + pr_err("VDSO is not a valid ELF object!\n"); + return -ENOEXEC; + } + + text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start); + + /* Allocate the VDSO text pagelist */ + vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *), + GFP_KERNEL); + if (vdso_text_pagelist == NULL) + return -ENOMEM; + + /* Grab the VDSO data page. */ + vdso_data_page = virt_to_page(vdso_data); + + /* Grab the VDSO text pages. */ + for (i = 0; i < text_pages; i++) { + struct page *page; + + page = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_text_pagelist[i] = page; + } + + vdso_text_mapping.pages = vdso_text_pagelist; + + vdso_total_pages = 1; /* for the data/vvar page */ + vdso_total_pages += text_pages; + + cntvct_ok = cntvct_functional(); + + patch_vdso(&vdso_start); + + return 0; +} +arch_initcall(vdso_init); + +static int install_vvar(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + + vma = _install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ | VM_MAYREAD, + &vdso_data_mapping); + + return IS_ERR(vma) ? PTR_ERR(vma) : 0; +} + +/* assumes mmap_sem is write-locked */ +void arm_install_vdso(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + unsigned long len; + + mm->context.vdso = 0; + + if (vdso_text_pagelist == NULL) + return; + + if (install_vvar(mm, addr)) + return; + + /* Account for vvar page. */ + addr += PAGE_SIZE; + len = (vdso_total_pages - 1) << PAGE_SHIFT; + + vma = _install_special_mapping(mm, addr, len, + VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + &vdso_text_mapping); + + if (!IS_ERR(vma)) + mm->context.vdso = addr; +} + +static void vdso_write_begin(struct vdso_data *vdata) +{ + ++vdso_data->seq_count; + smp_wmb(); /* Pairs with smp_rmb in vdso_read_retry */ +} + +static void vdso_write_end(struct vdso_data *vdata) +{ + smp_wmb(); /* Pairs with smp_rmb in vdso_read_begin */ + ++vdso_data->seq_count; +} + +static bool tk_is_cntvct(const struct timekeeper *tk) +{ + if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) + return false; + + if (strcmp(tk->tkr.clock->name, "arch_sys_counter") != 0) + return false; + + return true; +} + +/** + * update_vsyscall - update the vdso data page + * + * Increment the sequence counter, making it odd, indicating to + * userspace that an update is in progress. Update the fields used + * for coarse clocks and, if the architected system timer is in use, + * the fields used for high precision clocks. Increment the sequence + * counter again, making it even, indicating to userspace that the + * update is finished. + * + * Userspace is expected to sample seq_count before reading any other + * fields from the data page. If seq_count is odd, userspace is + * expected to wait until it becomes even. After copying data from + * the page, userspace must sample seq_count again; if it has changed + * from its previous value, userspace must retry the whole sequence. + * + * Calls to update_vsyscall are serialized by the timekeeping core. + */ +void update_vsyscall(struct timekeeper *tk) +{ + struct timespec xtime_coarse; + struct timespec64 *wtm = &tk->wall_to_monotonic; + + if (!cntvct_ok) { + /* The entry points have been zeroed, so there is no + * point in updating the data page. + */ + return; + } + + vdso_write_begin(vdso_data); + + xtime_coarse = __current_kernel_time(); + vdso_data->tk_is_cntvct = tk_is_cntvct(tk); + vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; + vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->wtm_clock_sec = wtm->tv_sec; + vdso_data->wtm_clock_nsec = wtm->tv_nsec; + + if (vdso_data->tk_is_cntvct) { + vdso_data->cs_cycle_last = tk->tkr.cycle_last; + vdso_data->xtime_clock_sec = tk->xtime_sec; + vdso_data->xtime_clock_snsec = tk->tkr.xtime_nsec; + vdso_data->cs_mult = tk->tkr.mult; + vdso_data->cs_shift = tk->tkr.shift; + vdso_data->cs_mask = tk->tkr.mask; + } + + vdso_write_end(vdso_data); + + flush_dcache_page(virt_to_page(vdso_data)); +} + +void update_vsyscall_tz(void) +{ + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + flush_dcache_page(virt_to_page(vdso_data)); +} -- cgit v0.10.2 From e5b61deb3af465f11db7e5e11944ba00a33ece1f Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 25 Mar 2015 19:16:05 +0100 Subject: ARM: 8332/1: add CONFIG_VDSO Kconfig and Makefile bits Allow users to enable the vdso in Kconfig; include the vdso in the build if CONFIG_VDSO is enabled. Add 'vdso_install' target. Signed-off-by: Nathan Lynch Signed-off-by: Russell King diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 7f99cd6..6c13a84b 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -263,6 +263,7 @@ core-$(CONFIG_FPE_FASTFPE) += $(FASTFPE_OBJ) core-$(CONFIG_VFP) += arch/arm/vfp/ core-$(CONFIG_XEN) += arch/arm/xen/ core-$(CONFIG_KVM_ARM_HOST) += arch/arm/kvm/ +core-$(CONFIG_VDSO) += arch/arm/vdso/ # If we have a machine-specific directory, then include it in the build. core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/ @@ -320,6 +321,12 @@ dtbs: prepare scripts dtbs_install: $(Q)$(MAKE) $(dtbinst)=$(boot)/dts +PHONY += vdso_install +vdso_install: +ifeq ($(CONFIG_VDSO),y) + $(Q)$(MAKE) $(build)=arch/arm/vdso $@ +endif + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) @@ -344,4 +351,5 @@ define archhelp echo ' Install using (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' echo ' install to $$(INSTALL_PATH) and run lilo' + echo ' vdso_install - Install unstripped vdso.so to $$(INSTALL_MOD_PATH)/vdso' endef diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 902397d..3e316ca 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -75,6 +75,7 @@ obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o +obj-$(CONFIG_VDSO) += vdso.o ifneq ($(CONFIG_ARCH_EBSA110),y) obj-y += io.o diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 9b4f29e..8a5f1e6 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -825,6 +825,20 @@ config KUSER_HELPERS Say N here only if you are absolutely certain that you do not need these helpers; otherwise, the safe option is to say Y. +config VDSO + bool "Enable VDSO for acceleration of some system calls" + depends on AEABI && MMU + default y if ARM_ARCH_TIMER + select GENERIC_TIME_VSYSCALL + help + Place in the process address space an ELF shared object + providing fast implementations of gettimeofday and + clock_gettime. Systems that implement the ARM architected + timer will receive maximum benefit. + + You must have glibc 2.22 or later for programs to seamlessly + take advantage of this. + config DMA_CACHE_RWFO bool "Enable read/write for ownership DMA cache maintenance" depends on CPU_V6K && SMP -- cgit v0.10.2 From 0a6a78b8b3c1c1757fbeca4bbf518e44c70c9e4b Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 26 Mar 2015 09:41:33 +0000 Subject: ARM: add documentation for finding start of physical memory Occasionally, there's a question about the method we use to find the start of physical memory. Add some documentation so we don't have to keep repeating outselves on the mailing list. Signed-off-by: Russell King diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index c41a793..55a3532 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -168,9 +168,26 @@ not_angel: .text #ifdef CONFIG_AUTO_ZRELADDR - @ determine final kernel image address + /* + * Find the start of physical memory. As we are executing + * without the MMU on, we are in the physical address space. + * We just need to get rid of any offset by aligning the + * address. + * + * This alignment is a balance between the requirements of + * different platforms - we have chosen 128MB to allow + * platforms which align the start of their physical memory + * to 128MB to use this feature, while allowing the zImage + * to be placed within the first 128MB of memory on other + * platforms. Increasing the alignment means we place + * stricter alignment requirements on the start of physical + * memory, but relaxing it means that we break people who + * are already placing their zImage in (eg) the top 64MB + * of this range. + */ mov r4, pc and r4, r4, #0xf8000000 + /* Determine final kernel image address. */ add r4, r4, #TEXT_OFFSET #else ldr r4, =zreladdr -- cgit v0.10.2 From bf35706f3d0929b413e90b32cf9dd453f200a570 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 18 Mar 2015 07:29:32 +0100 Subject: ARM: 8314/1: replace PROCINFO embedded branch with relative offset This patch replaces the 'branch to setup()' instructions embedded in the PROCINFO structs with the offset to that setup function relative to the base of the struct. This preserves the position independent nature of that field, but uses a data item rather than an instruction. This is mainly done to prevent linker failures on large kernels, where the setup function is out of reach for the branch. Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 0196327..3637973 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -138,9 +138,9 @@ ENTRY(stext) @ mmu has been enabled adr lr, BSYM(1f) @ return (PIC) address mov r8, r4 @ set TTBR1 to swapper_pg_dir - ARM( add pc, r10, #PROCINFO_INITFUNC ) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 + ret r12 1: b __enable_mmu ENDPROC(stext) .ltorg @@ -386,10 +386,10 @@ ENTRY(secondary_startup) ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir adr lr, BSYM(__enable_mmu) @ return address mov r13, r12 @ __secondary_switched address - ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor - @ (return control reg) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 @ initialise processor + @ (return control reg) + ret r12 ENDPROC(secondary_startup) ENDPROC(secondary_startup_arm) diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 86ee5d4..aa0519e 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -507,7 +507,7 @@ cpu_arm1020_name: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm1020_proc_info,#object __arm1020_proc_info: @@ -519,7 +519,7 @@ __arm1020_proc_info: .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm1020_setup + initfn __arm1020_setup, __arm1020_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index a6331d7..bff4c7f 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -465,7 +465,7 @@ arm1020e_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm1020e_proc_info,#object __arm1020e_proc_info: @@ -479,7 +479,7 @@ __arm1020e_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm1020e_setup + initfn __arm1020e_setup, __arm1020e_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index a126b7a..dbb2413 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -448,7 +448,7 @@ arm1022_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm1022_proc_info,#object __arm1022_proc_info: @@ -462,7 +462,7 @@ __arm1022_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm1022_setup + initfn __arm1022_setup, __arm1022_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index fc29406..0b37b2c 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -442,7 +442,7 @@ arm1026_crval: string cpu_arm1026_name, "ARM1026EJ-S" .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm1026_proc_info,#object __arm1026_proc_info: @@ -456,7 +456,7 @@ __arm1026_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm1026_setup + initfn __arm1026_setup, __arm1026_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index 2baa66b..3651cd7 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -186,7 +186,7 @@ arm720_crval: * See for a definition of this structure. */ - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req .type __\name\()_proc_info,#object @@ -203,7 +203,7 @@ __\name\()_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b \cpu_flush @ cpu_flush + initfn \cpu_flush, __\name\()_proc_info @ cpu_flush .long cpu_arch_name @ arch_name .long cpu_elf_name @ elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index ac1ea6b..024fb77 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -132,14 +132,14 @@ __arm740_setup: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm740_proc_info,#object __arm740_proc_info: .long 0x41807400 .long 0xfffffff0 .long 0 .long 0 - b __arm740_setup + initfn __arm740_setup, __arm740_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index bf6ba4b..25472d9 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -76,7 +76,7 @@ __arm7tdmi_setup: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ extra_hwcaps=0 @@ -86,7 +86,7 @@ __\name\()_proc_info: .long \cpu_mask .long 0 .long 0 - b __arm7tdmi_setup + initfn __arm7tdmi_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps ) diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 22bf8dd..7a14bd4 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -448,7 +448,7 @@ arm920_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm920_proc_info,#object __arm920_proc_info: @@ -464,7 +464,7 @@ __arm920_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm920_setup + initfn __arm920_setup, __arm920_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 0c6d5ac..edccfcd 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -426,7 +426,7 @@ arm922_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm922_proc_info,#object __arm922_proc_info: @@ -442,7 +442,7 @@ __arm922_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm922_setup + initfn __arm922_setup, __arm922_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index c32d073..ede8c54 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -494,7 +494,7 @@ arm925_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name\()_proc_info,#object @@ -510,7 +510,7 @@ __\name\()_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm925_setup + initfn __arm925_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 252b250..fb827c6 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -474,7 +474,7 @@ arm926_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm926_proc_info,#object __arm926_proc_info: @@ -490,7 +490,7 @@ __arm926_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm926_setup + initfn __arm926_setup, __arm926_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index e5212d4..0a0b7a9 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -354,14 +354,14 @@ __arm940_setup: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm940_proc_info,#object __arm940_proc_info: .long 0x41009400 .long 0xff00fff0 .long 0 - b __arm940_setup + initfn __arm940_setup, __arm940_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index b3dd9b2..c85b40d 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -409,14 +409,14 @@ __arm946_setup: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm946_proc_info,#object __arm946_proc_info: .long 0x41009460 .long 0xff00fff0 .long 0 .long 0 - b __arm946_setup + initfn __arm946_setup, __arm946_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index 8227322..7fac8c6 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -70,7 +70,7 @@ __arm9tdmi_setup: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name\()_proc_info, #object @@ -79,7 +79,7 @@ __\name\()_proc_info: .long \cpu_mask .long 0 .long 0 - b __arm9tdmi_setup + initfn __arm9tdmi_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S index c494886..4001b73 100644 --- a/arch/arm/mm/proc-fa526.S +++ b/arch/arm/mm/proc-fa526.S @@ -190,7 +190,7 @@ fa526_cr1_set: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __fa526_proc_info,#object __fa526_proc_info: @@ -206,7 +206,7 @@ __fa526_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __fa526_setup + initfn __fa526_setup, __fa526_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index 03a1b75..e494d6d 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -584,7 +584,7 @@ feroceon_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req .type __\name\()_proc_info,#object @@ -601,7 +601,8 @@ __\name\()_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __feroceon_setup + initfn __feroceon_setup, __\name\()_proc_info + .long __feroceon_setup .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 082b9f2..0f13b5f 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -331,3 +331,7 @@ ENTRY(\name\()_tlb_fns) .globl \x .equ \x, \y .endm + +.macro initfn, func, base + .long \func - \base +.endm diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 53d3934..d65edf7 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -427,7 +427,7 @@ mohawk_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __88sv331x_proc_info,#object __88sv331x_proc_info: @@ -443,7 +443,7 @@ __88sv331x_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __mohawk_setup + initfn __mohawk_setup, __88sv331x_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index 8008a04..ee2ce49 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -199,7 +199,7 @@ sa110_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __sa110_proc_info,#object __sa110_proc_info: @@ -213,7 +213,7 @@ __sa110_proc_info: .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __sa110_setup + initfn __sa110_setup, __sa110_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index 89f97ac..222d583 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -242,7 +242,7 @@ sa1100_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name\()_proc_info,#object @@ -257,7 +257,7 @@ __\name\()_proc_info: .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __sa1100_setup + initfn __sa1100_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index d0390f4..06d890a 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -264,7 +264,7 @@ v6_crval: string cpu_elf_name, "v6" .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc /* * Match any ARMv6 processor core. @@ -287,7 +287,7 @@ __v6_proc_info: PMD_SECT_XN | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __v6_setup + initfn __v6_setup, __v6_proc_info .long cpu_arch_name .long cpu_elf_name /* See also feat_v6_fixup() for HWCAP_TLS */ diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 8b4ee5e..6bdaa4c 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -462,19 +462,19 @@ __v7_setup_stack: string cpu_elf_name, "v7" .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc /* * Standard v7 proc info content */ -.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions +.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags) ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags) .long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags - W(b) \initfunc + initfn \initfunc, \name .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \ @@ -494,7 +494,7 @@ __v7_setup_stack: __v7_ca5mp_proc_info: .long 0x410fc050 .long 0xff0ffff0 - __v7_proc __v7_ca5mp_setup + __v7_proc __v7_ca5mp_proc_info, __v7_ca5mp_setup .size __v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info /* @@ -504,7 +504,7 @@ __v7_ca5mp_proc_info: __v7_ca9mp_proc_info: .long 0x410fc090 .long 0xff0ffff0 - __v7_proc __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions + __v7_proc __v7_ca9mp_proc_info, __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info #endif /* CONFIG_ARM_LPAE */ @@ -517,7 +517,7 @@ __v7_ca9mp_proc_info: __v7_pj4b_proc_info: .long 0x560f5800 .long 0xff0fff00 - __v7_proc __v7_pj4b_setup, proc_fns = pj4b_processor_functions + __v7_proc __v7_pj4b_proc_info, __v7_pj4b_setup, proc_fns = pj4b_processor_functions .size __v7_pj4b_proc_info, . - __v7_pj4b_proc_info #endif @@ -528,7 +528,7 @@ __v7_pj4b_proc_info: __v7_cr7mp_proc_info: .long 0x410fc170 .long 0xff0ffff0 - __v7_proc __v7_cr7mp_setup + __v7_proc __v7_cr7mp_proc_info, __v7_cr7mp_setup .size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info /* @@ -538,7 +538,7 @@ __v7_cr7mp_proc_info: __v7_ca7mp_proc_info: .long 0x410fc070 .long 0xff0ffff0 - __v7_proc __v7_ca7mp_setup + __v7_proc __v7_ca7mp_proc_info, __v7_ca7mp_setup .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info /* @@ -548,7 +548,7 @@ __v7_ca7mp_proc_info: __v7_ca12mp_proc_info: .long 0x410fc0d0 .long 0xff0ffff0 - __v7_proc __v7_ca12mp_setup + __v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup .size __v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info /* @@ -558,7 +558,7 @@ __v7_ca12mp_proc_info: __v7_ca15mp_proc_info: .long 0x410fc0f0 .long 0xff0ffff0 - __v7_proc __v7_ca15mp_setup + __v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info /* @@ -568,7 +568,7 @@ __v7_ca15mp_proc_info: __v7_b15mp_proc_info: .long 0x420f00f0 .long 0xff0ffff0 - __v7_proc __v7_b15mp_setup + __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info /* @@ -578,7 +578,7 @@ __v7_b15mp_proc_info: __v7_ca17mp_proc_info: .long 0x410fc0e0 .long 0xff0ffff0 - __v7_proc __v7_ca17mp_setup + __v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup .size __v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info /* @@ -594,7 +594,7 @@ __krait_proc_info: * do support them. They also don't indicate support for fused multiply * instructions even though they actually do support them. */ - __v7_proc __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4 + __v7_proc __krait_proc_info, __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4 .size __krait_proc_info, . - __krait_proc_info /* @@ -604,5 +604,5 @@ __krait_proc_info: __v7_proc_info: .long 0x000f0000 @ Required ID value .long 0x000f0000 @ Mask for ID - __v7_proc __v7_setup + __v7_proc __v7_proc_info, __v7_setup .size __v7_proc_info, . - __v7_proc_info diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index d1e68b5..e08e1f2 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -135,7 +135,7 @@ __v7m_setup_stack_top: string cpu_elf_name "v7m" string cpu_v7m_name "ARMv7-M" - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc /* * Match any ARMv7-M processor core. @@ -146,7 +146,7 @@ __v7m_proc_info: .long 0x000f0000 @ Mask for ID .long 0 @ proc_info_list.__cpu_mm_mmu_flags .long 0 @ proc_info_list.__cpu_io_mmu_flags - b __v7m_setup @ proc_info_list.__cpu_flush + initfn __v7m_setup, __v7m_proc_info @ proc_info_list.__cpu_flush .long cpu_arch_name .long cpu_elf_name .long HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index f8acdfe..293dcc2 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -499,7 +499,7 @@ xsc3_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req .type __\name\()_proc_info,#object @@ -514,7 +514,7 @@ __\name\()_proc_info: .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __xsc3_setup + initfn __xsc3_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index afa2b3c..b6bbfdb 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -612,7 +612,7 @@ xscale_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name\()_proc_info,#object @@ -627,7 +627,7 @@ __\name\()_proc_info: .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __xscale_setup + initfn __xscale_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP -- cgit v0.10.2 From eb765c1ceb275b839ec67ff5779148b9298369c2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 18 Mar 2015 07:35:01 +0100 Subject: ARM: 8317/1: move the .idmap.text section closer to .head.text This moves the .idmap.text section closer to .head.text, so that relative branches are less likely to go out of range if the kernel text gets bigger. Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b31aa73..e8d5fba 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -100,6 +100,7 @@ SECTIONS .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ + IDMAP_TEXT __exception_text_start = .; *(.exception.text) __exception_text_end = .; @@ -108,7 +109,6 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT - IDMAP_TEXT #ifdef CONFIG_MMU *(.fixup) #endif -- cgit v0.10.2 From b8c9592b4a6c93211c8163888a97880d608503b5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 19 Mar 2015 19:03:25 +0100 Subject: ARM: 8318/1: treat CPU feature register fields as signed quantities The various CPU feature registers consist of 4-bit blocks that represent signed quantities, whose positive values represent incremental features, and whose negative values are reserved. To improve forward compatibility, update the feature detection code to take possible future higher values into account, but ignore negative values. Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 819777d..85e374f 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -253,4 +253,20 @@ static inline int cpu_is_pj4(void) #else #define cpu_is_pj4() 0 #endif + +static inline int __attribute_const__ cpuid_feature_extract_field(u32 features, + int field) +{ + int feature = (features >> field) & 15; + + /* feature registers are signed values */ + if (feature > 8) + feature -= 16; + + return feature; +} + +#define cpuid_feature_extract(reg, field) \ + cpuid_feature_extract_field(read_cpuid_ext(reg), field) + #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index e55408e..637c449e 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -375,30 +375,26 @@ void __init early_print(const char *str, ...) static void __init cpuid_init_hwcaps(void) { - unsigned int divide_instrs, vmsa; + int block; if (cpu_architecture() < CPU_ARCH_ARMv7) return; - divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24; - - switch (divide_instrs) { - case 2: + block = cpuid_feature_extract(CPUID_EXT_ISAR0, 24); + if (block >= 2) elf_hwcap |= HWCAP_IDIVA; - case 1: + if (block >= 1) elf_hwcap |= HWCAP_IDIVT; - } /* LPAE implies atomic ldrd/strd instructions */ - vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0; - if (vmsa >= 5) + block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0); + if (block >= 5) elf_hwcap |= HWCAP_LPAE; } static void __init elf_hwcap_fixup(void) { unsigned id = read_cpuid_id(); - unsigned sync_prim; /* * HWCAP_TLS is available only on 1136 r1p0 and later, @@ -419,9 +415,9 @@ static void __init elf_hwcap_fixup(void) * avoid advertising SWP; it may not be atomic with * multiprocessing cores. */ - sync_prim = ((read_cpuid_ext(CPUID_EXT_ISAR3) >> 8) & 0xf0) | - ((read_cpuid_ext(CPUID_EXT_ISAR4) >> 20) & 0x0f); - if (sync_prim >= 0x13) + if (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) > 1 || + (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) == 1 && + cpuid_feature_extract(CPUID_EXT_ISAR3, 20) >= 3)) elf_hwcap &= ~HWCAP_SWP; } -- cgit v0.10.2 From a092aedb8115c16cb49bc64dd09cb20471ff942b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 19 Mar 2015 19:04:05 +0100 Subject: ARM: 8319/1: advertise availability of v8 Crypto instructions When running the 32-bit ARM kernel on ARMv8 capable bare metal (e.g., 32-bit Android userland and kernel on a Cortex-A53), or as a KVM guest on a 64-bit host, we should advertise the availability of the Crypto instructions, so that userland libraries such as OpenSSL may use them. (Support for the v8 Crypto instructions in the 32-bit build was added to OpenSSL more than six months ago) This adds the ID feature bit detection, and sets elf_hwcap2 accordingly. Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 637c449e..910bb17 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -376,6 +376,7 @@ void __init early_print(const char *str, ...) static void __init cpuid_init_hwcaps(void) { int block; + u32 isar5; if (cpu_architecture() < CPU_ARCH_ARMv7) return; @@ -390,6 +391,27 @@ static void __init cpuid_init_hwcaps(void) block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0); if (block >= 5) elf_hwcap |= HWCAP_LPAE; + + /* check for supported v8 Crypto instructions */ + isar5 = read_cpuid_ext(CPUID_EXT_ISAR5); + + block = cpuid_feature_extract_field(isar5, 4); + if (block >= 2) + elf_hwcap2 |= HWCAP2_PMULL; + if (block >= 1) + elf_hwcap2 |= HWCAP2_AES; + + block = cpuid_feature_extract_field(isar5, 8); + if (block >= 1) + elf_hwcap2 |= HWCAP2_SHA1; + + block = cpuid_feature_extract_field(isar5, 12); + if (block >= 1) + elf_hwcap2 |= HWCAP2_SHA2; + + block = cpuid_feature_extract_field(isar5, 16); + if (block >= 1) + elf_hwcap2 |= HWCAP2_CRC32; } static void __init elf_hwcap_fixup(void) -- cgit v0.10.2 From 8defb3367fcd19d1af64c07792aade0747b54e0f Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 20 Mar 2015 15:42:27 +0100 Subject: ARM: 8320/1: fix integer overflow in ELF_ET_DYN_BASE Usually ELF_ET_DYN_BASE is 2/3 of TASK_SIZE. With 3G/1G user/kernel split this is not so, because 2*TASK_SIZE overflows 32 bits, so the actual value of ELF_ET_DYN_BASE is: (2 * TASK_SIZE / 3) = 0x2a000000 When ASLR is disabled PIE binaries will load at ELF_ET_DYN_BASE address. On 32bit platforms AddressSanitzer uses addresses [0x20000000 - 0x40000000] for shadow memory [1]. So ASan doesn't work for PIE binaries when ASLR disabled as it fails to map shadow memory. Also after Kees's 'split ET_DYN ASLR from mmap ASLR' patchset PIE binaries has a high chance of loading somewhere in between [0x2a000000 - 0x40000000] even if ASLR enabled. This makes ASan with PIE absolutely incompatible. Fix overflow by dividing TASK_SIZE prior to multiplying. After this patch ELF_ET_DYN_BASE equals to (for CONFIG_VMSPLIT_3G=y): (TASK_SIZE / 3 * 2) = 0x7f555554 [1] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm#Mapping Signed-off-by: Andrey Ryabinin Reported-by: Maria Guseva Cc: stable@vger.kernel.org Signed-off-by: Russell King diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index afb9caf..674d03f 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -115,7 +115,7 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) /* When the program starts, a1 contains a pointer to a function to be registered with atexit, as per the SVR4 ABI. A value of 0 means we -- cgit v0.10.2 From c20611df13c3e3070607c267cf781ba8645a185e Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Wed, 25 Mar 2015 08:47:18 +0100 Subject: ARM: 8327/1: zImage: add support for ARMv7-M MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch makes it possible to enter zImage in Thumb mode for ARMv7-M (Cortex-M) CPUs that do not support ARM mode. The kernel entry is also made in Thumb mode. [ukl: fix spelling in commit log, return early in call_cache_fn] Signed-off-by: Joachim Eastwood Tested-by: Stefan Agner Tested-by: Ezequiel Garcia Tested-by: Chanwoo Choi Signed-off-by: Uwe Kleine-König Signed-off-by: Russell King diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 55a3532..2c45b57 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -10,8 +10,11 @@ */ #include #include +#include + + AR_CLASS( .arch armv7-a ) + M_CLASS( .arch armv7-m ) - .arch armv7-a /* * Debugging stuff * @@ -114,7 +117,12 @@ * sort out different calling conventions */ .align - .arm @ Always enter in ARM state + /* + * Always enter in ARM state for CPUs that support the ARM ISA. + * As of today (2014) that's exactly the members of the A and R + * classes. + */ + AR_CLASS( .arm ) start: .type start,#function .rept 7 @@ -132,14 +140,15 @@ start: THUMB( .thumb ) 1: - ARM_BE8( setend be ) @ go BE8 if compiled for BE8 - mrs r9, cpsr + ARM_BE8( setend be ) @ go BE8 if compiled for BE8 + AR_CLASS( mrs r9, cpsr ) #ifdef CONFIG_ARM_VIRT_EXT bl __hyp_stub_install @ get into SVC mode, reversibly #endif mov r7, r1 @ save architecture ID mov r8, r2 @ save atags pointer +#ifndef CONFIG_CPU_V7M /* * Booting from Angel - need to enter SVC mode and disable * FIQs/IRQs (numeric definitions from angel arm.h source). @@ -155,6 +164,7 @@ not_angel: safe_svcmode_maskall r0 msr spsr_cxsf, r9 @ Save the CPU boot mode in @ SPSR +#endif /* * Note that some cache flushing and other stuff may * be needed here - is there an Angel SWI call for this? @@ -827,6 +837,16 @@ __common_mmu_cache_on: call_cache_fn: adr r12, proc_types #ifdef CONFIG_CPU_CP15 mrc p15, 0, r9, c0, c0 @ get processor ID +#elif defined(CONFIG_CPU_V7M) + /* + * On v7-M the processor id is located in the V7M_SCB_CPUID + * register, but as cache handling is IMPLEMENTATION DEFINED on + * v7-M (if existant at all) we just return early here. + * If V7M_SCB_CPUID were used the cpu ID functions (i.e. + * __armv7_mmu_cache_{on,off,flush}) would be selected which + * use cp15 registers that are not implemented on v7-M. + */ + bx lr #else ldr r9, =CONFIG_PROCESSOR_ID #endif @@ -1327,8 +1347,9 @@ __hyp_reentry_vectors: __enter_kernel: mov r0, #0 @ must be 0 - ARM( mov pc, r4 ) @ call kernel - THUMB( bx r4 ) @ entry point is always ARM + ARM( mov pc, r4 ) @ call kernel + M_CLASS( add r4, r4, #1 ) @ enter in Thumb mode for M class + THUMB( bx r4 ) @ entry point is always ARM for A/R classes reloc_code_end: diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index b88beab..200f9a7 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -24,6 +24,14 @@ .syntax unified #endif +#ifdef CONFIG_CPU_V7M +#define AR_CLASS(x...) +#define M_CLASS(x...) x +#else +#define AR_CLASS(x...) x +#define M_CLASS(x...) +#endif + #ifdef CONFIG_THUMB2_KERNEL #if __GNUC__ < 4 -- cgit v0.10.2 From 15955e70320bdc5d60b153a572ee4d89ab34e3d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 25 Mar 2015 11:44:14 +0100 Subject: ARM: 8328/1: remove empty preprocessor #else branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the patch for e16343c47e42 (ARM: 8160/1: drop warning about return_address not using unwind tables) was created there was still more code in said branch. Probably this simplification was just missed during conflict resolution when the patch was applied. Signed-off-by: Uwe Kleine-König Signed-off-by: Russell King diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index 24b4a04..36ed350 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -56,8 +56,6 @@ void *return_address(unsigned int level) return NULL; } -#else /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ - -#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / else */ +#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ EXPORT_SYMBOL_GPL(return_address); -- cgit v0.10.2 From c097877319ab61dd045b6497953b4e3df8f2bb44 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 6 Mar 2015 12:08:30 +0100 Subject: ARM: 8307/1: psci: move psci firmware calls out of line arm64 builds with GCC 5 have caused the __asmeq assertions in the PSCI calling code to fire, so move the ARM PSCI calls out of line into their own assembly file for consistency and to safeguard against the same issue occuring with the 32-bit toolchain. [will: brought into line with arm64 implementation] Reported-by: Andy Whitcroft Signed-off-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Russell King diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 902397d..1c1cdfa 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -86,7 +86,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o ifeq ($(CONFIG_ARM_PSCI),y) -obj-y += psci.o +obj-y += psci.o psci-call.o obj-$(CONFIG_SMP) += psci_smp.o endif diff --git a/arch/arm/kernel/psci-call.S b/arch/arm/kernel/psci-call.S new file mode 100644 index 0000000..a78e9e1 --- /dev/null +++ b/arch/arm/kernel/psci-call.S @@ -0,0 +1,31 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2015 ARM Limited + * + * Author: Mark Rutland + */ + +#include + +#include +#include + +/* int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ +ENTRY(__invoke_psci_fn_hvc) + __HVC(0) + bx lr +ENDPROC(__invoke_psci_fn_hvc) + +/* int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ +ENTRY(__invoke_psci_fn_smc) + __SMC(0) + bx lr +ENDPROC(__invoke_psci_fn_smc) diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index f73891b..f90fdf4 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -23,8 +23,6 @@ #include #include -#include -#include #include #include @@ -33,6 +31,9 @@ struct psci_operations psci_ops; static int (*invoke_psci_fn)(u32, u32, u32, u32); typedef int (*psci_initcall_t)(const struct device_node *); +asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32); +asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32); + enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, @@ -71,40 +72,6 @@ static u32 psci_power_state_pack(struct psci_power_state state) & PSCI_0_2_POWER_STATE_AFFL_MASK); } -/* - * The following two functions are invoked via the invoke_psci_fn pointer - * and will not be inlined, allowing us to piggyback on the AAPCS. - */ -static noinline int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, - u32 arg2) -{ - asm volatile( - __asmeq("%0", "r0") - __asmeq("%1", "r1") - __asmeq("%2", "r2") - __asmeq("%3", "r3") - __HVC(0) - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); - - return function_id; -} - -static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, - u32 arg2) -{ - asm volatile( - __asmeq("%0", "r0") - __asmeq("%1", "r1") - __asmeq("%2", "r2") - __asmeq("%3", "r3") - __SMC(0) - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); - - return function_id; -} - static int psci_get_version(void) { int err; -- cgit v0.10.2 From 779c88c94c34bd3b521da97b456a1aa51d870dec Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Mar 2015 10:39:10 +0100 Subject: ARM: 8321/1: asm-generic: introduce .text.fixup input section This introduces a new .text.fixup input section that gets emitted together with the .text section for each input object file. Note that *(.text) *(.text.fixup) is not the same as *(.text .text.fixup) and we are looking for the latter, to ensure that fixup snippets that are assembled into a separate section in the object file do not end up out of range for the relative branch instructions it contains if the .text section itself grows very large. This helps prevent linker failures on large ARM kernels. Acked-by: Arnd Bergmann Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ac78910..463231d 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -401,7 +401,7 @@ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ *(.text.hot) \ - *(.text) \ + *(.text .text.fixup) \ *(.ref.text) \ MEM_KEEP(init.text) \ MEM_KEEP(exit.text) \ -- cgit v0.10.2 From c4a84ae39b4a5bdf609c0001e14207aa731aab30 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Mar 2015 10:41:09 +0100 Subject: ARM: 8322/1: keep .text and .fixup regions closer together This moves all fixup snippets to the .text.fixup section, which is a special section that gets emitted along with the .text section for each input object file, i.e., the snippets are kept much closer to the code they refer to, which helps prevent linker failure on large kernels. Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 53e69da..4e78065 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -13,7 +13,7 @@ " .align 3\n" \ " .long 1b, 4f, 2b, 4f\n" \ " .popsection\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "4: mov %0, " err_reg "\n" \ " b 3b\n" \ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index ce0786e..74b17d0 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -315,7 +315,7 @@ do { \ __asm__ __volatile__( \ "1: " TUSER(ldrb) " %1,[%2],#0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %3\n" \ " mov %1, #0\n" \ @@ -351,7 +351,7 @@ do { \ __asm__ __volatile__( \ "1: " TUSER(ldr) " %1,[%2],#0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %3\n" \ " mov %1, #0\n" \ @@ -397,7 +397,7 @@ do { \ __asm__ __volatile__( \ "1: " TUSER(strb) " %1,[%2],#0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %3\n" \ " b 2b\n" \ @@ -430,7 +430,7 @@ do { \ __asm__ __volatile__( \ "1: " TUSER(str) " %1,[%2],#0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %3\n" \ " b 2b\n" \ @@ -458,7 +458,7 @@ do { \ THUMB( "1: " TUSER(str) " " __reg_oper1 ", [%1]\n" ) \ THUMB( "2: " TUSER(str) " " __reg_oper0 ", [%1, #4]\n" ) \ "3:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "4: mov %0, %3\n" \ " b 3b\n" \ diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h index a6d0a29..5831dce 100644 --- a/arch/arm/include/asm/word-at-a-time.h +++ b/arch/arm/include/asm/word-at-a-time.h @@ -71,7 +71,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) asm( "1: ldr %0, [%2]\n" "2:\n" - " .pushsection .fixup,\"ax\"\n" + " .pushsection .text.fixup,\"ax\"\n" " .align 2\n" "3: and %1, %2, #0x3\n" " bic %2, %2, #0x3\n" diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 672b219..570306c 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -545,7 +545,7 @@ ENDPROC(__und_usr) /* * The out of line fixup for the ldrt instructions above. */ - .pushsection .fixup, "ax" + .pushsection .text.fixup, "ax" .align 2 4: str r4, [sp, #S_PC] @ retry current instruction ret r9 diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index afdd51e..1361756 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -42,7 +42,7 @@ " cmp %0, #0\n" \ " movne %0, %4\n" \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ + " .section .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %5\n" \ " b 2b\n" \ diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index e8d5fba..7a301be 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -74,7 +74,7 @@ SECTIONS ARM_EXIT_DISCARD(EXIT_DATA) EXIT_CALL #ifndef CONFIG_MMU - *(.fixup) + *(.text.fixup) *(__ex_table) #endif #ifndef CONFIG_SMP_ON_UP @@ -109,9 +109,6 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT -#ifdef CONFIG_MMU - *(.fixup) -#endif *(.gnu.warning) *(.glue_7) *(.glue_7t) diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index 14a0d98..1710fd7 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S @@ -47,7 +47,7 @@ USER( strnebt r2, [r0]) ENDPROC(__clear_user) ENDPROC(__clear_user_std) - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 9001: ldmfd sp!, {r0, pc} .popsection diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index a9d3db1..9648b06 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -100,7 +100,7 @@ WEAK(__copy_to_user) ENDPROC(__copy_to_user) ENDPROC(__copy_to_user_std) - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2, r3} diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index 7d08b43..1d0957e 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -68,7 +68,7 @@ * so properly, we would have to add in whatever registers were loaded before * the fault, which, with the current asm above is not predictable. */ - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 4 9001: mov r4, #-EFAULT ldr r5, [sp, #8*4] @ *err_ptr diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 2c0c541..9769f1e 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -201,7 +201,7 @@ union offset_union { THUMB( "1: "ins" %1, [%2]\n" ) \ THUMB( " add %2, %2, #1\n" ) \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, #1\n" \ " b 2b\n" \ @@ -261,7 +261,7 @@ union offset_union { " mov %1, %1, "NEXT_BYTE"\n" \ "2: "ins" %1, [%2]\n" \ "3:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "4: mov %0, #1\n" \ " b 3b\n" \ @@ -301,7 +301,7 @@ union offset_union { " mov %1, %1, "NEXT_BYTE"\n" \ "4: "ins" %1, [%2]\n" \ "5:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "6: mov %0, #1\n" \ " b 5b\n" \ diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index 5d65be1..71df435 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -113,7 +113,7 @@ next: @ to fault. Emit the appropriate exception gunk to fix things up. @ ??? For some reason, faults can happen at .Lx2 even with a @ plain LDR instruction. Weird, but it seems harmless. - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 2 .Lfix: ret r9 @ let the user eat segfaults .popsection -- cgit v0.10.2 From 02e541db0540a2830f4af749c6f2b650abbbb77c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 25 Mar 2015 07:37:57 +0100 Subject: ARM: 8323/1: force linker to use PIC veneers When building a very large kernel, it is up to the linker to decide when and where to insert stubs to allow calls to functions that are out of range for the ordinary b/bl instructions. However, since the kernel is built as a position dependent binary, these stubs (aka veneers) may contain absolute addresses, which will break far calls performed with the MMU off. For instance, the call from __enable_mmu() in the .head.text section to __turn_mmu_on() in the .idmap.text section may be turned into something like this: c0008168 <__enable_mmu>: c0008168: f020 0002 bic.w r0, r0, #2 c000816c: f420 5080 bic.w r0, r0, #4096 c0008170: f000 b846 b.w c0008200 <____turn_mmu_on_veneer> [...] c0008200 <____turn_mmu_on_veneer>: c0008200: 4778 bx pc c0008202: 46c0 nop c0008204: e59fc000 ldr ip, [pc] c0008208: e12fff1c bx ip c000820c: c13dfae1 teqgt sp, r1, ror #21 [...] c13dfae0 <__turn_mmu_on>: c13dfae0: 4600 mov r0, r0 [...] After adding --pic-veneer to the LDFLAGS, the veneer is emitted like this instead: c0008200 <____turn_mmu_on_veneer>: c0008200: 4778 bx pc c0008202: 46c0 nop c0008204: e59fc004 ldr ip, [pc, #4] c0008208: e08fc00c add ip, pc, ip c000820c: e12fff1c bx ip c0008210: 013d7d31 teqeq sp, r1, lsr sp c0008214: 00000000 andeq r0, r0, r0 Note that this particular example is best addressed by moving .head.text and .idmap.text closer together, but this issue could potentially affect any code that needs to execute with the MMU off. Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 7f99cd6..7d98070 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -13,7 +13,7 @@ # Ensure linker flags are correct LDFLAGS := -LDFLAGS_vmlinux :=-p --no-undefined -X +LDFLAGS_vmlinux :=-p --no-undefined -X --pic-veneer ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 LDFLAGS_MODULE += --be8 -- cgit v0.10.2 From d0776aff9a38b1390cc06ffc2c4dcf6ece7c05b9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 25 Mar 2015 07:39:21 +0100 Subject: ARM: 8324/1: move cpu_resume() to .text section Move cpu_resume() to the .text section where it belongs. Change the adr reference to sleep_save_sp to an explicit PC relative reference so sleep_save_sp itself can remain in .data. This helps prevent linker failure on large kernels, as the code in the .data section may be too far away to be in range for normal b/bl instructions. Reviewed-by: Nicolas Pitre Tested-by: Sudeep Holla Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index e1e60e5..7d37bfc 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -116,14 +116,7 @@ cpu_resume_after_mmu: ldmfd sp!, {r4 - r11, pc} ENDPROC(cpu_resume_after_mmu) -/* - * Note: Yes, part of the following code is located into the .data section. - * This is to allow sleep_save_sp to be accessed with a relative load - * while we can't rely on any MMU translation. We could have put - * sleep_save_sp in the .text section as well, but some setups might - * insist on it to be truly read-only. - */ - .data + .text .align ENTRY(cpu_resume) ARM_BE8(setend be) @ ensure we are in BE mode @@ -145,6 +138,8 @@ ARM_BE8(setend be) @ ensure we are in BE mode compute_mpidr_hash r1, r4, r5, r6, r0, r3 1: adr r0, _sleep_save_sp + ldr r2, [r0] + add r0, r0, r2 ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] ldr r0, [r0, r1, lsl #2] @@ -156,10 +151,12 @@ THUMB( bx r3 ) ENDPROC(cpu_resume) .align 2 +_sleep_save_sp: + .long sleep_save_sp - . mpidr_hash_ptr: .long mpidr_hash - . @ mpidr_hash struct offset + .data .type sleep_save_sp, #object ENTRY(sleep_save_sp) -_sleep_save_sp: .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp -- cgit v0.10.2 From 12833bacf5d904c2dac0c3f52b2ebde5f2c5a2bc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 25 Mar 2015 07:41:43 +0100 Subject: ARM: 8325/1: exynos: move resume code to .text section This code calls cpu_resume() using a straight branch (b), so now that we have moved cpu_resume() back to .text, this should be moved there as well. Any direct references to symbols that will remain in the .data section are replaced with explicit PC-relative references. Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/mach-exynos/sleep.S b/arch/arm/mach-exynos/sleep.S index 31d2583..cf95079 100644 --- a/arch/arm/mach-exynos/sleep.S +++ b/arch/arm/mach-exynos/sleep.S @@ -23,14 +23,7 @@ #define CPU_MASK 0xff0ffff0 #define CPU_CORTEX_A9 0x410fc090 - /* - * The following code is located into the .data section. This is to - * allow l2x0_regs_phys to be accessed with a relative load while we - * can't rely on any MMU translation. We could have put l2x0_regs_phys - * in the .text section as well, but some setups might insist on it to - * be truly read-only. (Reference from: arch/arm/kernel/sleep.S) - */ - .data + .text .align /* @@ -69,10 +62,12 @@ ENTRY(exynos_cpu_resume_ns) cmp r0, r1 bne skip_cp15 - adr r0, cp15_save_power + adr r0, _cp15_save_power ldr r1, [r0] - adr r0, cp15_save_diag + ldr r1, [r0, r1] + adr r0, _cp15_save_diag ldr r2, [r0] + ldr r2, [r0, r2] mov r0, #SMC_CMD_C15RESUME dsb smc #0 @@ -118,14 +113,20 @@ skip_l2x0: skip_cp15: b cpu_resume ENDPROC(exynos_cpu_resume_ns) + + .align +_cp15_save_power: + .long cp15_save_power - . +_cp15_save_diag: + .long cp15_save_diag - . +#ifdef CONFIG_CACHE_L2X0 +1: .long l2x0_saved_regs - . +#endif /* CONFIG_CACHE_L2X0 */ + + .data .globl cp15_save_diag cp15_save_diag: .long 0 @ cp15 diagnostic .globl cp15_save_power cp15_save_power: .long 0 @ cp15 power control - -#ifdef CONFIG_CACHE_L2X0 - .align -1: .long l2x0_saved_regs - . -#endif /* CONFIG_CACHE_L2X0 */ -- cgit v0.10.2 From 00ee68ecc2bf714ee97bc3629c29eef502e69c4b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 25 Mar 2015 07:42:49 +0100 Subject: ARM: 8326/1: s5pv210: move resume code to .text section This code calls cpu_resume() using a straight branch (b), so now that we have moved cpu_resume() back to .text, this should be moved there as well. Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/mach-s5pv210/sleep.S b/arch/arm/mach-s5pv210/sleep.S index 7c43ddd..dfbfc0f 100644 --- a/arch/arm/mach-s5pv210/sleep.S +++ b/arch/arm/mach-s5pv210/sleep.S @@ -14,7 +14,7 @@ #include - .data + .text .align /* -- cgit v0.10.2 From 767bf7e7a1e82a81c59778348d156993d0a6175d Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Apr 2015 16:20:39 +0100 Subject: ARM: fix broken hibernation Normally, when a CPU wants to clear a cache line to zero in the external L2 cache, it would generate bus cycles to write each word as it would do with any other data access. However, a Cortex A9 connected to a L2C-310 has a specific feature where the CPU can detect this operation, and signal that it wants to zero an entire cache line. This feature, known as Full Line of Zeros (FLZ), involves a non-standard AXI signalling mechanism which only the L2C-310 can properly interpret. There are separate enable bits in both the L2C-310 and the Cortex A9 - the L2C-310 needs to be enabled and have the FLZ enable bit set in the auxiliary control register before the Cortex A9 has this feature enabled. Unfortunately, the suspend code was not respecting this - it's not obvious from the code: swsusp_arch_suspend() cpu_suspend() /* saves the Cortex A9 auxiliary control register */ arch_save_image() soft_restart() /* turns off FLZ in Cortex A9, and disables L2C */ cpu_resume() /* restores the Cortex A9 registers, inc auxcr */ At this point, we end up with the L2C disabled, but the Cortex A9 with FLZ enabled - which means any memset() or zeroing of a full cache line will fail to take effect. A similar issue exists in the resume path, but it's slightly more complex: swsusp_arch_suspend() cpu_suspend() /* saves the Cortex A9 auxiliary control register */ arch_save_image() /* image with A9 auxcr saved */ ... swsusp_arch_resume() call_with_stack() arch_restore_image() /* restores image with A9 auxcr saved above */ soft_restart() /* turns off FLZ in Cortex A9, and disables L2C */ cpu_resume() /* restores the Cortex A9 registers, inc auxcr */ Again, here we end up with the L2C disabled, but Cortex A9 FLZ enabled. There's no need to turn off the L2C in either of these two paths; there are benefits from not doing so - for example, the page copies will be faster with the L2C enabled. Hence, fix this by providing a variant of soft_restart() which can be used without turning the L2 cache controller off, and use it in both of these paths to keep the L2C enabled across the respective resume transitions. Fixes: 8ef418c7178f ("ARM: l2c: trial at enabling some Cortex-A9 optimisations") Reported-by: Sean Cross Tested-by: Sean Cross Signed-off-by: Russell King diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c index c4cc50e..cfb354f 100644 --- a/arch/arm/kernel/hibernate.c +++ b/arch/arm/kernel/hibernate.c @@ -22,6 +22,7 @@ #include #include #include +#include "reboot.h" int pfn_is_nosave(unsigned long pfn) { @@ -61,7 +62,7 @@ static int notrace arch_save_image(unsigned long unused) ret = swsusp_save(); if (ret == 0) - soft_restart(virt_to_phys(cpu_resume)); + _soft_restart(virt_to_phys(cpu_resume), false); return ret; } @@ -86,7 +87,7 @@ static void notrace arch_restore_image(void *unused) for (pbe = restore_pblist; pbe; pbe = pbe->next) copy_page(pbe->orig_address, pbe->address); - soft_restart(virt_to_phys(cpu_resume)); + _soft_restart(virt_to_phys(cpu_resume), false); } static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata; diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index fdfa3a7..2bf1a16 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -41,6 +41,7 @@ #include #include #include +#include "reboot.h" #ifdef CONFIG_CC_STACKPROTECTOR #include @@ -95,7 +96,7 @@ static void __soft_restart(void *addr) BUG(); } -void soft_restart(unsigned long addr) +void _soft_restart(unsigned long addr, bool disable_l2) { u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); @@ -104,7 +105,7 @@ void soft_restart(unsigned long addr) local_fiq_disable(); /* Disable the L2 if we're the last man standing. */ - if (num_online_cpus() == 1) + if (disable_l2) outer_disable(); /* Change to the new stack and continue with the reset. */ @@ -114,6 +115,11 @@ void soft_restart(unsigned long addr) BUG(); } +void soft_restart(unsigned long addr) +{ + _soft_restart(addr, num_online_cpus() == 1); +} + /* * Function pointers to optional machine specific functions */ diff --git a/arch/arm/kernel/reboot.h b/arch/arm/kernel/reboot.h new file mode 100644 index 0000000..c87f058 --- /dev/null +++ b/arch/arm/kernel/reboot.h @@ -0,0 +1,6 @@ +#ifndef REBOOT_H +#define REBOOT_H + +extern void _soft_restart(unsigned long addr, bool disable_l2); + +#endif -- cgit v0.10.2 From 045ab94e10ee17038066d71abc8fdce719ab56f9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Apr 2015 17:02:45 +0100 Subject: ARM: move reboot code to arch/arm/kernel/reboot.c Move shutdown and reboot related code to a separate file, out of process.c. This helps to avoid polluting process.c with non-process related code. Signed-off-by: Russell King diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 1c1cdfa..b6544ab 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o return_address.o \ + process.o ptrace.o reboot.o return_address.o \ setup.o signal.o sigreturn_codes.o \ stacktrace.o sys_arm.o time.o traps.o diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c index cfb354f..a71501f 100644 --- a/arch/arm/kernel/hibernate.c +++ b/arch/arm/kernel/hibernate.c @@ -100,7 +100,6 @@ static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata; */ int swsusp_arch_resume(void) { - extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); call_with_stack(arch_restore_image, 0, resume_stack + ARRAY_SIZE(resume_stack)); return 0; diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 2bf1a16..f810a6c 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -17,12 +17,9 @@ #include #include #include -#include -#include #include #include #include -#include #include #include #include @@ -31,17 +28,14 @@ #include #include #include -#include -#include -#include #include #include #include #include #include #include -#include "reboot.h" +#include #ifdef CONFIG_CC_STACKPROTECTOR #include @@ -60,74 +54,6 @@ static const char *isa_modes[] __maybe_unused = { "ARM" , "Thumb" , "Jazelle", "ThumbEE" }; -extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); -typedef void (*phys_reset_t)(unsigned long); - -/* - * A temporary stack to use for CPU reset. This is static so that we - * don't clobber it with the identity mapping. When running with this - * stack, any references to the current task *will not work* so you - * should really do as little as possible before jumping to your reset - * code. - */ -static u64 soft_restart_stack[16]; - -static void __soft_restart(void *addr) -{ - phys_reset_t phys_reset; - - /* Take out a flat memory mapping. */ - setup_mm_for_reboot(); - - /* Clean and invalidate caches */ - flush_cache_all(); - - /* Turn off caching */ - cpu_proc_fin(); - - /* Push out any further dirty data, and ensure cache is empty */ - flush_cache_all(); - - /* Switch to the identity mapping. */ - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); - phys_reset((unsigned long)addr); - - /* Should never get here. */ - BUG(); -} - -void _soft_restart(unsigned long addr, bool disable_l2) -{ - u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); - - /* Disable interrupts first */ - raw_local_irq_disable(); - local_fiq_disable(); - - /* Disable the L2 if we're the last man standing. */ - if (disable_l2) - outer_disable(); - - /* Change to the new stack and continue with the reset. */ - call_with_stack(__soft_restart, (void *)addr, (void *)stack); - - /* Should never get here. */ - BUG(); -} - -void soft_restart(unsigned long addr) -{ - _soft_restart(addr, num_online_cpus() == 1); -} - -/* - * Function pointers to optional machine specific functions - */ -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); - /* * This is our default idle handler. */ @@ -172,79 +98,6 @@ void arch_cpu_idle_dead(void) } #endif -/* - * Called by kexec, immediately prior to machine_kexec(). - * - * This must completely disable all secondary CPUs; simply causing those CPUs - * to execute e.g. a RAM-based pin loop is not sufficient. This allows the - * kexec'd kernel to use any and all RAM as it sees fit, without having to - * avoid any code or data used by any SW CPU pin loop. The CPU hotplug - * functionality embodied in disable_nonboot_cpus() to achieve this. - */ -void machine_shutdown(void) -{ - disable_nonboot_cpus(); -} - -/* - * Halting simply requires that the secondary CPUs stop performing any - * activity (executing tasks, handling interrupts). smp_send_stop() - * achieves this. - */ -void machine_halt(void) -{ - local_irq_disable(); - smp_send_stop(); - - local_irq_disable(); - while (1); -} - -/* - * Power-off simply requires that the secondary CPUs stop performing any - * activity (executing tasks, handling interrupts). smp_send_stop() - * achieves this. When the system power is turned off, it will take all CPUs - * with it. - */ -void machine_power_off(void) -{ - local_irq_disable(); - smp_send_stop(); - - if (pm_power_off) - pm_power_off(); -} - -/* - * Restart requires that the secondary CPUs stop performing any activity - * while the primary CPU resets the system. Systems with a single CPU can - * use soft_restart() as their machine descriptor's .restart hook, since that - * will cause the only available CPU to reset. Systems with multiple CPUs must - * provide a HW restart implementation, to ensure that all CPUs reset at once. - * This is required so that any code running after reset on the primary CPU - * doesn't have to co-ordinate with other CPUs to ensure they aren't still - * executing pre-reset code, and using RAM that the primary CPU's code wishes - * to use. Implementing such co-ordination would be essentially impossible. - */ -void machine_restart(char *cmd) -{ - local_irq_disable(); - smp_send_stop(); - - if (arm_pm_restart) - arm_pm_restart(reboot_mode, cmd); - else - do_kernel_restart(cmd); - - /* Give a grace period for failure to restart of 1s */ - mdelay(1000); - - /* Whoops - the platform was unable to reboot. Tell the user! */ - printk("Reboot failed -- System halted\n"); - local_irq_disable(); - while (1); -} - void __show_regs(struct pt_regs *regs) { unsigned long flags; diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c new file mode 100644 index 0000000..1a4d232 --- /dev/null +++ b/arch/arm/kernel/reboot.c @@ -0,0 +1,155 @@ +/* + * Copyright (C) 1996-2000 Russell King - Converted to ARM. + * Original Copyright (C) 1995 Linus Torvalds + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + +#include +#include + +#include "reboot.h" + +typedef void (*phys_reset_t)(unsigned long); + +/* + * Function pointers to optional machine specific functions + */ +void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +/* + * A temporary stack to use for CPU reset. This is static so that we + * don't clobber it with the identity mapping. When running with this + * stack, any references to the current task *will not work* so you + * should really do as little as possible before jumping to your reset + * code. + */ +static u64 soft_restart_stack[16]; + +static void __soft_restart(void *addr) +{ + phys_reset_t phys_reset; + + /* Take out a flat memory mapping. */ + setup_mm_for_reboot(); + + /* Clean and invalidate caches */ + flush_cache_all(); + + /* Turn off caching */ + cpu_proc_fin(); + + /* Push out any further dirty data, and ensure cache is empty */ + flush_cache_all(); + + /* Switch to the identity mapping. */ + phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); + phys_reset((unsigned long)addr); + + /* Should never get here. */ + BUG(); +} + +void _soft_restart(unsigned long addr, bool disable_l2) +{ + u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); + + /* Disable interrupts first */ + raw_local_irq_disable(); + local_fiq_disable(); + + /* Disable the L2 if we're the last man standing. */ + if (disable_l2) + outer_disable(); + + /* Change to the new stack and continue with the reset. */ + call_with_stack(__soft_restart, (void *)addr, (void *)stack); + + /* Should never get here. */ + BUG(); +} + +void soft_restart(unsigned long addr) +{ + _soft_restart(addr, num_online_cpus() == 1); +} + +/* + * Called by kexec, immediately prior to machine_kexec(). + * + * This must completely disable all secondary CPUs; simply causing those CPUs + * to execute e.g. a RAM-based pin loop is not sufficient. This allows the + * kexec'd kernel to use any and all RAM as it sees fit, without having to + * avoid any code or data used by any SW CPU pin loop. The CPU hotplug + * functionality embodied in disable_nonboot_cpus() to achieve this. + */ +void machine_shutdown(void) +{ + disable_nonboot_cpus(); +} + +/* + * Halting simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. + */ +void machine_halt(void) +{ + local_irq_disable(); + smp_send_stop(); + + local_irq_disable(); + while (1); +} + +/* + * Power-off simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. When the system power is turned off, it will take all CPUs + * with it. + */ +void machine_power_off(void) +{ + local_irq_disable(); + smp_send_stop(); + + if (pm_power_off) + pm_power_off(); +} + +/* + * Restart requires that the secondary CPUs stop performing any activity + * while the primary CPU resets the system. Systems with a single CPU can + * use soft_restart() as their machine descriptor's .restart hook, since that + * will cause the only available CPU to reset. Systems with multiple CPUs must + * provide a HW restart implementation, to ensure that all CPUs reset at once. + * This is required so that any code running after reset on the primary CPU + * doesn't have to co-ordinate with other CPUs to ensure they aren't still + * executing pre-reset code, and using RAM that the primary CPU's code wishes + * to use. Implementing such co-ordination would be essentially impossible. + */ +void machine_restart(char *cmd) +{ + local_irq_disable(); + smp_send_stop(); + + if (arm_pm_restart) + arm_pm_restart(reboot_mode, cmd); + else + do_kernel_restart(cmd); + + /* Give a grace period for failure to restart of 1s */ + mdelay(1000); + + /* Whoops - the platform was unable to reboot. Tell the user! */ + printk("Reboot failed -- System halted\n"); + local_irq_disable(); + while (1); +} diff --git a/arch/arm/kernel/reboot.h b/arch/arm/kernel/reboot.h index c87f058..bf7a0b1 100644 --- a/arch/arm/kernel/reboot.h +++ b/arch/arm/kernel/reboot.h @@ -1,6 +1,7 @@ #ifndef REBOOT_H #define REBOOT_H +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); extern void _soft_restart(unsigned long addr, bool disable_l2); #endif -- cgit v0.10.2 From 49f28aa6b0d0735dbe5f04263c49a199ed0c5bb7 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Wed, 1 Apr 2015 07:26:33 +0100 Subject: ARM: 8337/1: mm: Do not invoke OOM for higher order IOMMU DMA allocations IOMMU should be able to use single pages as well as bigger blocks, so if higher order allocations fail, we should not affect state of the system, with events such as OOM killer, but rather fall back to order 0 allocations. This patch changes the behavior of ARM IOMMU DMA allocator to use __GFP_NORETRY, which bypasses OOM invocation, for orders higher than zero and, only if that fails, fall back to normal order 0 allocation which might invoke OOM killer. Signed-off-by: Tomasz Figa Reviewed-by: Doug Anderson Acked-by: David Rientjes Acked-by: Marek Szyprowski Signed-off-by: Russell King diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c274476..f9941cd 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1135,13 +1135,28 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp |= __GFP_NOWARN | __GFP_HIGHMEM; while (count) { - int j, order = __fls(count); + int j, order; + + for (order = __fls(count); order > 0; --order) { + /* + * We do not want OOM killer to be invoked as long + * as we can fall back to single pages, so we force + * __GFP_NORETRY for orders higher than zero. + */ + pages[i] = alloc_pages(gfp | __GFP_NORETRY, order); + if (pages[i]) + break; + } - pages[i] = alloc_pages(gfp, order); - while (!pages[i] && order) - pages[i] = alloc_pages(gfp, --order); - if (!pages[i]) - goto error; + if (!pages[i]) { + /* + * Fall back to single page allocation. + * Might invoke OOM killer as last resort. + */ + pages[i] = alloc_pages(gfp, 0); + if (!pages[i]) + goto error; + } if (order) { split_page(pages[i], order); -- cgit v0.10.2 From fee3fd4fd2ad136b26226346c3f8b446cc120bf5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 1 Apr 2015 13:36:57 +0100 Subject: ARM: 8338/1: kexec: Relax SMP validation to improve DT compatibility When trying to kexec into a new kernel on a platform where multiple CPU cores are present, but no SMP bringup code is available yet, the kexec_load system call fails with: kexec_load failed: Invalid argument The SMP test added to machine_kexec_prepare() in commit 2103f6cba61a8b8b ("ARM: 7807/1: kexec: validate CPU hotplug support") wants to prohibit kexec on SMP platforms where it cannot disable secondary CPUs. However, this test is too strict: if the secondary CPUs couldn't be enabled in the first place, there's no need to disable them later at kexec time. Hence skip the test in the absence of SMP bringup code. This allows to add all CPU cores to the DTS from the beginning, without having to implement SMP bringup first, improving DT compatibility. Signed-off-by: Geert Uytterhoeven Acked-by: Stephen Warren Signed-off-by: Russell King diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 0ad7d49..993e522 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -104,6 +104,7 @@ static inline u32 mpidr_hash_size(void) return 1 << mpidr_hash.bits; } +extern int platform_can_secondary_boot(void); extern int platform_can_cpu_hotplug(void); #endif diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index de2b085..8bf3b7c 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -46,7 +46,8 @@ int machine_kexec_prepare(struct kimage *image) * and implements CPU hotplug for the current HW. If not, we won't be * able to kexec reliably, so fail the prepare operation. */ - if (num_possible_cpus() > 1 && !platform_can_cpu_hotplug()) + if (num_possible_cpus() > 1 && platform_can_secondary_boot() && + !platform_can_cpu_hotplug()) return -EINVAL; /* diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 86ef244..cca5b87 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -145,6 +145,11 @@ void __init smp_init_cpus(void) smp_ops.smp_init_cpus(); } +int platform_can_secondary_boot(void) +{ + return !!smp_ops.smp_boot_secondary; +} + int platform_can_cpu_hotplug(void) { #ifdef CONFIG_HOTPLUG_CPU -- cgit v0.10.2 From 7c07005eea967db09163491d39bd0c1cda485c21 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 1 Apr 2015 13:37:11 +0100 Subject: ARM: 8339/1: Enable CONFIG_GENERIC_IRQ_SHOW_LEVEL Several interrupt controllers support both edge and level interrupts, so it's useful to provide that information in /proc/interrupts. Signed-off-by: Geert Uytterhoeven Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d7d7b27..35fed4b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -21,6 +21,7 @@ config ARM select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD -- cgit v0.10.2 From 049e4b3f801778569c619324a36b4518d955dba4 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 26 Mar 2015 08:53:57 +0100 Subject: ARM: 8333/1: amba: tegra-ahb: fix register offsets in the macros amba: tegra-ahb: fix register offsets in the macros From a hardware SoC integration point of view, the offsets of the Tegra AHB registers that are currently defined in tegra-ahb.c macros are all off by four bytes. Similarly, the starting address of this IP block in our existing DT files is also off by four bytes. Since we attempt to make old DT files forward-compatible with newer kernels, we cannot fix the IP block base address in old DT data. However, we can fix the offsets in the driver so that they are correct with respect to the hardware, which is what this patch does. And a subsequent patch will allow the offset to be removed for DT 'compatible' strings used in future DT files for newer Tegra chips that the kernel does not yet support. Signed-off-by: Paul Walmsley Cc: Paul Walmsley Cc: Alexandre Courbot Cc: Hiroshi DOYU Cc: Stephen Warren Cc: Thierry Reding Cc: linux-kernel@vger.kernel.org Acked-by: Stephen Warren Signed-off-by: Russell King diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index c6dc354..30759a5 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -25,49 +25,50 @@ #include #include #include +#include #include #define DRV_NAME "tegra-ahb" -#define AHB_ARBITRATION_DISABLE 0x00 -#define AHB_ARBITRATION_PRIORITY_CTRL 0x04 +#define AHB_ARBITRATION_DISABLE 0x04 +#define AHB_ARBITRATION_PRIORITY_CTRL 0x08 #define AHB_PRIORITY_WEIGHT(x) (((x) & 0x7) << 29) #define PRIORITY_SELECT_USB BIT(6) #define PRIORITY_SELECT_USB2 BIT(18) #define PRIORITY_SELECT_USB3 BIT(17) -#define AHB_GIZMO_AHB_MEM 0x0c +#define AHB_GIZMO_AHB_MEM 0x10 #define ENB_FAST_REARBITRATE BIT(2) #define DONT_SPLIT_AHB_WR BIT(7) -#define AHB_GIZMO_APB_DMA 0x10 -#define AHB_GIZMO_IDE 0x18 -#define AHB_GIZMO_USB 0x1c -#define AHB_GIZMO_AHB_XBAR_BRIDGE 0x20 -#define AHB_GIZMO_CPU_AHB_BRIDGE 0x24 -#define AHB_GIZMO_COP_AHB_BRIDGE 0x28 -#define AHB_GIZMO_XBAR_APB_CTLR 0x2c -#define AHB_GIZMO_VCP_AHB_BRIDGE 0x30 -#define AHB_GIZMO_NAND 0x3c -#define AHB_GIZMO_SDMMC4 0x44 -#define AHB_GIZMO_XIO 0x48 -#define AHB_GIZMO_BSEV 0x60 -#define AHB_GIZMO_BSEA 0x70 -#define AHB_GIZMO_NOR 0x74 -#define AHB_GIZMO_USB2 0x78 -#define AHB_GIZMO_USB3 0x7c +#define AHB_GIZMO_APB_DMA 0x14 +#define AHB_GIZMO_IDE 0x1c +#define AHB_GIZMO_USB 0x20 +#define AHB_GIZMO_AHB_XBAR_BRIDGE 0x24 +#define AHB_GIZMO_CPU_AHB_BRIDGE 0x28 +#define AHB_GIZMO_COP_AHB_BRIDGE 0x2c +#define AHB_GIZMO_XBAR_APB_CTLR 0x30 +#define AHB_GIZMO_VCP_AHB_BRIDGE 0x34 +#define AHB_GIZMO_NAND 0x40 +#define AHB_GIZMO_SDMMC4 0x48 +#define AHB_GIZMO_XIO 0x4c +#define AHB_GIZMO_BSEV 0x64 +#define AHB_GIZMO_BSEA 0x74 +#define AHB_GIZMO_NOR 0x78 +#define AHB_GIZMO_USB2 0x7c +#define AHB_GIZMO_USB3 0x80 #define IMMEDIATE BIT(18) -#define AHB_GIZMO_SDMMC1 0x80 -#define AHB_GIZMO_SDMMC2 0x84 -#define AHB_GIZMO_SDMMC3 0x88 -#define AHB_MEM_PREFETCH_CFG_X 0xd8 -#define AHB_ARBITRATION_XBAR_CTRL 0xdc -#define AHB_MEM_PREFETCH_CFG3 0xe0 -#define AHB_MEM_PREFETCH_CFG4 0xe4 -#define AHB_MEM_PREFETCH_CFG1 0xec -#define AHB_MEM_PREFETCH_CFG2 0xf0 +#define AHB_GIZMO_SDMMC1 0x84 +#define AHB_GIZMO_SDMMC2 0x88 +#define AHB_GIZMO_SDMMC3 0x8c +#define AHB_MEM_PREFETCH_CFG_X 0xdc +#define AHB_ARBITRATION_XBAR_CTRL 0xe0 +#define AHB_MEM_PREFETCH_CFG3 0xe4 +#define AHB_MEM_PREFETCH_CFG4 0xe8 +#define AHB_MEM_PREFETCH_CFG1 0xf0 +#define AHB_MEM_PREFETCH_CFG2 0xf4 #define PREFETCH_ENB BIT(31) #define MST_ID(x) (((x) & 0x1f) << 26) #define AHBDMA_MST_ID MST_ID(5) @@ -77,7 +78,7 @@ #define ADDR_BNDRY(x) (((x) & 0xf) << 21) #define INACTIVITY_TIMEOUT(x) (((x) & 0xffff) << 0) -#define AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID 0xf8 +#define AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID 0xfc #define AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE BIT(17) @@ -123,12 +124,12 @@ struct tegra_ahb { static inline u32 gizmo_readl(struct tegra_ahb *ahb, u32 offset) { - return readl(ahb->regs + offset); + return readl(ahb->regs - 4 + offset); } static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) { - writel(value, ahb->regs + offset); + writel(value, ahb->regs - 4 + offset); } #ifdef CONFIG_TEGRA_IOMMU_SMMU -- cgit v0.10.2 From ce7a10b0ff3db63a43d2d7885aa0f43dc8c96419 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 26 Mar 2015 08:56:35 +0100 Subject: ARM: 8334/1: amba: tegra-ahb: detect and correct bogus base address amba: tegra-ahb: detect and correct bogus base address From a hardware SoC integration point of view, the starting address of this IP block in the existing Tegra SoC DT files is off by 4 bytes from the actual base address. Since we attempt to make old DT files forward-compatible with newer kernels, we cannot fix the IP block base address in old DT data. This patch works around the problem by detecting the four byte base address offset in the driver code, and correcting it if it's detected. (In general, IP block base addresses almost always have a null low byte.) Future SoC DT data for Tegra AHB should use the correct Tegra AHB base address, in cases where there is no DT data backward compatibility requirement. This patch is a revision of the patch originally titled "amba: tegra-ahb: use correct base address for future chip support". This revision implements changes requested by Russell King: http://marc.info/?l=linux-tegra&m=142658851825062&w=2 http://marc.info/?l=linux-tegra&m=142658873925178&w=2 Signed-off-by: Paul Walmsley Cc: Paul Walmsley Cc: Alexandre Courbot Cc: Hiroshi DOYU Cc: Stephen Warren Cc: Thierry Reding Cc: linux-kernel@vger.kernel.org Acked-by: Stephen Warren Signed-off-by: Russell King diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index 30759a5..b0b688c 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -82,6 +82,16 @@ #define AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE BIT(17) +/* + * INCORRECT_BASE_ADDR_LOW_BYTE: Legacy kernel DT files for Tegra SoCs + * prior to Tegra124 generally use a physical base address ending in + * 0x4 for the AHB IP block. According to the TRM, the low byte + * should be 0x0. During device probing, this macro is used to detect + * whether the passed-in physical address is incorrect, and if so, to + * correct it. + */ +#define INCORRECT_BASE_ADDR_LOW_BYTE 0x4 + static struct platform_driver tegra_ahb_driver; static const u32 tegra_ahb_gizmo[] = { @@ -124,12 +134,12 @@ struct tegra_ahb { static inline u32 gizmo_readl(struct tegra_ahb *ahb, u32 offset) { - return readl(ahb->regs - 4 + offset); + return readl(ahb->regs + offset); } static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) { - writel(value, ahb->regs - 4 + offset); + writel(value, ahb->regs + offset); } #ifdef CONFIG_TEGRA_IOMMU_SMMU @@ -258,6 +268,15 @@ static int tegra_ahb_probe(struct platform_device *pdev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + /* Correct the IP block base address if necessary */ + if (res && + (res->start & INCORRECT_BASE_ADDR_LOW_BYTE) == + INCORRECT_BASE_ADDR_LOW_BYTE) { + dev_warn(&pdev->dev, "incorrect AHB base address in DT data - enabling workaround\n"); + res->start -= INCORRECT_BASE_ADDR_LOW_BYTE; + } + ahb->regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(ahb->regs)) return PTR_ERR(ahb->regs); -- cgit v0.10.2 From 38e42f121601fc0640c032871a38efa5a59cff68 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 26 Mar 2015 08:58:31 +0100 Subject: ARM: 8335/1: Documentation: DT bindings: Tegra AHB: document the legacy base address Documentation: DT bindings: Tegra AHB: require the legacy base address for existing chips Per Stephen Warren, note in the Tegra AHB DT binding documentation that we specifically deprecate any attempt to use the IP block's actual hardware base address, and advocate the use of the legacy "off-by-four" address in the 'regs' property, for Tegra chips with existing upstream Linux DT files that include a Tegra AHB node. This patch updates the documentation accordingly. Changing the existing kernel DT data isn't under consideration because Linux kernel DT data policy is to preserve compatibility between newer DT data files and older kernels. However, this additional step of changing the documentation should discourage others from sending kernel patches to try to change the legacy kernel DT data. Furthermore, for out-of-tree software (such as bootloaders or other operating systems) that may rely on Linux kernel DT binding documentation as an ABI (but not the Linux kernel DT data itself), such a change may allow future convergence with the Linux kernel DT data without additional code changes. Signed-off-by: Paul Walmsley Cc: Paul Walmsley Cc: Stephen Warren Cc: Alexandre Courbot Cc: Eduardo Valentin Cc: Ian Campbell Cc: Kumar Gala Cc: Mark Rutland Cc: Pawel Moll Cc: Rob Herring Cc: Thierry Reding Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Stephen Warren Signed-off-by: Russell King diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt index 067c979..9a4295b 100644 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt @@ -5,9 +5,12 @@ Required properties: Tegra30, must contain "nvidia,tegra30-ahb". Otherwise, must contain '"nvidia,-ahb", "nvidia,tegra30-ahb"' where is tegra124, tegra132, or tegra210. -- reg : Should contain 1 register ranges(address and length) +- reg : Should contain 1 register ranges(address and length). For + Tegra20, Tegra30, and Tegra114 chips, the value must be <0x6000c004 + 0x10c>. For Tegra124, Tegra132 and Tegra210 chips, the value should + be be <0x6000c000 0x150>. -Example: +Example (for a Tegra20 chip): ahb: ahb@6000c004 { compatible = "nvidia,tegra20-ahb"; reg = <0x6000c004 0x10c>; /* AHB Arbitration + Gizmo Controller */ -- cgit v0.10.2 From e1e2f6e4c5759aab3a8cfb1a0c19017ea770dfd2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 9 Jan 2015 19:34:43 +0100 Subject: ARM: 8276/1: Make CPU_DCACHE_DISABLE depend on !SMP Enabling CPU_DCACHE_DISABLE on a SMP capable system will prevent the kernel from booting because of the following ldrex instruction in arch_spin_lock: (gdb) x/10i $pc => 0xc053cfa8 <_raw_spin_lock+4>: ldrex r3, [r0] 0xc053cfac <_raw_spin_lock+8>: add r2, r3, #65536 ; 0x10000 which is taken by the very first printk call: at /home/fainelli/work/linux/arch/arm/include/asm/spinlock.h:65 fmt=0xc0637650 " 01 66Booting Linux on physical CPU 0x%xn", args=) at kernel/printk/printk.c:1525 fmt=0xc05370f4 " 24320215342 04340235344 20320215342 36377/341 17") at kernel/printk/printk.c:1688 ldrex requires exclusive monitor(s) (local or global) which are no longer working when the Data cache is disabled in CP15 and will just hang the CPU there. Acked-by: Arnd Bergmann Signed-off-by: Florian Fainelli Signed-off-by: Russell King diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 9b4f29e..133ecff 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -738,7 +738,7 @@ config CPU_ICACHE_DISABLE config CPU_DCACHE_DISABLE bool "Disable D-Cache (C-bit)" - depends on CPU_CP15 + depends on CPU_CP15 && !SMP help Say Y here to disable the processor data cache. Unless you have a reason not to or are unsure, say N. -- cgit v0.10.2 From f6ac49ba29499387e12e864a22e6d4bf46dafe9b Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 2 Apr 2015 15:38:18 +0100 Subject: ARM: vexpress: fix CPU hotplug with CT9x4 tile. The Cortex A9 tile fails to unplug CPUs if errata 643719 is not enabled. This leads to random weird behaviours, but ultimately seem to lock the kernel one way or another when a CPU is hot unplugged. Symptoms range from a spinlock lockup in the scheduler, the entire system hanging, to dumping out the kernel printk buffer a few lines at a time, and other weird behaviours. This is caused by the outgoing CPU not having its inner caches properly flushed before it exits coherency - flush_cache_louis() is used to achieve this, but as a result of the hardware bug, this function ends up doing nothing without the errata workaround enabled. As the Versatile Express has an affected CPU, this errata must always be enabled. Signed-off-by: Russell King diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index 3c2509b..4be5379 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -42,6 +42,7 @@ if ARCH_VEXPRESS config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA bool "Enable A5 and A9 only errata work-arounds" default y + select ARM_ERRATA_643719 if SMP select ARM_ERRATA_720789 select PL310_ERRATA_753970 if CACHE_L2X0 help -- cgit v0.10.2 From 6c5c2a01fcfdb70f2e95e30e96ccf53b88e81023 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 4 Apr 2015 23:22:07 +0100 Subject: ARM: proc-arm94*.S: fix setup function Both ARM946 and ARM940 setup functions were corrupting r1 and r2, which is not permissible - these are used to carry the machine ID and boot data into the kernel, and must be preserved. The code responsible for this was the same in both files: they were using the registers to generate a protection region register value. Fix this by turning this process into a macro, and using that macro in both these files with an alternative register allocation. r0, r3 and r7 can be used for temporary values here. Reported-by: Alex Dumitrache Tested-by: Georg Hofstetter Signed-off-by: Russell King diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index e5212d4..c42cdd3 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -297,26 +297,16 @@ __arm940_setup: mcr p15, 0, r0, c6, c0, 1 ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 - bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value - orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c1, 0 @ set area 1, RAM - mcr p15, 0, r0, c6, c1, 1 + ldr r7, =CONFIG_DRAM_SIZE >> 12 @ size of RAM (must be >= 4KB) + pr_val r3, r0, r7, #1 + mcr p15, 0, r3, c6, c1, 0 @ set area 1, RAM + mcr p15, 0, r3, c6, c1, 1 ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 - bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value - orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c2, 0 @ set area 2, ROM/FLASH - mcr p15, 0, r0, c6, c2, 1 + ldr r7, =CONFIG_FLASH_SIZE @ size of FLASH (must be >= 4KB) + pr_val r3, r0, r6, #1 + mcr p15, 0, r3, c6, c2, 0 @ set area 2, ROM/FLASH + mcr p15, 0, r3, c6, c2, 1 mov r0, #0x06 mcr p15, 0, r0, c2, c0, 0 @ Region 1&2 cacheable diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index b3dd9b2..17a8c20 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -343,24 +343,14 @@ __arm946_setup: mcr p15, 0, r0, c6, c0, 0 @ set region 0, default ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 - bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the region register value - orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c1, 0 @ set region 1, RAM + ldr r7, =CONFIG_DRAM_SIZE @ size of RAM (must be >= 4KB) + pr_val r3, r0, r7, #1 + mcr p15, 0, r3, c6, c1, 0 ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 - bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the region register value - orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c2, 0 @ set region 2, ROM/FLASH + ldr r7, =CONFIG_FLASH_SIZE @ size of FLASH (must be >= 4KB) + pr_val r3, r0, r7, #1 + mcr p15, 0, r3, c6, c2, 0 mov r0, #0x06 mcr p15, 0, r0, c2, c0, 0 @ region 1,2 d-cacheable diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 082b9f2..d081c9d 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -331,3 +331,27 @@ ENTRY(\name\()_tlb_fns) .globl \x .equ \x, \y .endm + + /* + * Macro to calculate the log2 size for the protection region + * registers. This calculates rd = log2(size) - 1. tmp must + * not be the same register as rd. + */ +.macro pr_sz, rd, size, tmp + mov \tmp, \size, lsr #12 + mov \rd, #11 +1: movs \tmp, \tmp, lsr #1 + addne \rd, \rd, #1 + bne 1b +.endm + + /* + * Macro to generate a protection region register value + * given a pre-masked address, size, and enable bit. + * Corrupts size. + */ +.macro pr_val, dest, addr, size, enable + pr_sz \dest, \size, \size @ calculate log2(size) - 1 + orr \dest, \addr, \dest, lsl #1 @ mask in the region size + orr \dest, \dest, \enable +.endm -- cgit v0.10.2 From 89c6bc5884e52ec004f03071f268ba3f27003aba Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Apr 2015 12:59:35 +0100 Subject: ARM: allow 16-bit instructions in ALT_UP() Allow ALT_UP() to cope with a 16-bit Thumb instruction by automatically inserting a following nop instruction. This allows us to care less about getting the assembler to emit a 32-bit thumb instruction. Signed-off-by: Russell King diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index f67fd3a..186270b 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -237,6 +237,9 @@ .pushsection ".alt.smp.init", "a" ;\ .long 9998b ;\ 9997: instr ;\ + .if . - 9997b == 2 ;\ + nop ;\ + .endif ;\ .if . - 9997b != 4 ;\ .error "ALT_UP() content must assemble to exactly 4 bytes";\ .endif ;\ -- cgit v0.10.2 From 5aca370826a2487aaaae5db31f6bb0b906e9755f Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Apr 2015 11:10:46 +0100 Subject: ARM: cache-v7: use movw/movt instructions We always build cache-v7.S for ARMv7, so we can use the ARMv7 16-bit move instructions to load large constants, rather than using constants in a literal pool. Reviewed-by: Catalin Marinas Signed-off-by: Russell King diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index b966656..30c81e7 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -36,10 +36,10 @@ ENTRY(v7_invalidate_l1) mcr p15, 2, r0, c0, c0, 0 mrc p15, 1, r0, c0, c0, 0 - ldr r1, =0x7fff + movw r1, #0x7fff and r2, r1, r0, lsr #13 - ldr r1, =0x3ff + movw r1, #0x3ff and r3, r1, r0, lsr #3 @ NumWays - 1 add r2, r2, #1 @ NumSets @@ -95,7 +95,8 @@ ENTRY(v7_flush_dcache_louis) #ifdef CONFIG_ARM_ERRATA_643719 ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register ALT_UP(reteq lr) @ LoUU is zero, so nothing to do - ldreq r1, =0x410fc090 @ ID of ARM Cortex A9 r0p? + movweq r1, #:lower16:0x410fc090 @ ID of ARM Cortex A9 r0p? + movteq r1, #:upper16:0x410fc090 biceq r2, r2, #0x0000000f @ clear minor revision number teqeq r2, r1 @ test for errata affected core and if so... orreqs r3, #(1 << 21) @ fix LoUIS value (and set flags state to 'ne') @@ -140,10 +141,10 @@ flush_levels: #endif and r2, r1, #7 @ extract the length of the cache lines add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff + movw r4, #0x3ff ands r4, r4, r1, lsr #3 @ find maximum number on the way size clz r5, r4 @ find bit position of way size increment - ldr r7, =0x7fff + movw r7, #0x7fff ands r7, r7, r1, lsr #13 @ extract max number of the index size loop1: mov r9, r7 @ create working copy of max index -- cgit v0.10.2 From 47b8484ea6569511a3cd915bea29886b4cd08333 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Apr 2015 11:15:53 +0100 Subject: ARM: cache-v7: shift CLIDR to extract appropriate field before masking Rather than have code which masks and then shifts, such as: mrc p15, 1, r0, c0, c0, 1 ALT_SMP(ands r3, r0, #7 << 21) ALT_UP( ands r3, r0, #7 << 27) ALT_SMP(mov r3, r3, lsr #20) ALT_UP( mov r3, r3, lsr #26) re-arrange this as a shift and then mask. The masking is the same for each field which we want to extract, so this allows the mask to be shared amongst code paths: mrc p15, 1, r0, c0, c0, 1 ALT_SMP(mov r3, r0, lsr #20) ALT_UP( mov r3, r0, lsr #26) ands r3, r3, #7 << 1 Use this method for the LoUIS, LoUU and LoC fields. Reviewed-by: Catalin Marinas Signed-off-by: Russell King diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 30c81e7..d062f8c 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -90,8 +90,9 @@ ENDPROC(v7_flush_icache_all) ENTRY(v7_flush_dcache_louis) dmb @ ensure ordering with previous memory accesses mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr - ALT_SMP(ands r3, r0, #(7 << 21)) @ extract LoUIS from clidr - ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr +ALT_SMP(mov r3, r0, lsr #20) @ move LoUIS into position +ALT_UP( mov r3, r0, lsr #26) @ move LoUU into position + ands r3, r3, #7 << 1 @ extract LoU*2 field from clidr #ifdef CONFIG_ARM_ERRATA_643719 ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register ALT_UP(reteq lr) @ LoUU is zero, so nothing to do @@ -99,10 +100,8 @@ ENTRY(v7_flush_dcache_louis) movteq r1, #:upper16:0x410fc090 biceq r2, r2, #0x0000000f @ clear minor revision number teqeq r2, r1 @ test for errata affected core and if so... - orreqs r3, #(1 << 21) @ fix LoUIS value (and set flags state to 'ne') + moveqs r3, #1 << 1 @ fix LoUIS value (and set flags state to 'ne') #endif - ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2 - ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2 reteq lr @ return if level == 0 mov r10, #0 @ r10 (starting level) = 0 b flush_levels @ start flushing cache levels @@ -120,8 +119,8 @@ ENDPROC(v7_flush_dcache_louis) ENTRY(v7_flush_dcache_all) dmb @ ensure ordering with previous memory accesses mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field + mov r3, r0, lsr #23 @ move LoC into position + ands r3, r3, #7 << 1 @ extract LoC*2 from clidr beq finished @ if loc is 0, then no need to clean mov r10, #0 @ start clean at cache level 0 flush_levels: -- cgit v0.10.2 From cd8b24d9e852b53e68c69a086358c81423dfb8d1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Apr 2015 11:21:42 +0100 Subject: ARM: cache-v7: consolidate initialisation of cache level index Both v7_flush_cache_louis and v7_flush_dcache_all both begin the flush_levels loop with r10 initialised to zero. In each case, this is done immediately prior to entering the loop. Branch to this instruction in v7_flush_dcache_all from v7_flush_cache_louis and eliminate the unnecessary initialisation in v7_flush_cache_louis. Reviewed-by: Catalin Marinas Signed-off-by: Russell King diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index d062f8c..5b5d0c0 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -103,8 +103,7 @@ ALT_UP( mov r3, r0, lsr #26) @ move LoUU into position moveqs r3, #1 << 1 @ fix LoUIS value (and set flags state to 'ne') #endif reteq lr @ return if level == 0 - mov r10, #0 @ r10 (starting level) = 0 - b flush_levels @ start flushing cache levels + b start_flush_levels @ start flushing cache levels ENDPROC(v7_flush_dcache_louis) /* @@ -122,6 +121,7 @@ ENTRY(v7_flush_dcache_all) mov r3, r0, lsr #23 @ move LoC into position ands r3, r3, #7 << 1 @ extract LoC*2 from clidr beq finished @ if loc is 0, then no need to clean +start_flush_levels: mov r10, #0 @ start clean at cache level 0 flush_levels: add r2, r10, r10, lsr #1 @ work out 3x current cache level -- cgit v0.10.2 From d3cd451dfb579367b4c5968256b3d8342dd0b0e8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Apr 2015 11:25:39 +0100 Subject: ARM: cache-v7: optimise branches in v7_flush_cache_louis Optimise the branches such that for the majority of unaffected devices, we avoid needing to execute the errata work-around code path by branching to start_flush_levels early. Reviewed-by: Catalin Marinas Signed-off-by: Russell King diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 5b5d0c0..793d061 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -93,17 +93,18 @@ ENTRY(v7_flush_dcache_louis) ALT_SMP(mov r3, r0, lsr #20) @ move LoUIS into position ALT_UP( mov r3, r0, lsr #26) @ move LoUU into position ands r3, r3, #7 << 1 @ extract LoU*2 field from clidr + bne start_flush_levels @ LoU != 0, start flushing #ifdef CONFIG_ARM_ERRATA_643719 - ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register - ALT_UP(reteq lr) @ LoUU is zero, so nothing to do - movweq r1, #:lower16:0x410fc090 @ ID of ARM Cortex A9 r0p? - movteq r1, #:upper16:0x410fc090 - biceq r2, r2, #0x0000000f @ clear minor revision number - teqeq r2, r1 @ test for errata affected core and if so... - moveqs r3, #1 << 1 @ fix LoUIS value (and set flags state to 'ne') +ALT_SMP(mrc p15, 0, r2, c0, c0, 0) @ read main ID register +ALT_UP( ret lr) @ LoUU is zero, so nothing to do + movw r1, #:lower16:0x410fc090 @ ID of ARM Cortex A9 r0p? + movt r1, #:upper16:0x410fc090 + bic r2, r2, #0x0000000f @ clear minor revision number + teq r2, r1 @ test for errata affected core and if so... + moveq r3, #1 << 1 @ fix LoUIS value + beq start_flush_levels @ start flushing cache levels #endif - reteq lr @ return if level == 0 - b start_flush_levels @ start flushing cache levels + ret lr ENDPROC(v7_flush_dcache_louis) /* -- cgit v0.10.2 From aaf4b5d92ce8299a363f1c0d7dc00aafde532e56 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Apr 2015 11:32:34 +0100 Subject: ARM: cache-v7: optimise test for Cortex A9 r0pX devices Eliminate one unnecessary instruction from this test by pre-shifting the Cortex A9 ID - we can shift the actual ID in the teq instruction thereby losing the pX bit of the ID at no cost. Reviewed-by: Catalin Marinas Signed-off-by: Russell King diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 793d061..a134d8a 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -97,10 +97,9 @@ ALT_UP( mov r3, r0, lsr #26) @ move LoUU into position #ifdef CONFIG_ARM_ERRATA_643719 ALT_SMP(mrc p15, 0, r2, c0, c0, 0) @ read main ID register ALT_UP( ret lr) @ LoUU is zero, so nothing to do - movw r1, #:lower16:0x410fc090 @ ID of ARM Cortex A9 r0p? - movt r1, #:upper16:0x410fc090 - bic r2, r2, #0x0000000f @ clear minor revision number - teq r2, r1 @ test for errata affected core and if so... + movw r1, #:lower16:(0x410fc090 >> 4) @ ID of ARM Cortex A9 r0p? + movt r1, #:upper16:(0x410fc090 >> 4) + teq r1, r2, lsr #4 @ test for errata affected core and if so... moveq r3, #1 << 1 @ fix LoUIS value beq start_flush_levels @ start flushing cache levels #endif -- cgit v0.10.2 From e5a5de4447471ab1a01585f075400c2be36e2cb6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 2 Apr 2015 23:58:55 +0100 Subject: ARM: enable ARM errata 643719 workaround by default The effects of not having ARM errata 643719 enabled on affected CPUs can be very confusing and hard to debug. Rather than leave this to chance, enable this workaround by default. Now that we have rearranged the code, it should have a low impact on the majority of CPUs. Acked-by: Catalin Marinas Reviewed-by: Catalin Marinas Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 35fed4b..bca01e2 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1127,6 +1127,7 @@ config ARM_ERRATA_742231 config ARM_ERRATA_643719 bool "ARM errata: LoUIS bit field in CLIDR register is incorrect" depends on CPU_V7 && SMP + default y help This option enables the workaround for the 643719 Cortex-A9 (prior to r1p0) erratum. On affected cores the LoUIS bit field of the CLIDR -- cgit v0.10.2 From a6d746789825e4d7229523eebee233b03ad48c54 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 7 Apr 2015 15:35:24 +0100 Subject: ARM: proc-v7: avoid errata 430973 workaround for non-Cortex A8 CPUs Avoid the errata 430973 workaround for non-Cortex A8 CPUs. Having this workaround enabled introduces an additional branch target buffer flush into the context switching path, something we wish to avoid. To allow this errata to be enabled in multiplatform kernels while reducing its impact, rearrange the Cortex-A8 CPU support to avoid impacting on other Version 7 CPUs. Tested-by: Tony Lindgren Signed-off-by: Russell King diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index ed448d8..10405b8 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -37,15 +37,18 @@ * It is assumed that: * - we are not using split page tables */ -ENTRY(cpu_v7_switch_mm) +ENTRY(cpu_ca8_switch_mm) #ifdef CONFIG_MMU mov r2, #0 - mmid r1, r1 @ get mm->context.id - ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) - ALT_UP(orr r0, r0, #TTB_FLAGS_UP) #ifdef CONFIG_ARM_ERRATA_430973 mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB #endif +#endif +ENTRY(cpu_v7_switch_mm) +#ifdef CONFIG_MMU + mmid r1, r1 @ get mm->context.id + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) #ifdef CONFIG_PID_IN_CONTEXTIDR mrc p15, 0, r2, c13, c0, 1 @ read current context ID lsr r2, r2, #8 @ extract the PID @@ -61,6 +64,7 @@ ENTRY(cpu_v7_switch_mm) #endif bx lr ENDPROC(cpu_v7_switch_mm) +ENDPROC(cpu_ca8_switch_mm) /* * cpu_v7_set_pte_ext(ptep, pte) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 6bdaa4c..3d1054f 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -153,6 +153,21 @@ ENDPROC(cpu_v7_do_resume) #endif /* + * Cortex-A8 + */ + globl_equ cpu_ca8_proc_init, cpu_v7_proc_init + globl_equ cpu_ca8_proc_fin, cpu_v7_proc_fin + globl_equ cpu_ca8_reset, cpu_v7_reset + globl_equ cpu_ca8_do_idle, cpu_v7_do_idle + globl_equ cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area + globl_equ cpu_ca8_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_ca8_suspend_size, cpu_v7_suspend_size +#ifdef CONFIG_ARM_CPU_SUSPEND + globl_equ cpu_ca8_do_suspend, cpu_v7_do_suspend + globl_equ cpu_ca8_do_resume, cpu_v7_do_resume +#endif + +/* * Cortex-A9 processor functions */ globl_equ cpu_ca9mp_proc_init, cpu_v7_proc_init @@ -451,7 +466,10 @@ __v7_setup_stack: @ define struct processor (see and proc-macros.S) define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 +#ifndef CONFIG_ARM_LPAE + define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 +#endif #ifdef CONFIG_CPU_PJ4B define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 #endif @@ -507,6 +525,16 @@ __v7_ca9mp_proc_info: __v7_proc __v7_ca9mp_proc_info, __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info + /* + * ARM Ltd. Cortex A8 processor. + */ + .type __v7_ca8_proc_info, #object +__v7_ca8_proc_info: + .long 0x410fc080 + .long 0xff0ffff0 + __v7_proc __v7_ca8_proc_info, __v7_setup, proc_fns = ca8_processor_functions + .size __v7_ca8_proc_info, . - __v7_ca8_proc_info + #endif /* CONFIG_ARM_LPAE */ /* -- cgit v0.10.2 From 37463be8658ae5fba153f4029ca3ec3f8a64fd51 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 26 Mar 2015 11:43:51 +0000 Subject: ARM: switch to use the generic show_mem() implementation Switch ARM to use the generic show_mem() implementation, which displays the statistics from the mm zone rather than walking the page arrays. Acked-by: Mel Gorman Tested-by: Gregory Fong Signed-off-by: Russell King diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 1609b02..ae369c1 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -86,55 +86,6 @@ static int __init parse_tag_initrd2(const struct tag *tag) __tagtable(ATAG_INITRD2, parse_tag_initrd2); -/* - * This keeps memory configuration data used by a couple memory - * initialization functions, as well as show_mem() for the skipping - * of holes in the memory map. It is populated by arm_add_memory(). - */ -void show_mem(unsigned int filter) -{ - int free = 0, total = 0, reserved = 0; - int shared = 0, cached = 0, slab = 0; - struct memblock_region *reg; - - printk("Mem-info:\n"); - show_free_areas(filter); - - for_each_memblock (memory, reg) { - unsigned int pfn1, pfn2; - struct page *page, *end; - - pfn1 = memblock_region_memory_base_pfn(reg); - pfn2 = memblock_region_memory_end_pfn(reg); - - page = pfn_to_page(pfn1); - end = pfn_to_page(pfn2 - 1) + 1; - - do { - total++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (PageSlab(page)) - slab++; - else if (!page_count(page)) - free++; - else - shared += page_count(page) - 1; - pfn1++; - page = pfn_to_page(pfn1); - } while (pfn1 < pfn2); - } - - printk("%d pages of RAM\n", total); - printk("%d free pages\n", free); - printk("%d reserved pages\n", reserved); - printk("%d slab pages\n", slab); - printk("%d pages shared\n", shared); - printk("%d pages swap cached\n", cached); -} - static void __init find_limits(unsigned long *min, unsigned long *max_low, unsigned long *max_high) { -- cgit v0.10.2 From 57ca654bef6c43bbbccfb2d231fd245d3f67dd46 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Apr 2015 10:36:04 +0100 Subject: ARM: ensure delay timer has sufficient accuracy for delays We have recently had an example of someone wanting to use a 90kHz timer for the software delay loop. udelay() needs to have at least microsecond resolution to allow drivers access to a delay mechanism with a reasonable chance of delaying the period they requested within at least a 50% marging of error, especially for small delays. Discussion about the udelay() accuracy can be found at: https://lkml.org/lkml/2011/1/9/37 Reject timers which are unable to supply this level of resolution. Acked-by: Nicolas Pitre Signed-off-by: Russell King diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c index 312d43e..8044591 100644 --- a/arch/arm/lib/delay.c +++ b/arch/arm/lib/delay.c @@ -83,6 +83,12 @@ void __init register_current_timer_delay(const struct delay_timer *timer) NSEC_PER_SEC, 3600); res = cyc_to_ns(1ULL, new_mult, new_shift); + if (res > 1000) { + pr_err("Ignoring delay timer %ps, which has insufficient resolution of %lluns\n", + timer, res); + return; + } + if (!delay_calibrated && (!delay_res || (res < delay_res))) { pr_info("Switching to timer-based delay loop, resolution %lluns\n", res); delay_timer = timer; -- cgit v0.10.2 From 79403cda37204b06b9f96351a35251ff7d88de4b Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Apr 2015 16:14:37 +0100 Subject: ARM: update errata 430973 documentation to cover Cortex A8 r1p* This errata covers all r1 variants of Cortex A8, it's not limited to just r1p0..r1p2. Update the documentation to reflect this. The code already applies the workaround to all r1p* A8 CPUs. Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index bca01e2..0a9dcde 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1058,7 +1058,7 @@ config ARM_ERRATA_430973 depends on CPU_V7 help This option enables the workaround for the 430973 Cortex-A8 - (r1p0..r1p2) erratum. If a code sequence containing an ARM/Thumb + r1p* erratum. If a code sequence containing an ARM/Thumb interworking branch is replaced with another code sequence at the same virtual address, whether due to self-modifying code or virtual to physical address re-mapping, Cortex-A8 does not recover from the -- cgit v0.10.2