From 75da01e127f7db3b23effa6118336d303e7572a7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 Jan 2013 11:25:52 +0000 Subject: ARM: KVM: vgic: force EOIed LRs to the empty state The VGIC doesn't guarantee that an EOIed LR that has been configured to generate a maintenance interrupt will appear as empty. While the code recovers from this situation, it is better to clean the LR and flag it as empty so it can be quickly recycled. Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c index c9a1731..76ea1aa 100644 --- a/arch/arm/kvm/vgic.c +++ b/arch/arm/kvm/vgic.c @@ -883,8 +883,7 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) lr, irq, vgic_cpu->vgic_lr[lr]); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; - - goto out; + return true; } /* Try to use another LR for this interrupt */ @@ -898,7 +897,6 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) vgic_cpu->vgic_irq_lr_map[irq] = lr; set_bit(lr, vgic_cpu->lr_used); -out: if (!vgic_irq_is_edge(vcpu, irq)) vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; @@ -1054,6 +1052,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } else { vgic_cpu_irq_clear(vcpu, irq); } + + /* + * Despite being EOIed, the LR may not have + * been marked as empty. + */ + set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); + vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; } } -- cgit v0.10.2 From 33c83cb3c1d84b76c8270abe5487e77f83a81b22 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Feb 2013 18:28:30 +0000 Subject: ARM: KVM: vgic: take distributor lock on sync_hwstate path Now that the maintenance interrupt handling is actually out of the handler itself, the code becomes quite racy as we can get preempted while we process the state. Wrapping this code around the distributor lock ensures that we're not preempted and relatively race-free. Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c index 76ea1aa..0e4cfe1 100644 --- a/arch/arm/kvm/vgic.c +++ b/arch/arm/kvm/vgic.c @@ -1016,21 +1016,6 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); - /* - * We do not need to take the distributor lock here, since the only - * action we perform is clearing the irq_active_bit for an EOIed - * level interrupt. There is a potential race with - * the queuing of an interrupt in __kvm_vgic_flush_hwstate(), where we - * check if the interrupt is already active. Two possibilities: - * - * - The queuing is occurring on the same vcpu: cannot happen, - * as we're already in the context of this vcpu, and - * executing the handler - * - The interrupt has been migrated to another vcpu, and we - * ignore this interrupt for this run. Big deal. It is still - * pending though, and will get considered when this vcpu - * exits. - */ if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { /* * Some level interrupts have been EOIed. Clear their @@ -1069,9 +1054,8 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } /* - * Sync back the VGIC state after a guest run. We do not really touch - * the distributor here (the irq_pending_on_cpu bit is safe to set), - * so there is no need for taking its lock. + * Sync back the VGIC state after a guest run. The distributor lock is + * needed so we don't get preempted in the middle of the state processing. */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { @@ -1117,10 +1101,14 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + if (!irqchip_in_kernel(vcpu->kvm)) return; + spin_lock(&dist->lock); __kvm_vgic_sync_hwstate(vcpu); + spin_unlock(&dist->lock); } int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From e651eab0af88aa7a281fe9e8c36c0846552aa7fc Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 18 Mar 2013 12:24:04 +0100 Subject: ARM: 7677/1: LPAE: Fix mapping in alloc_init_section for unaligned addresses With LPAE enabled, alloc_init_section() does not map the entire address space for unaligned addresses. The issue also reproduced with CMA + LPAE. CMA tries to map 16MB with page granularity mappings during boot. alloc_init_pte() is called and out of 16MB, only 2MB gets mapped and rest remains unaccessible. Because of this OMAP5 boot is broken with CMA + LPAE enabled. Fix the issue by ensuring that the entire addresses are mapped. Signed-off-by: R Sricharan Cc: Catalin Marinas Cc: Christoffer Dall Cc: Santosh Shilimkar Tested-by: Laura Abbott Acked-by: Catalin Marinas Acked-by: Christoffer Dall Signed-off-by: Russell King diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e95a996..7897894 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -598,39 +598,60 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_section(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys, - const struct mem_type *type) +static void __init map_init_section(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) { - pmd_t *pmd = pmd_offset(pud, addr); - +#ifndef CONFIG_ARM_LPAE /* - * Try a section mapping - end, addr and phys must all be aligned - * to a section boundary. Note that PMDs refer to the individual - * L1 entries, whereas PGDs refer to a group of L1 entries making - * up one logical pointer to an L2 table. + * In classic MMU format, puds and pmds are folded in to + * the pgds. pmd_offset gives the PGD entry. PGDs refer to a + * group of L1 entries making up one logical pointer to + * an L2 table (2MB), where as PMDs refer to the individual + * L1 entries (1MB). Hence increment to get the correct + * offset for odd 1MB sections. + * (See arch/arm/include/asm/pgtable-2level.h) */ - if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) { - pmd_t *p = pmd; - -#ifndef CONFIG_ARM_LPAE - if (addr & SECTION_SIZE) - pmd++; + if (addr & SECTION_SIZE) + pmd++; #endif + do { + *pmd = __pmd(phys | type->prot_sect); + phys += SECTION_SIZE; + } while (pmd++, addr += SECTION_SIZE, addr != end); - do { - *pmd = __pmd(phys | type->prot_sect); - phys += SECTION_SIZE; - } while (pmd++, addr += SECTION_SIZE, addr != end); + flush_pmd_entry(pmd); +} - flush_pmd_entry(p); - } else { +static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) +{ + pmd_t *pmd = pmd_offset(pud, addr); + unsigned long next; + + do { /* - * No need to loop; pte's aren't interested in the - * individual L1 entries. + * With LPAE, we must loop over to map + * all the pmds for the given range. */ - alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type); - } + next = pmd_addr_end(addr, end); + + /* + * Try a section mapping - addr, next and phys must all be + * aligned to a section boundary. + */ + if (type->prot_sect && + ((addr | next | phys) & ~SECTION_MASK) == 0) { + map_init_section(pmd, addr, next, phys, type); + } else { + alloc_init_pte(pmd, addr, next, + __phys_to_pfn(phys), type); + } + + phys += next - addr; + + } while (pmd++, addr = next, addr != end); } static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, @@ -641,7 +662,7 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, do { next = pud_addr_end(addr, end); - alloc_init_section(pud, addr, next, phys, type); + alloc_init_pmd(pud, addr, next, phys, type); phys += next - addr; } while (pud++, addr = next, addr != end); } -- cgit v0.10.2 From c40e3641670eb6ebfdb71d4b0c775416ef95f4f0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 18 Mar 2013 19:44:14 +0100 Subject: ARM: 7679/1: Clear IDIVT hwcap if CONFIG_ARM_THUMB=n Don't advertise support for the SDIV/UDIV thumb instructions if the kernel is not compiled with support for thumb userspace. This is in line with how we remove the THUMB hwcap in these configurations. Acked-by: Will Deacon Cc: Stepan Moskovchenko Signed-off-by: Stephen Boyd Signed-off-by: Russell King diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 3f6cbb2..e2c8bbf 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -484,7 +484,7 @@ static void __init setup_processor(void) list->elf_name, ENDIANNESS); elf_hwcap = list->elf_hwcap; #ifndef CONFIG_ARM_THUMB - elf_hwcap &= ~HWCAP_THUMB; + elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT); #endif feat_v6_fixup(); -- cgit v0.10.2 From 8164f7af88d9ad3a757bd14f634b23997ee77f6b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 18 Mar 2013 19:44:15 +0100 Subject: ARM: 7680/1: Detect support for SDIV/UDIV from ISAR0 register The ISAR0 register indicates support for the SDIV and UDIV instructions in both the Thumb and ARM instruction set. Read the register to detect the supported instructions and update the elf_hwcap mask as appropriate. This is better than adding more and more cpuid checks in proc-v7.S for each new cpu variant that supports these instructions. Acked-by: Will Deacon Cc: Stepan Moskovchenko Signed-off-by: Stephen Boyd Signed-off-by: Russell King diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index e2c8bbf..f3ac13f 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -353,6 +353,23 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +static void __init cpuid_init_hwcaps(void) +{ + unsigned int divide_instrs; + + if (cpu_architecture() < CPU_ARCH_ARMv7) + return; + + divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24; + + switch (divide_instrs) { + case 2: + elf_hwcap |= HWCAP_IDIVA; + case 1: + elf_hwcap |= HWCAP_IDIVT; + } +} + static void __init feat_v6_fixup(void) { int id = read_cpuid_id(); @@ -483,6 +500,9 @@ static void __init setup_processor(void) snprintf(elf_platform, ELF_PLATFORM_SIZE, "%s%c", list->elf_name, ENDIANNESS); elf_hwcap = list->elf_hwcap; + + cpuid_init_hwcaps(); + #ifndef CONFIG_ARM_THUMB elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT); #endif diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3a3c015..bcd3d48 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -420,7 +420,7 @@ __v7_pj4b_proc_info: __v7_ca7mp_proc_info: .long 0x410fc070 .long 0xff0ffff0 - __v7_proc __v7_ca7mp_setup, hwcaps = HWCAP_IDIV + __v7_proc __v7_ca7mp_setup .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info /* @@ -430,7 +430,7 @@ __v7_ca7mp_proc_info: __v7_ca15mp_proc_info: .long 0x410fc0f0 .long 0xff0ffff0 - __v7_proc __v7_ca15mp_setup, hwcaps = HWCAP_IDIV + __v7_proc __v7_ca15mp_setup .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info /* -- cgit v0.10.2 From 120ecfafabec382c4feb79ff159ef42a39b6d33b Mon Sep 17 00:00:00 2001 From: Stepan Moskovchenko Date: Mon, 18 Mar 2013 19:44:16 +0100 Subject: ARM: 7678/1: Work around faulty ISAR0 register in some Krait CPUs Some early versions of the Krait CPU design incorrectly indicate that they only support the UDIV and SDIV instructions in Thumb mode when they actually support them in ARM and Thumb mode. It seems that these CPUs follow the DDI0406B ARM ARM which has two possible values for the divide instructions field, instead of the DDI0406C document which has three possible values. Work around this problem by checking the MIDR against Krait CPUs with this faulty ISAR0 register and force the hwcaps to indicate support in both modes. [sboyd: Rewrote commit text to reflect real reasoning now that we autodetect udiv/sdiv] Signed-off-by: Stepan Moskovchenko Acked-by: Will Deacon Signed-off-by: Stephen Boyd Signed-off-by: Russell King diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index bcd3d48..f584d3f 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -434,6 +434,21 @@ __v7_ca15mp_proc_info: .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info /* + * Qualcomm Inc. Krait processors. + */ + .type __krait_proc_info, #object +__krait_proc_info: + .long 0x510f0400 @ Required ID value + .long 0xff0ffc00 @ Mask for ID + /* + * Some Krait processors don't indicate support for SDIV and UDIV + * instructions in the ARM instruction set, even though they actually + * do support them. + */ + __v7_proc __v7_setup, hwcaps = HWCAP_IDIV + .size __krait_proc_info, . - __krait_proc_info + + /* * Match any ARMv7 processor core. */ .type __v7_proc_info, #object -- cgit v0.10.2 From 68a154fc53ddd3f7b33e482847a411bf54a50855 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Wed, 20 Mar 2013 17:30:30 +0100 Subject: ARM: 7681/1: hw_breakpoint: use warn_once to avoid spam from reset_ctrl_regs() CPU debug features like hardware break, watchpoints can be used only when the debug mode is enabled and available. Unfortunately on OMAP4 based devices, after a CPU power cycle, the debug feature gets disabled which leads to a flood of messages coming from reset_ctrl_regs() which gets called on every CPU_PM_EXIT with CPUidle enabled. So make use of warn_once() so that system is usable. Thanks to Will for pointers and Lokesh for the analysis of the issue. Tested-by: Lokesh Vutla Signed-off-by: Santosh Shilimkar Signed-off-by: Will Deacon Signed-off-by: Russell King diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 96093b7..5dc1aa6 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -966,7 +966,7 @@ static void reset_ctrl_regs(void *unused) } if (err) { - pr_warning("CPU %d debug is powered down!\n", cpu); + pr_warn_once("CPU %d debug is powered down!\n", cpu); cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu)); return; } @@ -987,7 +987,7 @@ clear_vcr: isb(); if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { - pr_warning("CPU %d failed to disable vector catch\n", cpu); + pr_warn_once("CPU %d failed to disable vector catch\n", cpu); return; } @@ -1007,7 +1007,7 @@ clear_vcr: } if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { - pr_warning("CPU %d failed to clear debug register pairs\n", cpu); + pr_warn_once("CPU %d failed to clear debug register pairs\n", cpu); return; } -- cgit v0.10.2 From 698613b63817f2f6ca79831cd1c37aae67025323 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 3 Apr 2013 16:33:26 +0100 Subject: ARM: iWMMXt: always enable iWMMXt support with PJ4 CPUs Jason Cooper reports these build errors: arch/arm/kernel/built-in.o: In function `iwmmxt_do': /.../arch/arm/kernel/pj4-cp0.c:36: undefined reference to `iwmmxt_task_release' /.../arch/arm/kernel/pj4-cp0.c:40: undefined reference to `iwmmxt_task_switch' make: *** [vmlinux] Error 1 This is caused because the PJ4 code explicitly references the iWMMXt code, but doesn't require it to be built. Fix this by ensuring that iWMMXt is always enabled with PJ4. Reported-by: Jason Cooper Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 13b7394..12ea3b3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1183,9 +1183,9 @@ config ARM_NR_BANKS default 8 config IWMMXT - bool "Enable iWMMXt support" + bool "Enable iWMMXt support" if !CPU_PJ4 depends on CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_PJ4 - default y if PXA27x || PXA3xx || ARCH_MMP + default y if PXA27x || PXA3xx || ARCH_MMP || CPU_PJ4 help Enable support for iWMMXt context switching at run time if running on a CPU that supports it. -- cgit v0.10.2 From 6e7aceeb7c70b9ebad79bcfe91fcf738826e8e6d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 25 Mar 2013 17:02:48 +0100 Subject: ARM: 7682/1: cache-l2x0: fix masking of RTL revision numbering and set_debug init Commit b8db6b8 (ARM: 7547/4: cache-l2x0: add support for Aurora L2 cache ctrl) moved the masking of the part ID which caused the RTL version to be lost. Commit 6248d06 (ARM: 7545/1: cache-l2x0: make outer_cache_fns a field of l2x0_of_data) changed how .set_debug is initialized. Both commits break commit 74ddcdb (ARM: 7608/1: l2x0: Only set .set_debug on PL310 r3p0 and earlier) which uses the RTL version to conditionally set .set_debug function pointer. Commit b8db6b8 also caused the printed cache ID to be missing the version information. Fix this by reverting how the part number is masked so the RTL version info is maintained. The cache-id-part DT property does not set the RTL bits so masking them should have no effect. Also, re-arrange the order of the function pointer init so the .set_debug function can be overridden. Reported-by: Paolo Pisati Signed-off-by: Rob Herring Cc: Gregory CLEMENT Cc: Yehuda Yitschak Signed-off-by: Russell King diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c2f3739..c465fac 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -299,7 +299,7 @@ static void l2x0_unlock(u32 cache_id) int lockregs; int i; - switch (cache_id) { + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: lockregs = 8; break; @@ -333,15 +333,14 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) if (cache_id_part_number_from_dt) cache_id = cache_id_part_number_from_dt; else - cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID) - & L2X0_CACHE_ID_PART_MASK; + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; /* Determine the number of ways */ - switch (cache_id) { + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: if (aux & (1 << 16)) ways = 16; @@ -725,7 +724,6 @@ static const struct l2x0_of_data pl310_data = { .flush_all = l2x0_flush_all, .inv_all = l2x0_inv_all, .disable = l2x0_disable, - .set_debug = pl310_set_debug, }, }; @@ -814,9 +812,8 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) data->save(); of_init = true; - l2x0_init(l2x0_base, aux_val, aux_mask); - memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache)); + l2x0_init(l2x0_base, aux_val, aux_mask); return 0; } -- cgit v0.10.2 From 93dc68876b608da041fe40ed39424b0fcd5aa2fb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 26 Mar 2013 23:35:04 +0100 Subject: ARM: 7684/1: errata: Workaround for Cortex-A15 erratum 798181 (TLBI/DSB operations) On Cortex-A15 (r0p0..r3p2) the TLBI/DSB are not adequately shooting down all use of the old entries. This patch implements the erratum workaround which consists of: 1. Dummy TLBIMVAIS and DSB on the CPU doing the TLBI operation. 2. Send IPI to the CPUs that are running the same mm (and ASID) as the one being invalidated (or all the online CPUs for global pages). 3. CPU receiving the IPI executes a DMB and CLREX (part of the exception return code already). Signed-off-by: Catalin Marinas Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 12ea3b3..1cacda4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1439,6 +1439,16 @@ config ARM_ERRATA_775420 to deadlock. This workaround puts DSB before executing ISB if an abort may occur on cache maintenance. +config ARM_ERRATA_798181 + bool "ARM errata: TLBI/DSB failure on Cortex-A15" + depends on CPU_V7 && SMP + help + On Cortex-A15 (r0p0..r3p2) the TLBI*IS/DSB operations are not + adequately shooting down all use of the old entries. This + option enables the Linux kernel workaround for this erratum + which sends an IPI to the CPUs that are running the same ASID + as the one being invalidated. + endmenu source "arch/arm/common/Kconfig" diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 8c5e828..91b99ab 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -41,6 +41,13 @@ extern void kunmap_high(struct page *page); #endif #endif +/* + * Needed to be able to broadcast the TLB invalidation for kmap. + */ +#ifdef CONFIG_ARM_ERRATA_798181 +#undef ARCH_NEEDS_KMAP_HIGH_GET +#endif + #ifdef ARCH_NEEDS_KMAP_HIGH_GET extern void *kmap_high_get(struct page *page); #else diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 863a661..a7b85e0 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -27,6 +27,8 @@ void __check_vmalloc_seq(struct mm_struct *mm); void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); #define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) +DECLARE_PER_CPU(atomic64_t, active_asids); + #else /* !CONFIG_CPU_HAS_ASID */ #ifdef CONFIG_MMU diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 4db8c88..9e9c041 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -450,6 +450,21 @@ static inline void local_flush_bp_all(void) isb(); } +#ifdef CONFIG_ARM_ERRATA_798181 +static inline void dummy_flush_tlb_a15_erratum(void) +{ + /* + * Dummy TLBIMVAIS. Using the unmapped address 0 and ASID 0. + */ + asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); + dsb(); +} +#else +static inline void dummy_flush_tlb_a15_erratum(void) +{ +} +#endif + /* * flush_pmd_entry * diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index bd03005..e82e1d2 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -12,6 +12,7 @@ #include #include +#include /**********************************************************************/ @@ -69,12 +70,72 @@ static inline void ipi_flush_bp_all(void *ignored) local_flush_bp_all(); } +#ifdef CONFIG_ARM_ERRATA_798181 +static int erratum_a15_798181(void) +{ + unsigned int midr = read_cpuid_id(); + + /* Cortex-A15 r0p0..r3p2 affected */ + if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) + return 0; + return 1; +} +#else +static int erratum_a15_798181(void) +{ + return 0; +} +#endif + +static void ipi_flush_tlb_a15_erratum(void *arg) +{ + dmb(); +} + +static void broadcast_tlb_a15_erratum(void) +{ + if (!erratum_a15_798181()) + return; + + dummy_flush_tlb_a15_erratum(); + smp_call_function_many(cpu_online_mask, ipi_flush_tlb_a15_erratum, + NULL, 1); +} + +static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) +{ + int cpu; + cpumask_t mask = { CPU_BITS_NONE }; + + if (!erratum_a15_798181()) + return; + + dummy_flush_tlb_a15_erratum(); + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + /* + * We only need to send an IPI if the other CPUs are running + * the same ASID as the one being invalidated. There is no + * need for locking around the active_asids check since the + * switch_mm() function has at least one dmb() (as required by + * this workaround) in case a context switch happens on + * another CPU after the condition below. + */ + if (atomic64_read(&mm->context.id) == + atomic64_read(&per_cpu(active_asids, cpu))) + cpumask_set_cpu(cpu, &mask); + } + smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); +} + void flush_tlb_all(void) { if (tlb_ops_need_broadcast()) on_each_cpu(ipi_flush_tlb_all, NULL, 1); else local_flush_tlb_all(); + broadcast_tlb_a15_erratum(); } void flush_tlb_mm(struct mm_struct *mm) @@ -83,6 +144,7 @@ void flush_tlb_mm(struct mm_struct *mm) on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1); else local_flush_tlb_mm(mm); + broadcast_tlb_mm_a15_erratum(mm); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) @@ -95,6 +157,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) &ta, 1); } else local_flush_tlb_page(vma, uaddr); + broadcast_tlb_mm_a15_erratum(vma->vm_mm); } void flush_tlb_kernel_page(unsigned long kaddr) @@ -105,6 +168,7 @@ void flush_tlb_kernel_page(unsigned long kaddr) on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); } else local_flush_tlb_kernel_page(kaddr); + broadcast_tlb_a15_erratum(); } void flush_tlb_range(struct vm_area_struct *vma, @@ -119,6 +183,7 @@ void flush_tlb_range(struct vm_area_struct *vma, &ta, 1); } else local_flush_tlb_range(vma, start, end); + broadcast_tlb_mm_a15_erratum(vma->vm_mm); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) @@ -130,6 +195,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); } else local_flush_tlb_kernel_range(start, end); + broadcast_tlb_a15_erratum(); } void flush_bp_all(void) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index a5a4b2bc..2ac3737 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -48,7 +48,7 @@ static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); -static DEFINE_PER_CPU(atomic64_t, active_asids); +DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; @@ -215,6 +215,7 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { local_flush_bp_all(); local_flush_tlb_all(); + dummy_flush_tlb_a15_erratum(); } atomic64_set(&per_cpu(active_asids, cpu), asid); -- cgit v0.10.2 From 6f3d90e55660ba42301b5e9c7eed332cc9f70fd7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 28 Mar 2013 11:17:55 +0100 Subject: ARM: 7685/1: delay: use private ticks_per_jiffy field for timer-based delay ops Commit 70264367a243 ("ARM: 7653/2: do not scale loops_per_jiffy when using a constant delay clock") fixed a problem with our timer-based delay loop, where loops_per_jiffy is scaled by cpufreq yet used directly by the timer delay ops. This patch fixes the problem in a more elegant way by keeping a private ticks_per_jiffy field in the delay ops, independent of loops_per_jiffy and therefore not subject to scaling. The loop-based delay continues to use loops_per_jiffy directly, as it should. Acked-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h index 720799f..dff714d 100644 --- a/arch/arm/include/asm/delay.h +++ b/arch/arm/include/asm/delay.h @@ -24,7 +24,7 @@ extern struct arm_delay_ops { void (*delay)(unsigned long); void (*const_udelay)(unsigned long); void (*udelay)(unsigned long); - bool const_clock; + unsigned long ticks_per_jiffy; } arm_delay_ops; #define __delay(n) arm_delay_ops.delay(n) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 79078ed..1f2cccc 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -673,9 +673,6 @@ static int cpufreq_callback(struct notifier_block *nb, if (freq->flags & CPUFREQ_CONST_LOOPS) return NOTIFY_OK; - if (arm_delay_ops.const_clock) - return NOTIFY_OK; - if (!per_cpu(l_p_j_ref, cpu)) { per_cpu(l_p_j_ref, cpu) = per_cpu(cpu_data, cpu).loops_per_jiffy; diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c index 6b93f6a..64dbfa5 100644 --- a/arch/arm/lib/delay.c +++ b/arch/arm/lib/delay.c @@ -58,7 +58,7 @@ static void __timer_delay(unsigned long cycles) static void __timer_const_udelay(unsigned long xloops) { unsigned long long loops = xloops; - loops *= loops_per_jiffy; + loops *= arm_delay_ops.ticks_per_jiffy; __timer_delay(loops >> UDELAY_SHIFT); } @@ -73,11 +73,13 @@ void __init register_current_timer_delay(const struct delay_timer *timer) pr_info("Switching to timer-based delay loop\n"); delay_timer = timer; lpj_fine = timer->freq / HZ; - loops_per_jiffy = lpj_fine; + + /* cpufreq may scale loops_per_jiffy, so keep a private copy */ + arm_delay_ops.ticks_per_jiffy = lpj_fine; arm_delay_ops.delay = __timer_delay; arm_delay_ops.const_udelay = __timer_const_udelay; arm_delay_ops.udelay = __timer_udelay; - arm_delay_ops.const_clock = true; + delay_calibrated = true; } else { pr_info("Ignoring duplicate/late registration of read_current_timer delay\n"); -- cgit v0.10.2 From b21e023ba4003fe4b5c32540e4aee5991c019e92 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 2 Apr 2013 22:11:46 +0100 Subject: ARM: 7689/1: add unwind annotations to ftrace asm Add unwind annotations to the ftrace assembly code so that the function tracer's stacktracing options (func_stack_trace, etc.) work when CONFIG_ARM_UNWIND is enabled. Signed-off-by: Rabin Vincent Signed-off-by: Russell King diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 3248cde..fefd7f9 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -276,7 +276,13 @@ ENDPROC(ftrace_graph_caller_old) */ .macro mcount_enter +/* + * This pad compensates for the push {lr} at the call site. Note that we are + * unable to unwind through a function which does not otherwise save its lr. + */ + UNWIND(.pad #4) stmdb sp!, {r0-r3, lr} + UNWIND(.save {r0-r3, lr}) .endm .macro mcount_get_lr reg @@ -289,6 +295,7 @@ ENDPROC(ftrace_graph_caller_old) .endm ENTRY(__gnu_mcount_nc) +UNWIND(.fnstart) #ifdef CONFIG_DYNAMIC_FTRACE mov ip, lr ldmia sp!, {lr} @@ -296,17 +303,22 @@ ENTRY(__gnu_mcount_nc) #else __mcount #endif +UNWIND(.fnend) ENDPROC(__gnu_mcount_nc) #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller) +UNWIND(.fnstart) __ftrace_caller +UNWIND(.fnend) ENDPROC(ftrace_caller) #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) +UNWIND(.fnstart) __ftrace_graph_caller +UNWIND(.fnend) ENDPROC(ftrace_graph_caller) #endif -- cgit v0.10.2 From 4e1db26a0b42e2b6e27c05d68adcc01709c2eed2 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 3 Apr 2013 12:24:45 +0100 Subject: ARM: 7690/1: mm: fix CONFIG_LPAE typos CONFIG_LPAE doesn't exist: the correct option is CONFIG_ARM_LPAE, so fix up the two typos under arch/arm/. The fix to head.S is slightly scary, but this is just for setting up an early io-mapping for the serial port when running on a big-endian, LPAE system. Since these systems don't exist in the wild (at least, I have no access to one outside of kvmtool, which doesn't provide a serial port suitable for earlyprintk), then we can revisit the code later if it causes any problems. Signed-off-by: Paul Bolle Signed-off-by: Will Deacon Signed-off-by: Russell King diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index e0eb9a1..8bac553 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -267,7 +267,7 @@ __create_page_tables: addne r6, r6, #1 << SECTION_SHIFT strne r6, [r3] -#if defined(CONFIG_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8) +#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8) sub r4, r4, #4 @ Fixup page table pointer @ for 64-bit descriptors #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index f3ac13f..d343a6c 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -544,7 +544,7 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) size -= start & ~PAGE_MASK; bank->start = PAGE_ALIGN(start); -#ifndef CONFIG_LPAE +#ifndef CONFIG_ARM_LPAE if (bank->start + size < bank->start) { printk(KERN_CRIT "Truncating memory at 0x%08llx to fit in " "32-bit physical address space\n", (long long)start); -- cgit v0.10.2