From 21cdb6b568435738cc0b303b2b3b82742396310c Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 27 Nov 2015 21:09:30 +0000 Subject: x86/mm: Page align the '_end' symbol to avoid pfn conversion bugs Ingo noted that if we can guarantee _end is aligned to PAGE_SIZE we can automatically avoid bugs along the lines of, size = _end - _text >> PAGE_SHIFT which is missing a call to PFN_ALIGN(). The EFI mixed mode contains this bug, for example. _text is already aligned to PAGE_SIZE through the use of LOAD_PHYSICAL_ADDR, and the BSS and BRK sections are explicitly aligned in the linker script, so it makes sense to align _end to match. Reported-by: Ingo Molnar Signed-off-by: Matt Fleming Acked-by: Borislav Petkov Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1448658575-17029-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 74e4bf1..4f19942 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -325,6 +325,7 @@ SECTIONS __brk_limit = .; } + . = ALIGN(PAGE_SIZE); _end = .; STABS_DEBUG -- cgit v0.10.2 From edc3b9129cecd0f0857112136f5b8b1bc1d45918 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 27 Nov 2015 21:09:31 +0000 Subject: x86/mm/pat: Ensure cpa->pfn only contains page frame numbers The x86 pageattr code is confused about the data that is stored in cpa->pfn, sometimes it's treated as a page frame number, sometimes it's treated as an unshifted physical address, and in one place it's treated as a pte. The result of this is that the mapping functions do not map the intended physical address. This isn't a problem in practice because most of the addresses we're mapping in the EFI code paths are already mapped in 'trampoline_pgd' and so the pageattr mapping functions don't actually do anything in this case. But when we move to using a separate page table for the EFI runtime this will be an issue. Signed-off-by: Matt Fleming Reviewed-by: Borislav Petkov Acked-by: Borislav Petkov Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1448658575-17029-3-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a3137a4..c70e420 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -905,15 +905,10 @@ static void populate_pte(struct cpa_data *cpa, pte = pte_offset_kernel(pmd, start); while (num_pages-- && start < end) { - - /* deal with the NX bit */ - if (!(pgprot_val(pgprot) & _PAGE_NX)) - cpa->pfn &= ~_PAGE_NX; - - set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot)); + set_pte(pte, pfn_pte(cpa->pfn, pgprot)); start += PAGE_SIZE; - cpa->pfn += PAGE_SIZE; + cpa->pfn++; pte++; } } @@ -969,11 +964,11 @@ static int populate_pmd(struct cpa_data *cpa, pmd = pmd_offset(pud, start); - set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | + set_pmd(pmd, __pmd(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pmd_pgprot))); start += PMD_SIZE; - cpa->pfn += PMD_SIZE; + cpa->pfn += PMD_SIZE >> PAGE_SHIFT; cur_pages += PMD_SIZE >> PAGE_SHIFT; } @@ -1042,11 +1037,11 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, * Map everything starting from the Gb boundary, possibly with 1G pages */ while (end - start >= PUD_SIZE) { - set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | + set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pud_pgprot))); start += PUD_SIZE; - cpa->pfn += PUD_SIZE; + cpa->pfn += PUD_SIZE >> PAGE_SHIFT; cur_pages += PUD_SIZE >> PAGE_SHIFT; pud++; } diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index a0ac0f9..5aa186d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -143,7 +143,7 @@ void efi_sync_low_kernel_mappings(void) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - unsigned long text; + unsigned long pfn, text; struct page *page; unsigned npages; pgd_t *pgd; @@ -160,7 +160,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * and ident-map those pages containing the map before calling * phys_efi_set_virtual_address_map(). */ - if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) { + pfn = pa_memmap >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX)) { pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); return 1; } @@ -185,8 +186,9 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) npages = (_end - _text) >> PAGE_SHIFT; text = __pa(_text); + pfn = text >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) { + if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, 0)) { pr_err("Failed to map kernel text 1:1\n"); return 1; } @@ -204,12 +206,14 @@ void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) static void __init __map_region(efi_memory_desc_t *md, u64 va) { pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - unsigned long pf = 0; + unsigned long flags = 0; + unsigned long pfn; if (!(md->attribute & EFI_MEMORY_WB)) - pf |= _PAGE_PCD; + flags |= _PAGE_PCD; - if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) + pfn = md->phys_addr >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags)) pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", md->phys_addr, va); } -- cgit v0.10.2 From b61a76f8850d2979550abc42d7e09154ebb8d785 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 27 Nov 2015 21:09:32 +0000 Subject: x86/efi: Map RAM into the identity page table for mixed mode We are relying on the pre-existing mappings in 'trampoline_pgd' when accessing function arguments in the EFI mixed mode thunking code. Instead let's map memory explicitly so that things will continue to work when we move to a separate page table in the future. Signed-off-by: Matt Fleming Reviewed-by: Borislav Petkov Acked-by: Borislav Petkov Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1448658575-17029-4-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 5aa186d..102976d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -144,6 +144,7 @@ void efi_sync_low_kernel_mappings(void) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { unsigned long pfn, text; + efi_memory_desc_t *md; struct page *page; unsigned npages; pgd_t *pgd; @@ -177,6 +178,25 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) if (!IS_ENABLED(CONFIG_EFI_MIXED)) return 0; + /* + * Map all of RAM so that we can access arguments in the 1:1 + * mapping when making EFI runtime calls. + */ + for_each_efi_memory_desc(&memmap, md) { + if (md->type != EFI_CONVENTIONAL_MEMORY && + md->type != EFI_LOADER_DATA && + md->type != EFI_LOADER_CODE) + continue; + + pfn = md->phys_addr >> PAGE_SHIFT; + npages = md->num_pages; + + if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, 0)) { + pr_err("Failed to map 1:1 memory\n"); + return 1; + } + } + page = alloc_page(GFP_KERNEL|__GFP_DMA32); if (!page) panic("Unable to allocate EFI runtime stack < 4GB\n"); -- cgit v0.10.2 From c9f2a9a65e4855b74d92cdad688f6ee4a1a323ff Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 27 Nov 2015 21:09:33 +0000 Subject: x86/efi: Hoist page table switching code into efi_call_virt() This change is a prerequisite for pending patches that switch to a dedicated EFI page table, instead of using 'trampoline_pgd' which shares PGD entries with 'swapper_pg_dir'. The pending patches make it impossible to dereference the runtime service function pointer without first switching %cr3. It's true that we now have duplicated switching code in efi_call_virt() and efi_call_phys_{prolog,epilog}() but we are sacrificing code duplication for a little more clarity and the ease of writing the page table switching code in C instead of asm. Signed-off-by: Matt Fleming Reviewed-by: Borislav Petkov Acked-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Jones Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1448658575-17029-5-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0010c78..347eeac 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,6 +3,7 @@ #include #include +#include /* * We map the EFI regions needed for runtime services non-contiguously, @@ -64,6 +65,17 @@ extern u64 asmlinkage efi_call(void *fp, ...); #define efi_call_phys(f, args...) efi_call((f), args) +/* + * Scratch space used for switching the pagetable in the EFI stub + */ +struct efi_scratch { + u64 r15; + u64 prev_cr3; + pgd_t *efi_pgt; + bool use_pgd; + u64 phys_stack; +} __packed; + #define efi_call_virt(f, ...) \ ({ \ efi_status_t __s; \ @@ -71,7 +83,20 @@ extern u64 asmlinkage efi_call(void *fp, ...); efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + \ + if (efi_scratch.use_pgd) { \ + efi_scratch.prev_cr3 = read_cr3(); \ + write_cr3((unsigned long)efi_scratch.efi_pgt); \ + __flush_tlb_all(); \ + } \ + \ __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \ + \ + if (efi_scratch.use_pgd) { \ + write_cr3(efi_scratch.prev_cr3); \ + __flush_tlb_all(); \ + } \ + \ __kernel_fpu_end(); \ preempt_enable(); \ __s; \ diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 102976d..b19cdac 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -47,16 +47,7 @@ */ static u64 efi_va = EFI_VA_START; -/* - * Scratch space used for switching the pagetable in the EFI stub - */ -struct efi_scratch { - u64 r15; - u64 prev_cr3; - pgd_t *efi_pgt; - bool use_pgd; - u64 phys_stack; -} __packed; +struct efi_scratch efi_scratch; static void __init early_code_mapping_set_exec(int executable) { @@ -83,8 +74,11 @@ pgd_t * __init efi_call_phys_prolog(void) int pgd; int n_pgds; - if (!efi_enabled(EFI_OLD_MEMMAP)) - return NULL; + if (!efi_enabled(EFI_OLD_MEMMAP)) { + save_pgd = (pgd_t *)read_cr3(); + write_cr3((unsigned long)efi_scratch.efi_pgt); + goto out; + } early_code_mapping_set_exec(1); @@ -96,6 +90,7 @@ pgd_t * __init efi_call_phys_prolog(void) vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); } +out: __flush_tlb_all(); return save_pgd; @@ -109,8 +104,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) int pgd_idx; int nr_pgds; - if (!save_pgd) + if (!efi_enabled(EFI_OLD_MEMMAP)) { + write_cr3((unsigned long)save_pgd); + __flush_tlb_all(); return; + } nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 86d0f9e..32020cb 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -38,41 +38,6 @@ mov %rsi, %cr0; \ mov (%rsp), %rsp - /* stolen from gcc */ - .macro FLUSH_TLB_ALL - movq %r15, efi_scratch(%rip) - movq %r14, efi_scratch+8(%rip) - movq %cr4, %r15 - movq %r15, %r14 - andb $0x7f, %r14b - movq %r14, %cr4 - movq %r15, %cr4 - movq efi_scratch+8(%rip), %r14 - movq efi_scratch(%rip), %r15 - .endm - - .macro SWITCH_PGT - cmpb $0, efi_scratch+24(%rip) - je 1f - movq %r15, efi_scratch(%rip) # r15 - # save previous CR3 - movq %cr3, %r15 - movq %r15, efi_scratch+8(%rip) # prev_cr3 - movq efi_scratch+16(%rip), %r15 # EFI pgt - movq %r15, %cr3 - 1: - .endm - - .macro RESTORE_PGT - cmpb $0, efi_scratch+24(%rip) - je 2f - movq efi_scratch+8(%rip), %r15 - movq %r15, %cr3 - movq efi_scratch(%rip), %r15 - FLUSH_TLB_ALL - 2: - .endm - ENTRY(efi_call) SAVE_XMM mov (%rsp), %rax @@ -83,16 +48,8 @@ ENTRY(efi_call) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx - SWITCH_PGT call *%rdi - RESTORE_PGT addq $48, %rsp RESTORE_XMM ret ENDPROC(efi_call) - - .data -ENTRY(efi_scratch) - .fill 3,8,0 - .byte 0 - .quad 0 -- cgit v0.10.2 From 67a9108ed4313b85a9c53406d80dc1ae3f8c3e36 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 27 Nov 2015 21:09:34 +0000 Subject: x86/efi: Build our own page table structures With commit e1a58320a38d ("x86/mm: Warn on W^X mappings") all users booting on 64-bit UEFI machines see the following warning, ------------[ cut here ]------------ WARNING: CPU: 7 PID: 1 at arch/x86/mm/dump_pagetables.c:225 note_page+0x5dc/0x780() x86/mm: Found insecure W+X mapping at address ffff88000005f000/0xffff88000005f000 ... x86/mm: Checked W+X mappings: FAILED, 165660 W+X pages found. ... This is caused by mapping EFI regions with RWX permissions. There isn't much we can do to restrict the permissions for these regions due to the way the firmware toolchains mix code and data, but we can at least isolate these mappings so that they do not appear in the regular kernel page tables. In commit d2f7cbe7b26a ("x86/efi: Runtime services virtual mapping") we started using 'trampoline_pgd' to map the EFI regions because there was an existing identity mapping there which we use during the SetVirtualAddressMap() call and for broken firmware that accesses those addresses. But 'trampoline_pgd' shares some PGD entries with 'swapper_pg_dir' and does not provide the isolation we require. Notably the virtual address for __START_KERNEL_map and MODULES_START are mapped by the same PGD entry so we need to be more careful when copying changes over in efi_sync_low_kernel_mappings(). This patch doesn't go the full mile, we still want to share some PGD entries with 'swapper_pg_dir'. Having completely separate page tables brings its own issues such as synchronising new mappings after memory hotplug and module loading. Sharing also keeps memory usage down. Signed-off-by: Matt Fleming Reviewed-by: Borislav Petkov Acked-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Jones Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1448658575-17029-6-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 347eeac..8fd9e63 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -136,6 +136,7 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); +extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index ad28540..3c1f3cd 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -869,7 +869,7 @@ static void __init kexec_enter_virtual_mode(void) * This function will switch the EFI runtime services to virtual mode. * Essentially, we look through the EFI memmap and map every region that * has the runtime attribute bit set in its memory descriptor into the - * ->trampoline_pgd page table using a top-down VA allocation scheme. + * efi_pgd page table. * * The old method which used to update that memory descriptor with the * virtual address obtained from ioremap() is still supported when the @@ -879,8 +879,8 @@ static void __init kexec_enter_virtual_mode(void) * * The new method does a pagetable switch in a preemption-safe manner * so that we're in a different address space when calling a runtime - * function. For function arguments passing we do copy the PGDs of the - * kernel page table into ->trampoline_pgd prior to each call. + * function. For function arguments passing we do copy the PUDs of the + * kernel page table into efi_pgd prior to each call. * * Specially for kexec boot, efi runtime maps in previous kernel should * be passed in via setup_data. In that case runtime ranges will be mapped @@ -895,6 +895,12 @@ static void __init __efi_enter_virtual_mode(void) efi.systab = NULL; + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { @@ -954,28 +960,11 @@ static void __init __efi_enter_virtual_mode(void) efi_runtime_mkexec(); /* - * We mapped the descriptor array into the EFI pagetable above but we're - * not unmapping it here. Here's why: - * - * We're copying select PGDs from the kernel page table to the EFI page - * table and when we do so and make changes to those PGDs like unmapping - * stuff from them, those changes appear in the kernel page table and we - * go boom. - * - * From setup_real_mode(): - * - * ... - * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; - * - * In this particular case, our allocation is in PGD 0 of the EFI page - * table but we've copied that PGD from PGD[272] of the EFI page table: - * - * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 - * - * where the direct memory mapping in kernel space is. - * - * new_memmap's VA comes from that direct mapping and thus clearing it, - * it would get cleared in the kernel page table too. + * We mapped the descriptor array into the EFI pagetable above + * but we're not unmapping it here because if we're running in + * EFI mixed mode we need all of memory to be accessible when + * we pass parameters to the EFI runtime services in the + * thunking code. * * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); */ diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index ed5b673..58d669b 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -38,6 +38,11 @@ * say 0 - 3G. */ +int __init efi_alloc_page_tables(void) +{ + return 0; +} + void efi_sync_low_kernel_mappings(void) {} void __init efi_dump_pagetable(void) {} int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b19cdac..4897f51 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -40,6 +40,7 @@ #include #include #include +#include /* * We allocate runtime services regions bottom-up, starting from -4G, i.e. @@ -121,22 +122,92 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) early_code_mapping_set_exec(0); } +static pgd_t *efi_pgd; + +/* + * We need our own copy of the higher levels of the page tables + * because we want to avoid inserting EFI region mappings (EFI_VA_END + * to EFI_VA_START) into the standard kernel page tables. Everything + * else can be shared, see efi_sync_low_kernel_mappings(). + */ +int __init efi_alloc_page_tables(void) +{ + pgd_t *pgd; + pud_t *pud; + gfp_t gfp_mask; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; + efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + if (!efi_pgd) + return -ENOMEM; + + pgd = efi_pgd + pgd_index(EFI_VA_END); + + pud = pud_alloc_one(NULL, 0); + if (!pud) { + free_page((unsigned long)efi_pgd); + return -ENOMEM; + } + + pgd_populate(NULL, pgd, pud); + + return 0; +} + /* * Add low kernel mappings for passing arguments to EFI functions. */ void efi_sync_low_kernel_mappings(void) { - unsigned num_pgds; - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + unsigned num_entries; + pgd_t *pgd_k, *pgd_efi; + pud_t *pud_k, *pud_efi; if (efi_enabled(EFI_OLD_MEMMAP)) return; - num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); + /* + * We can share all PGD entries apart from the one entry that + * covers the EFI runtime mapping space. + * + * Make sure the EFI runtime region mappings are guaranteed to + * only span a single PGD entry and that the entry also maps + * other important kernel regions. + */ + BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); + BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != + (EFI_VA_END & PGDIR_MASK)); + + pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); + pgd_k = pgd_offset_k(PAGE_OFFSET); + + num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); + memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); - memcpy(pgd + pgd_index(PAGE_OFFSET), - init_mm.pgd + pgd_index(PAGE_OFFSET), - sizeof(pgd_t) * num_pgds); + /* + * We share all the PUD entries apart from those that map the + * EFI regions. Copy around them. + */ + BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0); + BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0); + + pgd_efi = efi_pgd + pgd_index(EFI_VA_END); + pud_efi = pud_offset(pgd_efi, 0); + + pgd_k = pgd_offset_k(EFI_VA_END); + pud_k = pud_offset(pgd_k, 0); + + num_entries = pud_index(EFI_VA_END); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + + pud_efi = pud_offset(pgd_efi, EFI_VA_START); + pud_k = pud_offset(pgd_k, EFI_VA_START); + + num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); } int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) @@ -150,8 +221,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; - pgd = __va(efi_scratch.efi_pgt); + efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd); + pgd = efi_pgd; /* * It can happen that the physical address of new_memmap lands in memory @@ -216,16 +287,14 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - - kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); + kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages); } static void __init __map_region(efi_memory_desc_t *md, u64 va) { - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); unsigned long flags = 0; unsigned long pfn; + pgd_t *pgd = efi_pgd; if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; @@ -334,9 +403,7 @@ void __init efi_runtime_mkexec(void) void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - - ptdump_walk_pgd_level(NULL, pgd); + ptdump_walk_pgd_level(NULL, efi_pgd); #endif } -- cgit v0.10.2 From ff3d0a12fb2dc123e2b46e9524ebf4e08de5c59c Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 27 Nov 2015 21:09:35 +0000 Subject: Documentation/x86: Update EFI memory region description Make it clear that the EFI page tables are only available during EFI runtime calls since that subject has come up a fair numbers of times in the past. Additionally, add the EFI region start and end addresses to the table so that it's possible to see at a glance where they fall in relation to other regions. Signed-off-by: Matt Fleming Reviewed-by: Borislav Petkov Acked-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Jones Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1448658575-17029-7-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 05712ac..c518dce 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -16,6 +16,8 @@ ffffec0000000000 - fffffc0000000000 (=44 bits) kasan shadow memory (16TB) ... unused hole ... ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... +ffffffef00000000 - ffffffff00000000 (=64 GB) EFI region mapping space +... unused hole ... ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls @@ -32,11 +34,9 @@ reference. Current X86-64 implementations only support 40 bits of address space, but we support up to 46 bits. This expands into MBZ space in the page tables. -->trampoline_pgd: - -We map EFI runtime services in the aforementioned PGD in the virtual -range of 64Gb (arbitrarily set, can be raised if needed) - -0xffffffef00000000 - 0xffffffff00000000 +We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual +memory window (this size is arbitrary, it can be raised later if needed). +The mappings are not part of any other kernel PGD and are only available +during EFI runtime calls. -Andi Kleen, Jul 2004 -- cgit v0.10.2 From 7f83773ced2f9f41dec5e6eb3eb4a7ef23ca1d75 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 20 Nov 2015 11:30:17 +0100 Subject: efi/esrt: Don't preformat name kobject_init_and_add takes a format string+args, so there's no reason to do this formatting in advance. Signed-off-by: Rasmus Villemoes Cc: Peter Jones Signed-off-by: Matt Fleming diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index 22c5285..75feb3f 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -167,14 +167,11 @@ static struct kset *esrt_kset; static int esre_create_sysfs_entry(void *esre, int entry_num) { struct esre_entry *entry; - char name[20]; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; - sprintf(name, "entry%d", entry_num); - entry->kobj.kset = esrt_kset; if (esrt->fw_resource_version == 1) { @@ -182,7 +179,7 @@ static int esre_create_sysfs_entry(void *esre, int entry_num) entry->esre.esre1 = esre; rc = kobject_init_and_add(&entry->kobj, &esre1_ktype, NULL, - "%s", name); + "entry%d", entry_num); if (rc) { kfree(entry); return rc; -- cgit v0.10.2 From 3bb9eee61c64d8cd64814e582cf5d72095554473 Mon Sep 17 00:00:00 2001 From: Alan Ott Date: Mon, 30 Nov 2015 09:36:54 -0500 Subject: doc: efi-stub.txt: Fix arm64 paths Update documented paths for arm64 files to match current tree. Signed-off-by: Alan Ott Cc: Roy Franz Cc: Jonathan Corbet Cc: Ard Biesheuvel Cc: Leif Lindholm Signed-off-by: Matt Fleming diff --git a/Documentation/efi-stub.txt b/Documentation/efi-stub.txt index 7747024..e157469 100644 --- a/Documentation/efi-stub.txt +++ b/Documentation/efi-stub.txt @@ -10,12 +10,12 @@ arch/x86/boot/header.S and arch/x86/boot/compressed/eboot.c, respectively. For ARM the EFI stub is implemented in arch/arm/boot/compressed/efi-header.S and arch/arm/boot/compressed/efi-stub.c. EFI stub code that is shared -between architectures is in drivers/firmware/efi/efi-stub-helper.c. +between architectures is in drivers/firmware/efi/libstub. For arm64, there is no compressed kernel support, so the Image itself masquerades as a PE/COFF image and the EFI stub is linked into the kernel. The arm64 EFI stub lives in arch/arm64/kernel/efi-entry.S -and arch/arm64/kernel/efi-stub.c. +and drivers/firmware/efi/libstub/arm64-stub.c. By using the EFI boot stub it's possible to boot a Linux kernel without the use of a conventional EFI boot loader, such as grub or -- cgit v0.10.2 From 26d7f65fbd22168c33d2350f3e7e3021f5761256 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 25 Oct 2015 10:26:35 +0000 Subject: x86/efi: Preface all print statements with efi* tag The pr_*() calls in the x86 EFI code may or may not include a subsystem tag, which makes it difficult to grep the kernel log for all relevant EFI messages and leads users to miss important information. Recently, a bug reporter provided all the EFI print messages from the kernel log when trying to diagnose an issue but missed the following statement because it wasn't prefixed with anything indicating it was related to EFI, pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); Cc: Borislav Petkov Reviewed-by: Josh Triplett Signed-off-by: Matt Fleming diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index ea48449..9a52b5c 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -10,6 +10,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index a0ac0f9..d347e85 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -15,6 +15,8 @@ * */ +#define pr_fmt(fmt) "efi: " fmt + #include #include #include diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 1c7380d..6452070 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) "efi: " fmt + #include #include #include @@ -256,7 +258,7 @@ void __init efi_apply_memmap_quirks(void) * services. */ if (!efi_runtime_supported()) { - pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); + pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); efi_unmap_memmap(); } -- cgit v0.10.2 From 50a0cb565246f20d59cdb161778531e4b19d35ac Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 9 Dec 2015 15:41:08 -0800 Subject: x86/efi-bgrt: Fix kernel panic when mapping BGRT data Starting with this commit 35eb8b81edd4 ("x86/efi: Build our own page table structures") efi regions have a separate page directory called "efi_pgd". In order to access any efi region we have to first shift %cr3 to this page table. In the bgrt code we are trying to copy bgrt_header and image, but these regions fall under "EFI_BOOT_SERVICES_DATA" and to access these regions we have to shift %cr3 to efi_pgd and not doing so will cause page fault as shown below. [ 0.251599] Last level dTLB entries: 4KB 64, 2MB 0, 4MB 0, 1GB 4 [ 0.259126] Freeing SMP alternatives memory: 32K (ffffffff8230e000 - ffffffff82316000) [ 0.271803] BUG: unable to handle kernel paging request at fffffffefce35002 [ 0.279740] IP: [] efi_bgrt_init+0x144/0x1fd [ 0.286383] PGD 300f067 PUD 0 [ 0.289879] Oops: 0000 [#1] SMP [ 0.293566] Modules linked in: [ 0.297039] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.4.0-rc1-eywa-eywa-built-in-47041+ #2 [ 0.306619] Hardware name: Intel Corporation Skylake Client platform/Skylake Y LPDDR3 RVP3, BIOS SKLSE2R1.R00.B104.B01.1511110114 11/11/2015 [ 0.320925] task: ffffffff820134c0 ti: ffffffff82000000 task.ti: ffffffff82000000 [ 0.329420] RIP: 0010:[] [] efi_bgrt_init+0x144/0x1fd [ 0.338821] RSP: 0000:ffffffff82003f18 EFLAGS: 00010246 [ 0.344852] RAX: fffffffefce35000 RBX: fffffffefce35000 RCX: fffffffefce2b000 [ 0.352952] RDX: 000000008a82b000 RSI: ffffffff8235bb80 RDI: 000000008a835000 [ 0.361050] RBP: ffffffff82003f30 R08: 000000008a865000 R09: ffffffffff202850 [ 0.369149] R10: ffffffff811ad62f R11: 0000000000000000 R12: 0000000000000000 [ 0.377248] R13: ffff88016dbaea40 R14: ffffffff822622c0 R15: ffffffff82003fb0 [ 0.385348] FS: 0000000000000000(0000) GS:ffff88016d800000(0000) knlGS:0000000000000000 [ 0.394533] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 0.401054] CR2: fffffffefce35002 CR3: 000000000300c000 CR4: 00000000003406f0 [ 0.409153] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 0.417252] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 0.425350] Stack: [ 0.427638] ffffffffffffffff ffffffff82256900 ffff88016dbaea40 ffffffff82003f40 [ 0.436086] ffffffff821bbce0 ffffffff82003f88 ffffffff8219c0c2 0000000000000000 [ 0.444533] ffffffff8219ba4a ffffffff822622c0 0000000000083000 00000000ffffffff [ 0.452978] Call Trace: [ 0.455763] [] efi_late_init+0x9/0xb [ 0.461697] [] start_kernel+0x463/0x47f [ 0.467928] [] ? set_init_arg+0x55/0x55 [ 0.474159] [] ? early_idt_handler_array+0x120/0x120 [ 0.481669] [] x86_64_start_reservations+0x2a/0x2c [ 0.488982] [] x86_64_start_kernel+0x13d/0x14c [ 0.495897] Code: 00 41 b4 01 48 8b 78 28 e8 09 36 01 00 48 85 c0 48 89 c3 75 13 48 c7 c7 f8 ac d3 81 31 c0 e8 d7 3b fb fe e9 b5 00 00 00 45 84 e4 <44> 8b 6b 02 74 0d be 06 00 00 00 48 89 df e8 ae 34 0$ [ 0.518151] RIP [] efi_bgrt_init+0x144/0x1fd [ 0.524888] RSP [ 0.528851] CR2: fffffffefce35002 [ 0.532615] ---[ end trace 7b06521e6ebf2aea ]--- [ 0.537852] Kernel panic - not syncing: Attempted to kill the idle task! As said above one way to fix this bug is to shift %cr3 to efi_pgd but we are not doing that way because it leaks inner details of how we switch to EFI page tables into a new call site and it also adds duplicate code. Instead, we remove the call to efi_lookup_mapped_addr() and always perform early_mem*() instead of early_io*() because we want to remap RAM regions and not I/O regions. We also delete efi_lookup_mapped_addr() because we are no longer using it. Signed-off-by: Sai Praneeth Prakhya Reported-by: Wendy Wang Cc: Borislav Petkov Cc: Josh Triplett Cc: Ricardo Neri Cc: Ravi Shankar Signed-off-by: Matt Fleming diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 9a52b5c..bf51f4c 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -31,8 +31,7 @@ struct bmp_header { void __init efi_bgrt_init(void) { acpi_status status; - void __iomem *image; - bool ioremapped = false; + void *image; struct bmp_header bmp_header; if (acpi_disabled) @@ -73,20 +72,14 @@ void __init efi_bgrt_init(void) return; } - image = efi_lookup_mapped_addr(bgrt_tab->image_address); + image = early_memremap(bgrt_tab->image_address, sizeof(bmp_header)); if (!image) { - image = early_ioremap(bgrt_tab->image_address, - sizeof(bmp_header)); - ioremapped = true; - if (!image) { - pr_err("Ignoring BGRT: failed to map image header memory\n"); - return; - } + pr_err("Ignoring BGRT: failed to map image header memory\n"); + return; } - memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); - if (ioremapped) - early_iounmap(image, sizeof(bmp_header)); + memcpy(&bmp_header, image, sizeof(bmp_header)); + early_memunmap(image, sizeof(bmp_header)); bgrt_image_size = bmp_header.size; bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); @@ -96,18 +89,14 @@ void __init efi_bgrt_init(void) return; } - if (ioremapped) { - image = early_ioremap(bgrt_tab->image_address, - bmp_header.size); - if (!image) { - pr_err("Ignoring BGRT: failed to map image memory\n"); - kfree(bgrt_image); - bgrt_image = NULL; - return; - } + image = early_memremap(bgrt_tab->image_address, bmp_header.size); + if (!image) { + pr_err("Ignoring BGRT: failed to map image memory\n"); + kfree(bgrt_image); + bgrt_image = NULL; + return; } - memcpy_fromio(bgrt_image, image, bgrt_image_size); - if (ioremapped) - early_iounmap(image, bmp_header.size); + memcpy(bgrt_image, image, bgrt_image_size); + early_memunmap(image, bmp_header.size); } diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 027ca21..e9c458b 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -324,38 +324,6 @@ u64 __init efi_mem_desc_end(efi_memory_desc_t *md) return end; } -/* - * We can't ioremap data in EFI boot services RAM, because we've already mapped - * it as RAM. So, look it up in the existing EFI memory map instead. Only - * callable after efi_enter_virtual_mode and before efi_free_boot_services. - */ -void __iomem *efi_lookup_mapped_addr(u64 phys_addr) -{ - struct efi_memory_map *map; - void *p; - map = efi.memmap; - if (!map) - return NULL; - if (WARN_ON(!map->map)) - return NULL; - for (p = map->map; p < map->map_end; p += map->desc_size) { - efi_memory_desc_t *md = p; - u64 size = md->num_pages << EFI_PAGE_SHIFT; - u64 end = md->phys_addr + size; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; - if (!md->virt_addr) - continue; - if (phys_addr >= md->phys_addr && phys_addr < end) { - phys_addr += md->virt_addr - md->phys_addr; - return (__force void __iomem *)(unsigned long)phys_addr; - } - } - return NULL; -} - static __initdata efi_config_table_type_t common_tables[] = { {ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20}, {ACPI_TABLE_GUID, "ACPI", &efi.acpi}, -- cgit v0.10.2 From e2c90dd7e11e3025b46719a79fb4bb1e7a5cef9f Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 21 Dec 2015 14:12:52 +0000 Subject: x86/efi-bgrt: Replace early_memremap() with memremap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Môshe reported the following warning triggered on his machine since commit 50a0cb565246 ("x86/efi-bgrt: Fix kernel panic when mapping BGRT data"), [ 0.026936] ------------[ cut here ]------------ [ 0.026941] WARNING: CPU: 0 PID: 0 at mm/early_ioremap.c:137 __early_ioremap+0x102/0x1bb() [ 0.026941] Modules linked in: [ 0.026944] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.4.0-rc1 #2 [ 0.026945] Hardware name: Dell Inc. XPS 13 9343/09K8G1, BIOS A05 07/14/2015 [ 0.026946] 0000000000000000 900f03d5a116524d ffffffff81c03e60 ffffffff813a3fff [ 0.026948] 0000000000000000 ffffffff81c03e98 ffffffff810a0852 00000000d7b76000 [ 0.026949] 0000000000000000 0000000000000001 0000000000000001 000000000000017c [ 0.026951] Call Trace: [ 0.026955] [] dump_stack+0x44/0x55 [ 0.026958] [] warn_slowpath_common+0x82/0xc0 [ 0.026959] [] warn_slowpath_null+0x1a/0x20 [ 0.026961] [] __early_ioremap+0x102/0x1bb [ 0.026962] [] early_memremap+0x13/0x15 [ 0.026964] [] efi_bgrt_init+0x162/0x1ad [ 0.026966] [] efi_late_init+0x9/0xb [ 0.026968] [] start_kernel+0x46f/0x49f [ 0.026970] [] ? early_idt_handler_array+0x120/0x120 [ 0.026972] [] x86_64_start_reservations+0x2a/0x2c [ 0.026974] [] x86_64_start_kernel+0x14a/0x16d [ 0.026977] ---[ end trace f9b3812eb8e24c58 ]--- [ 0.026978] efi_bgrt: Ignoring BGRT: failed to map image memory early_memremap() has an upper limit on the size of mapping it can handle which is ~200KB. Clearly the BGRT image on Môshe's machine is much larger than that. There's actually no reason to restrict ourselves to using the early_* version of memremap() - the ACPI BGRT driver is invoked late enough in boot that we can use the standard version, with the benefit that the late version allows mappings of arbitrary size. Reported-by: Môshe van der Sterre Tested-by: Môshe van der Sterre Signed-off-by: Matt Fleming Cc: Josh Triplett Cc: Sai Praneeth Prakhya Cc: Borislav Petkov Link: http://lkml.kernel.org/r/1450707172-12561-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Thomas Gleixner diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index bf51f4c..b097066 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -72,14 +72,14 @@ void __init efi_bgrt_init(void) return; } - image = early_memremap(bgrt_tab->image_address, sizeof(bmp_header)); + image = memremap(bgrt_tab->image_address, sizeof(bmp_header), MEMREMAP_WB); if (!image) { pr_err("Ignoring BGRT: failed to map image header memory\n"); return; } memcpy(&bmp_header, image, sizeof(bmp_header)); - early_memunmap(image, sizeof(bmp_header)); + memunmap(image); bgrt_image_size = bmp_header.size; bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); @@ -89,7 +89,7 @@ void __init efi_bgrt_init(void) return; } - image = early_memremap(bgrt_tab->image_address, bmp_header.size); + image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); if (!image) { pr_err("Ignoring BGRT: failed to map image memory\n"); kfree(bgrt_image); @@ -98,5 +98,5 @@ void __init efi_bgrt_init(void) } memcpy(bgrt_image, image, bgrt_image_size); - early_memunmap(image, bmp_header.size); + memunmap(image); } -- cgit v0.10.2 From 753b11ef8e92a1c1bbe97f2a5ec14bdd1ef2e6fe Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 21 Jan 2016 14:11:59 +0000 Subject: x86/efi: Setup separate EFI page tables in kexec paths The switch to using a new dedicated page table for EFI runtime calls in commit commit 67a9108ed431 ("x86/efi: Build our own page table structures") failed to take into account changes required for the kexec code paths, which are unfortunately duplicated in the EFI code. Call the allocation and setup functions in kexec_enter_virtual_mode() just like we do for __efi_enter_virtual_mode() to avoid hitting NULL-pointer dereferences when making EFI runtime calls. At the very least, the call to efi_setup_page_tables() should have existed for kexec before the following commit: 67a9108ed431 ("x86/efi: Build our own page table structures") Things just magically worked because we were actually using the kernel's page tables that contained the required mappings. Reported-by: Srikar Dronamraju Tested-by: Srikar Dronamraju Signed-off-by: Matt Fleming Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Raghavendra K T Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1453385519-11477-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 3c1f3cd..bdd9477 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -815,6 +815,7 @@ static void __init kexec_enter_virtual_mode(void) { #ifdef CONFIG_KEXEC_CORE efi_memory_desc_t *md; + unsigned int num_pages; void *p; efi.systab = NULL; @@ -829,6 +830,12 @@ static void __init kexec_enter_virtual_mode(void) return; } + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + /* * Map efi regions which were passed via setup_data. The virt_addr is a * fixed addr which was used in first kernel of a kexec boot. @@ -843,6 +850,14 @@ static void __init kexec_enter_virtual_mode(void) BUG_ON(!efi.systab); + num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE); + num_pages >>= PAGE_SHIFT; + + if (efi_setup_page_tables(memmap.phys_map, num_pages)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + efi_sync_low_kernel_mappings(); /* -- cgit v0.10.2 From 9c6672ac9c91f7eb1ec436be1442b8c26d098e55 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2016 22:06:55 +0000 Subject: efi: Expose non-blocking set_variable() wrapper to efivars Commit 6d80dba1c9fe ("efi: Provide a non-blocking SetVariable() operation") implemented a non-blocking alternative for the UEFI SetVariable() invocation performed by efivars, since it may occur in atomic context. However, this version of the function was never exposed via the efivars struct, so the non-blocking versions was not actually callable. Fix that. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 6d80dba1c9fe ("efi: Provide a non-blocking SetVariable() operation") Link: http://lkml.kernel.org/r/1454364428-494-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 9b815c8..20451c2 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -182,6 +182,7 @@ static int generic_ops_register(void) { generic_ops.get_variable = efi.get_variable; generic_ops.set_variable = efi.set_variable; + generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking; generic_ops.get_next_variable = efi.get_next_variable; generic_ops.query_variable_store = efi_query_variable_store; -- cgit v0.10.2 From 70d2a3cf2f4ae2e93b7a661842d84c2b5132cee7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2016 22:06:56 +0000 Subject: efi: Remove redundant efi_set_variable_nonblocking() prototype There is no need for a separate nonblocking prototype definition for the SetVariable() UEFI Runtime Service, since it is identical to the blocking version. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-3-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/include/linux/efi.h b/include/linux/efi.h index 569b5a8..8706e0a 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -507,10 +507,6 @@ typedef efi_status_t efi_get_next_variable_t (unsigned long *name_size, efi_char typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data); -typedef efi_status_t -efi_set_variable_nonblocking_t(efi_char16_t *name, efi_guid_t *vendor, - u32 attr, unsigned long data_size, void *data); - typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count); typedef void efi_reset_system_t (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data); @@ -851,7 +847,7 @@ extern struct efi { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; - efi_set_variable_nonblocking_t *set_variable_nonblocking; + efi_set_variable_t *set_variable_nonblocking; efi_query_variable_info_t *query_variable_info; efi_update_capsule_t *update_capsule; efi_query_capsule_caps_t *query_capsule_caps; @@ -1091,7 +1087,7 @@ struct efivar_operations { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; - efi_set_variable_nonblocking_t *set_variable_nonblocking; + efi_set_variable_t *set_variable_nonblocking; efi_query_variable_store_t *query_variable_store; }; -- cgit v0.10.2 From d3cac1f83c631b9fe5edaebcba49f6989bfff089 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2016 22:06:57 +0000 Subject: efi/runtime-wrappers: Add a nonblocking version of QueryVariableInfo() This introduces a new runtime wrapper for the QueryVariableInfo() UEFI Runtime Service, which gives up immediately rather than spins on failure to grab the efi_runtime spinlock. This is required in the non-blocking path of the efi-pstore code. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-4-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 228bbf9..e9f2867 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -230,6 +230,27 @@ static efi_status_t virt_efi_query_variable_info(u32 attr, return status; } +static efi_status_t +virt_efi_query_variable_info_nonblocking(u32 attr, + u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + unsigned long flags; + efi_status_t status; + + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) + return EFI_NOT_READY; + + status = efi_call_virt(query_variable_info, attr, storage_space, + remaining_space, max_variable_size); + spin_unlock_irqrestore(&efi_runtime_lock, flags); + return status; +} + static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) { unsigned long flags; @@ -300,6 +321,7 @@ void efi_native_runtime_setup(void) efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; efi.reset_system = virt_efi_reset_system; efi.query_variable_info = virt_efi_query_variable_info; + efi.query_variable_info_nonblocking = virt_efi_query_variable_info_nonblocking; efi.update_capsule = virt_efi_update_capsule; efi.query_capsule_caps = virt_efi_query_capsule_caps; } diff --git a/include/linux/efi.h b/include/linux/efi.h index 8706e0a..ad1e177 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -849,6 +849,7 @@ extern struct efi { efi_set_variable_t *set_variable; efi_set_variable_t *set_variable_nonblocking; efi_query_variable_info_t *query_variable_info; + efi_query_variable_info_t *query_variable_info_nonblocking; efi_update_capsule_t *update_capsule; efi_query_capsule_caps_t *query_capsule_caps; efi_get_next_high_mono_count_t *get_next_high_mono_count; -- cgit v0.10.2 From ca0e30dcaa53a3fcb2dfdf74252d30bc40603eea Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2016 22:06:58 +0000 Subject: efi: Add nonblocking option to efi_query_variable_store() The function efi_query_variable_store() may be invoked by efivar_entry_set_nonblocking(), which itself takes care to only call a non-blocking version of the SetVariable() runtime wrapper. However, efi_query_variable_store() may call the SetVariable() wrapper directly, as well as the wrapper for QueryVariableInfo(), both of which could deadlock in the same way we are trying to prevent by calling efivar_entry_set_nonblocking() in the first place. So instead, modify efi_query_variable_store() to use the non-blocking variants of QueryVariableInfo() (and give up rather than free up space if the available space is below EFI_MIN_RESERVE) if invoked with the 'nonblocking' argument set to true. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-5-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 4535046..2326bf5 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -57,13 +57,41 @@ void efi_delete_dummy_variable(void) } /* + * In the nonblocking case we do not attempt to perform garbage + * collection if we do not have enough free space. Rather, we do the + * bare minimum check and give up immediately if the available space + * is below EFI_MIN_RESERVE. + * + * This function is intended to be small and simple because it is + * invoked from crash handler paths. + */ +static efi_status_t +query_variable_store_nonblocking(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + status = efi.query_variable_info_nonblocking(attributes, &storage_size, + &remaining_size, + &max_size); + if (status != EFI_SUCCESS) + return status; + + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + + return EFI_SUCCESS; +} + +/* * Some firmware implementations refuse to boot if there's insufficient space * in the variable store. Ensure that we never use more than a safe limit. * * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable * store. */ -efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, + bool nonblocking) { efi_status_t status; u64 storage_size, remaining_size, max_size; @@ -71,6 +99,9 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) return 0; + if (nonblocking) + return query_variable_store_nonblocking(attributes, size); + status = efi.query_variable_info(attributes, &storage_size, &remaining_size, &max_size); if (status != EFI_SUCCESS) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 70a0fb1..d2a4962 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -234,7 +234,18 @@ check_var_size(u32 attributes, unsigned long size) if (!fops->query_variable_store) return EFI_UNSUPPORTED; - return fops->query_variable_store(attributes, size); + return fops->query_variable_store(attributes, size, false); +} + +static efi_status_t +check_var_size_nonblocking(u32 attributes, unsigned long size) +{ + const struct efivar_operations *fops = __efivars->ops; + + if (!fops->query_variable_store) + return EFI_UNSUPPORTED; + + return fops->query_variable_store(attributes, size, true); } static int efi_status_to_err(efi_status_t status) @@ -615,7 +626,8 @@ efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, if (!spin_trylock_irqsave(&__efivars->lock, flags)) return -EBUSY; - status = check_var_size(attributes, size + ucs2_strsize(name, 1024)); + status = check_var_size_nonblocking(attributes, + size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { spin_unlock_irqrestore(&__efivars->lock, flags); return -ENOSPC; diff --git a/include/linux/efi.h b/include/linux/efi.h index ad1e177..09f1559 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -525,7 +525,9 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules, unsigned long count, u64 *max_size, int *reset_type); -typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long size); +typedef efi_status_t efi_query_variable_store_t(u32 attributes, + unsigned long size, + bool nonblocking); void efi_native_runtime_setup(void); @@ -881,13 +883,17 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos #ifdef CONFIG_X86 extern void efi_late_init(void); extern void efi_free_boot_services(void); -extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size); +extern efi_status_t efi_query_variable_store(u32 attributes, + unsigned long size, + bool nonblocking); extern void efi_find_mirror(void); #else static inline void efi_late_init(void) {} static inline void efi_free_boot_services(void) {} -static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +static inline efi_status_t efi_query_variable_store(u32 attributes, + unsigned long size, + bool nonblocking) { return EFI_SUCCESS; } -- cgit v0.10.2 From 774846defceb16dcab2f0215cfc467f7c93f1c26 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2016 22:06:59 +0000 Subject: efi/runtime-wrappers: Remove out of date comment regarding in_nmi() This code is long gone, so remove the comment as well. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-6-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index e9f2867..311f415 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -62,32 +62,6 @@ static DEFINE_SPINLOCK(efi_runtime_lock); /* - * Some runtime services calls can be reentrant under NMI, even if the table - * above says they are not. (source: UEFI Specification v2.4A) - * - * Table 32. Functions that may be called after Machine Check, INIT and NMI - * +----------------------------+------------------------------------------+ - * | Function | Called after Machine Check, INIT and NMI | - * +----------------------------+------------------------------------------+ - * | GetTime() | Yes, even if previously busy. | - * | GetVariable() | Yes, even if previously busy | - * | GetNextVariableName() | Yes, even if previously busy | - * | QueryVariableInfo() | Yes, even if previously busy | - * | SetVariable() | Yes, even if previously busy | - * | UpdateCapsule() | Yes, even if previously busy | - * | QueryCapsuleCapabilities() | Yes, even if previously busy | - * | ResetSystem() | Yes, even if previously busy | - * +----------------------------+------------------------------------------+ - * - * In order to prevent deadlocks under NMI, the wrappers for these functions - * may only grab the efi_runtime_lock or rtc_lock spinlocks if !efi_in_nmi(). - * However, not all of the services listed are reachable through NMI code paths, - * so the the special handling as suggested by the UEFI spec is only implemented - * for QueryVariableInfo() and SetVariable(), as these can be reached in NMI - * context through efi_pstore_write(). - */ - -/* * As per commit ef68c8f87ed1 ("x86: Serialize EFI time accesses on rtc_lock"), * the EFI specification requires that callers of the time related runtime * functions serialize with other CMOS accesses in the kernel, as the EFI time -- cgit v0.10.2 From 1bb6936473c07b5a7c8daced1000893b7145bb14 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Feb 2016 22:07:00 +0000 Subject: efi: Runtime-wrapper: Get rid of the rtc_lock spinlock The rtc_lock spinlock aims to serialize access to the CMOS RTC between the UEFI firmware and the kernel drivers that use it directly. However, x86 is the only arch that performs such direct accesses, and that never uses the time related UEFI runtime services. Since no other UEFI enlightened architectures have a legcay CMOS RTC anyway, we can remove the rtc_lock spinlock entirely. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-7-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 311f415..7b8b2f2 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -61,24 +61,14 @@ */ static DEFINE_SPINLOCK(efi_runtime_lock); -/* - * As per commit ef68c8f87ed1 ("x86: Serialize EFI time accesses on rtc_lock"), - * the EFI specification requires that callers of the time related runtime - * functions serialize with other CMOS accesses in the kernel, as the EFI time - * functions may choose to also use the legacy CMOS RTC. - */ -__weak DEFINE_SPINLOCK(rtc_lock); - static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { unsigned long flags; efi_status_t status; - spin_lock_irqsave(&rtc_lock, flags); - spin_lock(&efi_runtime_lock); + spin_lock_irqsave(&efi_runtime_lock, flags); status = efi_call_virt(get_time, tm, tc); - spin_unlock(&efi_runtime_lock); - spin_unlock_irqrestore(&rtc_lock, flags); + spin_unlock_irqrestore(&efi_runtime_lock, flags); return status; } @@ -87,11 +77,9 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm) unsigned long flags; efi_status_t status; - spin_lock_irqsave(&rtc_lock, flags); - spin_lock(&efi_runtime_lock); + spin_lock_irqsave(&efi_runtime_lock, flags); status = efi_call_virt(set_time, tm); - spin_unlock(&efi_runtime_lock); - spin_unlock_irqrestore(&rtc_lock, flags); + spin_unlock_irqrestore(&efi_runtime_lock, flags); return status; } @@ -102,11 +90,9 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, unsigned long flags; efi_status_t status; - spin_lock_irqsave(&rtc_lock, flags); - spin_lock(&efi_runtime_lock); + spin_lock_irqsave(&efi_runtime_lock, flags); status = efi_call_virt(get_wakeup_time, enabled, pending, tm); - spin_unlock(&efi_runtime_lock); - spin_unlock_irqrestore(&rtc_lock, flags); + spin_unlock_irqrestore(&efi_runtime_lock, flags); return status; } @@ -115,11 +101,9 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) unsigned long flags; efi_status_t status; - spin_lock_irqsave(&rtc_lock, flags); - spin_lock(&efi_runtime_lock); + spin_lock_irqsave(&efi_runtime_lock, flags); status = efi_call_virt(set_wakeup_time, enabled, tm); - spin_unlock(&efi_runtime_lock); - spin_unlock_irqrestore(&rtc_lock, flags); + spin_unlock_irqrestore(&efi_runtime_lock, flags); return status; } -- cgit v0.10.2 From 9c09a342eb27902955ca939bd6ba29d875bd8b6b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2016 22:07:02 +0000 Subject: efivars: Use to_efivar_entry Use to_efivar_entry() instead of open-coding it. Signed-off-by: Geliang Tang Signed-off-by: Matt Fleming Link: http://lkml.kernel.org/r/1454364428-494-9-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 756eca8..c5b0d2b 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -386,7 +386,7 @@ static const struct sysfs_ops efivar_attr_ops = { static void efivar_release(struct kobject *kobj) { - struct efivar_entry *var = container_of(kobj, struct efivar_entry, kobj); + struct efivar_entry *var = to_efivar_entry(kobj); kfree(var); } -- cgit v0.10.2 From 66dbe99cfe30e113d2e571e68b9b6a1a8985a157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=B4she=20van=20der=20Sterre?= Date: Mon, 1 Feb 2016 22:07:03 +0000 Subject: x86/efi/bgrt: Don't ignore the BGRT if the 'valid' bit is 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unintuitively, the BGRT graphic is apparently meant to be usable if the valid bit in not set. The valid bit only conveys uncertainty about the validity in relation to the screen state. Windows 10 actually uses the BGRT image for its boot screen even if not 'valid', for example when the user triggered the boot menu. Because it is unclear if all firmwares will provide a usable graphic in this case, we now look at the BMP magic number as an additional check. Reviewed-by: Josh Triplett Signed-off-by: Môshe van der Sterre Signed-off-by: Matt Fleming Cc: =?UTF-8?q?M=C3=B4she=20van=20der=20Sterre?= Link: http://lkml.kernel.org/r/1454364428-494-10-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index b097066..a243381 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -57,11 +57,6 @@ void __init efi_bgrt_init(void) bgrt_tab->status); return; } - if (bgrt_tab->status != 1) { - pr_debug("Ignoring BGRT: invalid status %u (expected 1)\n", - bgrt_tab->status); - return; - } if (bgrt_tab->image_type != 0) { pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n", bgrt_tab->image_type); @@ -80,6 +75,11 @@ void __init efi_bgrt_init(void) memcpy(&bmp_header, image, sizeof(bmp_header)); memunmap(image); + if (bmp_header.id != 0x4d42) { + pr_err("Ignoring BGRT: Incorrect BMP magic number 0x%x (expected 0x4d42)\n", + bmp_header.id); + return; + } bgrt_image_size = bmp_header.size; bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); -- cgit v0.10.2 From 1e82b94790709fb2a22d16d53bb04d751fb3878d Mon Sep 17 00:00:00 2001 From: Robert Elliott Date: Mon, 1 Feb 2016 22:07:05 +0000 Subject: x86/efi: Show actual ending addresses in efi_print_memmap Adjust efi_print_memmap to print the real end address of each range, not 1 byte beyond. This matches other prints like those for SRAT and nosave memory. While investigating grub persistent memory corruption issues, it was helpful to make this table match the ending address convention used by: * the kernel's e820 table prints BIOS-e820: [mem 0x0000001680000000-0x0000001c7fffffff] reserved * the kernel's nosave memory prints PM: Registered nosave memory: [mem 0x880000000-0xc7fffffff] * the kernel's ACPI System Resource Affinity Table prints SRAT: Node 1 PXM 1 [mem 0x480000000-0x87fffffff] * grub's lsmmap and lsefimmap commands reserved 0000001680000000-0000001c7fffffff 00600000 24GiB UC WC WT WB NV * the UEFI shell's memmap command Reserved 000000007FC00000-000000007FFFFFFF 0000000000000400 0000000000000001 For example, if you grep all the various logs for c7fffffff, you won't find the kernel's line if it uses c80000000. Also, change the closing ) to ] to match the opening [. old: efi: mem61: [Persistent Memory | | | | | | | |WB|WT|WC|UC] range=[0x0000000880000000-0x0000000c80000000) (16384MB) new: efi: mem61: [Persistent Memory | | | | | | | |WB|WT|WC|UC] range=[0x0000000880000000-0x0000000c7fffffff] (16384MB) Signed-off-by: Robert Elliott Signed-off-by: Matt Fleming Reviewed-by: Laszlo Ersek Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Leif Lindholm Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-12-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index bdd9477..e80826e 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -235,10 +235,10 @@ void __init efi_print_memmap(void) char buf[64]; md = p; - pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n", + pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n", i, efi_md_typeattr_format(buf, sizeof(buf), md), md->phys_addr, - md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1, (md->num_pages >> (20 - EFI_PAGE_SHIFT))); } #endif /* EFI_DEBUG */ -- cgit v0.10.2 From c016ca08f89c6c78ed815f025262bdb87aba3f4c Mon Sep 17 00:00:00 2001 From: Robert Elliott Date: Mon, 1 Feb 2016 22:07:06 +0000 Subject: efi: Add NV memory attribute Add the NV memory attribute introduced in UEFI 2.5 and add a column for it in the types and attributes string used when printing the UEFI memory map. old: efi: mem61: [type=14 | | | | | | | |WB|WT|WC|UC] range=[0x0000000880000000-0x0000000c7fffffff) (16384MB) new: efi: mem61: [type=14 | | |NV| | | | | |WB|WT|WC|UC] range=[0x0000000880000000-0x0000000c7fffffff) (16384MB) Signed-off-by: Robert Elliott Signed-off-by: Matt Fleming Reviewed-by: Laszlo Ersek Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ross Zwisler Cc: Taku Izumi Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-13-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 20451c2..f437048 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -582,13 +582,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t size, if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT | EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | + EFI_MEMORY_NV | EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE)) snprintf(pos, size, "|attr=0x%016llx]", (unsigned long long)attr); else - snprintf(pos, size, "|%3s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", + snprintf(pos, size, + "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", attr & EFI_MEMORY_RUNTIME ? "RUN" : "", attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "", + attr & EFI_MEMORY_NV ? "NV" : "", attr & EFI_MEMORY_XP ? "XP" : "", attr & EFI_MEMORY_RP ? "RP" : "", attr & EFI_MEMORY_WP ? "WP" : "", diff --git a/include/linux/efi.h b/include/linux/efi.h index 09f1559..3c6cbbd 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -97,6 +97,7 @@ typedef struct { #define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */ #define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */ #define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ +#define EFI_MEMORY_NV ((u64)0x0000000000008000ULL) /* non-volatile */ #define EFI_MEMORY_MORE_RELIABLE \ ((u64)0x0000000000010000ULL) /* higher reliability */ #define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ -- cgit v0.10.2 From 35575e0e8ba633fc8276509a21f89b599b4f9006 Mon Sep 17 00:00:00 2001 From: Robert Elliott Date: Mon, 1 Feb 2016 22:07:07 +0000 Subject: efi: Add Persistent Memory type name Add the "Persistent Memory" string for type 14 introduced in UEFI 2.5. This is used when printing the UEFI memory map. old: efi: mem61: [type=14 | | | | | | | |WB|WT|WC|UC] range=[0x0000000880000000-0x0000000c7fffffff) (16384MB) new: efi: mem61: [Persistent Memory | | | | | | | |WB|WT|WC|UC] range=[0x0000000880000000-0x0000000c7fffffff) (16384MB) Signed-off-by: Robert Elliott Signed-off-by: Matt Fleming Reviewed-by: Laszlo Ersek Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ross Zwisler Cc: Taku Izumi Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1454364428-494-14-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index f437048..3a69ed5 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -555,7 +555,8 @@ static __initdata char memory_type_name[][20] = { "ACPI Memory NVS", "Memory Mapped I/O", "MMIO Port Space", - "PAL Code" + "PAL Code", + "Persistent Memory", }; char * __init efi_md_typeattr_format(char *buf, size_t size, -- cgit v0.10.2 From 662b1d890c593673964758fe5b6f22067bffba7a Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Wed, 17 Feb 2016 12:35:54 +0000 Subject: efi: Reformat GUID tables to follow the format in UEFI spec This makes it much easier to hunt for typos in the GUID definitions. It also makes checkpatch complain less about efi.h GUID additions, so that if you add another one with the same style, checkpatch won't complain about it. Signed-off-by: Peter Jones Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/include/linux/efi.h b/include/linux/efi.h index 1acd723..42be9c9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -536,67 +536,88 @@ void efi_native_runtime_setup(void); * EFI Configuration Table and GUID definitions */ #define NULL_GUID \ - EFI_GUID( 0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ) + EFI_GUID(0x00000000, 0x0000, 0x0000, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) #define MPS_TABLE_GUID \ - EFI_GUID( 0xeb9d2d2f, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d2f, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_TABLE_GUID \ - EFI_GUID( 0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_20_TABLE_GUID \ - EFI_GUID( 0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81 ) + EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, \ + 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define SMBIOS_TABLE_GUID \ - EFI_GUID( 0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define SMBIOS3_TABLE_GUID \ - EFI_GUID( 0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94 ) + EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, \ + 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94) #define SAL_SYSTEM_TABLE_GUID \ - EFI_GUID( 0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) + EFI_GUID(0xeb9d2d32, 0x2d88, 0x11d3, \ + 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define HCDP_TABLE_GUID \ - EFI_GUID( 0xf951938d, 0x620b, 0x42ef, 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98 ) + EFI_GUID(0xf951938d, 0x620b, 0x42ef, \ + 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98) #define UGA_IO_PROTOCOL_GUID \ - EFI_GUID( 0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0xb, 0x7, 0xa2 ) + EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, \ + 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2) #define EFI_GLOBAL_VARIABLE_GUID \ - EFI_GUID( 0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c ) + EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, \ + 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c) #define UV_SYSTEM_TABLE_GUID \ - EFI_GUID( 0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 ) + EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, \ + 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) #define LINUX_EFI_CRASH_GUID \ - EFI_GUID( 0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0 ) + EFI_GUID(0xcfc8fc79, 0xbe2e, 0x4ddc, \ + 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0) #define LOADED_IMAGE_PROTOCOL_GUID \ - EFI_GUID( 0x5b1b31a1, 0x9562, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) + EFI_GUID(0x5b1b31a1, 0x9562, 0x11d2, \ + 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID \ - EFI_GUID( 0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a ) + EFI_GUID(0x9042a9de, 0x23dc, 0x4a38, \ + 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a) #define EFI_UGA_PROTOCOL_GUID \ - EFI_GUID( 0x982c298b, 0xf4fa, 0x41cb, 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39 ) + EFI_GUID(0x982c298b, 0xf4fa, 0x41cb, \ + 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39) #define EFI_PCI_IO_PROTOCOL_GUID \ - EFI_GUID( 0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x2, 0x9a ) + EFI_GUID(0x4cf5b200, 0x68b8, 0x4ca5, \ + 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x02, 0x9a) #define EFI_FILE_INFO_ID \ - EFI_GUID( 0x9576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) + EFI_GUID(0x9576e92, 0x6d3f, 0x11d2, \ + 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_SYSTEM_RESOURCE_TABLE_GUID \ - EFI_GUID( 0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80 ) + EFI_GUID(0xb122a263, 0x3661, 0x4f68, \ + 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80) #define EFI_FILE_SYSTEM_GUID \ - EFI_GUID( 0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) + EFI_GUID(0x964e5b22, 0x6459, 0x11d2, \ + 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define DEVICE_TREE_GUID \ - EFI_GUID( 0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0 ) + EFI_GUID(0xb1b621d5, 0xf19c, 0x41a5, \ + 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0) #define EFI_PROPERTIES_TABLE_GUID \ - EFI_GUID( 0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5 ) + EFI_GUID(0x880aaca3, 0x4adc, 0x4a04, \ + 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5) typedef struct { efi_guid_t guid; -- cgit v0.10.2 From fe3244945c47161e2486412d6412c87ba279305d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Feb 2016 12:35:55 +0000 Subject: efi/runtime-wrappers: Run UEFI Runtime Services with interrupts enabled The UEFI spec allows Runtime Services to be invoked with interrupts enabled. The only reason we were disabling interrupts was to prevent recursive calls into the services on the same CPU, which will lead to deadlock. However, the only context where such invocations may occur legally is from efi-pstore via efivars, and that code has been updated to call a non-blocking alternative when invoked from a non-interruptible context. So instead, update the ordinary, blocking UEFI Runtime Services wrappers to execute with interrupts enabled. This aims to prevent excessive interrupt latencies on uniprocessor platforms with slow variable stores. Note that other OSes such as Windows call UEFI Runtime Services with interrupts enabled as well. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-3-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 7b8b2f2..de69530 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -63,23 +63,21 @@ static DEFINE_SPINLOCK(efi_runtime_lock); static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(get_time, tm, tc); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } static efi_status_t virt_efi_set_time(efi_time_t *tm) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(set_time, tm); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -87,23 +85,21 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(get_wakeup_time, enabled, pending, tm); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(set_wakeup_time, enabled, tm); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -113,13 +109,12 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name, unsigned long *data_size, void *data) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(get_variable, name, vendor, attr, data_size, data); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -127,12 +122,11 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(get_next_variable, name_size, name, vendor); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -142,13 +136,12 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, unsigned long data_size, void *data) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(set_variable, name, vendor, attr, data_size, data); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -157,15 +150,14 @@ virt_efi_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data) { - unsigned long flags; efi_status_t status; - if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) + if (!spin_trylock(&efi_runtime_lock)) return EFI_NOT_READY; status = efi_call_virt(set_variable, name, vendor, attr, data_size, data); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -175,16 +167,15 @@ static efi_status_t virt_efi_query_variable_info(u32 attr, u64 *remaining_space, u64 *max_variable_size) { - unsigned long flags; efi_status_t status; if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(query_variable_info, attr, storage_space, remaining_space, max_variable_size); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -194,29 +185,27 @@ virt_efi_query_variable_info_nonblocking(u32 attr, u64 *remaining_space, u64 *max_variable_size) { - unsigned long flags; efi_status_t status; if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) + if (!spin_trylock(&efi_runtime_lock)) return EFI_NOT_READY; status = efi_call_virt(query_variable_info, attr, storage_space, remaining_space, max_variable_size); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) { - unsigned long flags; efi_status_t status; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(get_next_high_mono_count, count); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -225,26 +214,23 @@ static void virt_efi_reset_system(int reset_type, unsigned long data_size, efi_char16_t *data) { - unsigned long flags; - - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); __efi_call_virt(reset_system, reset_type, status, data_size, data); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); } static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, unsigned long count, unsigned long sg_list) { - unsigned long flags; efi_status_t status; if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(update_capsule, capsules, count, sg_list); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } @@ -253,16 +239,15 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, u64 *max_size, int *reset_type) { - unsigned long flags; efi_status_t status; if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - spin_lock_irqsave(&efi_runtime_lock, flags); + spin_lock(&efi_runtime_lock); status = efi_call_virt(query_capsule_caps, capsules, count, max_size, reset_type); - spin_unlock_irqrestore(&efi_runtime_lock, flags); + spin_unlock(&efi_runtime_lock); return status; } -- cgit v0.10.2 From 397630150632639b3ca5b4414accd5011c45e276 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 17 Feb 2016 12:35:56 +0000 Subject: x86/mm/pat: Use _PAGE_GLOBAL bit for EFI page table mappings Since EFI page tables can be treated as kernel page tables they should be global. All the other page mapping functions in pageattr.c set the _PAGE_GLOBAL bit and we want to avoid inconsistencies when we map a page in the EFI code paths, for example when that page is split in __split_large_page(), etc. It also makes it easier to validate that the EFI region mappings have the correct attributes because there are fewer differences compared with regular kernel mappings. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-4-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 632d34d..bf312da 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -909,6 +909,20 @@ static void populate_pte(struct cpa_data *cpa, pte = pte_offset_kernel(pmd, start); + /* + * Set the GLOBAL flags only if the PRESENT flag is + * set otherwise pte_present will return true even on + * a non present pte. The canon_pgprot will clear + * _PAGE_GLOBAL for the ancient hardware that doesn't + * support it. + */ + if (pgprot_val(pgprot) & _PAGE_PRESENT) + pgprot_val(pgprot) |= _PAGE_GLOBAL; + else + pgprot_val(pgprot) &= ~_PAGE_GLOBAL; + + pgprot = canon_pgprot(pgprot); + while (num_pages-- && start < end) { set_pte(pte, pfn_pte(cpa->pfn, pgprot)); -- cgit v0.10.2 From dae31fd2b74c35cc84128733bc210bf6b26ae408 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Feb 2016 12:35:57 +0000 Subject: efi/arm64: Drop __init annotation from handle_kernel_image() After moving arm64-stub.c to libstub/, all of its sections are emitted as .init.xxx sections automatically, and the __init annotation of handle_kernel_image() causes it to end up in .init.init.text, which is not recognized as an __init section by the linker scripts. So drop the annotation. Tested-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Acked-by: Will Deacon Acked-by: Mark Rutland Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-5-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 78dfbd3..9e03427 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -13,13 +13,13 @@ #include #include -efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, - unsigned long *image_addr, - unsigned long *image_size, - unsigned long *reserve_addr, - unsigned long *reserve_size, - unsigned long dram_base, - efi_loaded_image_t *image) +efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image) { efi_status_t status; unsigned long kernel_size, kernel_memsize = 0; -- cgit v0.10.2 From 1ce99bf45306ba889faadced6baabebf7770c546 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Feb 2016 12:35:58 +0000 Subject: arm64/vmlinux.lds.S: Handle .init.rodata.xxx and .init.bss sections The EFI stub is typically built into the decompressor (x86, ARM) so none of its symbols are annotated as __init. However, on arm64, the stub is linked into the kernel proper, and the code is __init annotated at the section level by prepending all names of SHF_ALLOC sections with '.init'. This results in section names like .init.rodata.str1.8 (for string literals) and .init.bss (which is tiny), both of which can be moved into the .init.data output section. Tested-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Acked-by: Will Deacon Acked-by: Mark Rutland Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-6-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index e3928f5..cbf4db4 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -134,6 +134,7 @@ SECTIONS CON_INITCALL SECURITY_INITCALL INIT_RAM_FS + *(.init.rodata.* .init.bss) /* from the EFI stub */ } .exit.data : { ARM_EXIT_KEEP(EXIT_DATA) -- cgit v0.10.2 From 07e83dbb75865b016f6493c119a30aac7c25051a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Feb 2016 12:35:59 +0000 Subject: efi/efistub: Prevent __init annotations from being used __init annotations should not be used in the EFI stub, since the code is either included in the decompressor (x86, ARM) where they have no effect, or the whole stub is __init annotated at the section level (arm64), by renaming the sections. In the second case the __init annotations will be redundant, and will result in section names like .init.init.text, and our linker script does not expect that. So un-#define __init so that its inadvertent use will force a build error. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Acked-by: Mark Rutland Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-7-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 6b6548f..86ff7bf 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -5,6 +5,16 @@ /* error code which can't be mistaken for valid address */ #define EFI_ERROR (~0UL) +/* + * __init annotations should not be used in the EFI stub, since the code is + * either included in the decompressor (x86, ARM) where they have no effect, + * or the whole stub is __init annotated at the section level (arm64), by + * renaming the sections, in which case the __init annotation will be + * redundant, and will result in section names like .init.init.text, and our + * linker script does not expect that. + */ +#undef __init + void efi_char16_printk(efi_system_table_t *, efi_char16_t *); efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, -- cgit v0.10.2 From 2eec5dedf770dc85c1fdf6b86873165e61bb1fff Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Feb 2016 12:36:00 +0000 Subject: efi/arm-init: Use read-only early mappings The early mappings of the EFI system table contents and the UEFI memory map are read-only from the OS point of view. So map them read-only to protect them from inadvertent modification. Tested-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Acked-by: Mark Rutland Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-8-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 9e15d57..aa1f743 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -61,8 +61,8 @@ static int __init uefi_init(void) char vendor[100] = "unknown"; int i, retval; - efi.systab = early_memremap(efi_system_table, - sizeof(efi_system_table_t)); + efi.systab = early_memremap_ro(efi_system_table, + sizeof(efi_system_table_t)); if (efi.systab == NULL) { pr_warn("Unable to map EFI system table.\n"); return -ENOMEM; @@ -86,8 +86,8 @@ static int __init uefi_init(void) efi.systab->hdr.revision & 0xffff); /* Show what we know for posterity */ - c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), - sizeof(vendor) * sizeof(efi_char16_t)); + c16 = early_memremap_ro(efi_to_phys(efi.systab->fw_vendor), + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) vendor[i] = c16[i]; @@ -100,8 +100,8 @@ static int __init uefi_init(void) efi.systab->hdr.revision & 0xffff, vendor); table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables; - config_tables = early_memremap(efi_to_phys(efi.systab->tables), - table_size); + config_tables = early_memremap_ro(efi_to_phys(efi.systab->tables), + table_size); if (config_tables == NULL) { pr_warn("Unable to map EFI config table array.\n"); retval = -ENOMEM; @@ -185,7 +185,7 @@ void __init efi_init(void) efi_system_table = params.system_table; memmap.phys_map = params.mmap; - memmap.map = early_memremap(params.mmap, params.mmap_size); + memmap.map = early_memremap_ro(params.mmap, params.mmap_size); if (memmap.map == NULL) { /* * If we are booting via UEFI, the UEFI memory map is the only -- cgit v0.10.2 From 2ec0f0a3a4bfab90eda8b81656f62e07abf2321f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Feb 2016 12:36:01 +0000 Subject: efi/arm: Check for LPAE support before booting a LPAE kernel A kernel built with support for LPAE cannot boot to a state where it can inform the user about if it has to fail due to missing LPAE support in the hardware. If we happen to be booting via UEFI, we can fail gracefully so check for LPAE support in the hardware on CONFIG_ARM_LPAE builds before entering the kernel proper. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Reviewed-by: Jeremy Linton Acked-by: Mark Rutland Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-9-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index 495ebd6..6f42be4 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -9,6 +9,23 @@ #include #include +efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) +{ + int block; + + /* non-LPAE kernels can run anywhere */ + if (!IS_ENABLED(CONFIG_ARM_LPAE)) + return EFI_SUCCESS; + + /* LPAE kernels need compatible hardware */ + block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0); + if (block < 5) { + pr_efi_err(sys_table_arg, "This LPAE kernel is not supported by your CPU\n"); + return EFI_UNSUPPORTED; + } + return EFI_SUCCESS; +} + efi_status_t handle_kernel_image(efi_system_table_t *sys_table, unsigned long *image_addr, unsigned long *image_size, -- cgit v0.10.2 From 42b55734030c1f724d5f47aeb872e2cccd650d79 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Feb 2016 12:36:02 +0000 Subject: efi/arm64: Check for h/w support before booting a >4 KB granular kernel A kernel built with support for a page size that is not supported by the hardware it runs on cannot boot to a state where it can inform the user about the failure. If we happen to be booting via UEFI, we can fail gracefully so check if the currently configured page size is supported by the hardware before entering the kernel proper. Note that UEFI mandates support for 4 KB pages, so in that case, no check is needed. Tested-by: Suzuki K Poulose Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Reviewed-by: Jeremy Linton Acked-by: Mark Rutland Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-10-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 9e03427..047fc34 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -12,6 +12,26 @@ #include #include #include +#include + +efi_status_t check_platform_features(efi_system_table_t *sys_table_arg) +{ + u64 tg; + + /* UEFI mandates support for 4 KB granularity, no need to check */ + if (IS_ENABLED(CONFIG_ARM64_4K_PAGES)) + return EFI_SUCCESS; + + tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf; + if (tg != ID_AA64MMFR0_TGRAN_SUPPORTED) { + if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) + pr_efi_err(sys_table_arg, "This 64 KB granular kernel is not supported by your CPU\n"); + else + pr_efi_err(sys_table_arg, "This 16 KB granular kernel is not supported by your CPU\n"); + return EFI_UNSUPPORTED; + } + return EFI_SUCCESS; +} efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, unsigned long *image_addr, -- cgit v0.10.2 From b9d6769b5678dbd6cb328d20716561d35b2b1510 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Feb 2016 12:36:03 +0000 Subject: efi/arm*: Perform hardware compatibility check Before proceeding with relocating the kernel and parsing the command line, insert a call to check_platform_features() to allow an arch specific check to be performed whether the current kernel can execute on the current hardware. Tested-by: Suzuki K Poulose Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Reviewed-by: Jeremy Linton Acked-by: Mark Rutland Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-11-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 3397902..6086a87 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -190,6 +190,10 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, pr_efi(sys_table, "Booting Linux Kernel...\n"); + status = check_platform_features(sys_table); + if (status != EFI_SUCCESS) + goto fail; + /* * Get a handle to the loaded image protocol. This is used to get * information about the running image, such as size and the command diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 86ff7bf..981c603 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -53,4 +53,6 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, unsigned long desc_size, efi_memory_desc_t *runtime_map, int *count); +efi_status_t check_platform_features(efi_system_table_t *sys_table_arg); + #endif -- cgit v0.10.2 From 15f003d20782a4079e078d16df57081ebd1fc150 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 17 Feb 2016 12:36:04 +0000 Subject: x86/mm/pat: Don't implicitly allow _PAGE_RW in kernel_map_pages_in_pgd() As part of the preparation for the EFI_MEMORY_RO flag added in the UEFI 2.5 specification, we need the ability to map pages in kernel page tables without _PAGE_RW being set. Modify kernel_map_pages_in_pgd() to require its callers to pass _PAGE_RW if the pages need to be mapped read/write. Otherwise, we'll map the pages as read-only. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Lee, Chun-Yi Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-12-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bf312da..14c38ae 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1971,6 +1971,9 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, if (!(page_flags & _PAGE_NX)) cpa.mask_clr = __pgprot(_PAGE_NX); + if (!(page_flags & _PAGE_RW)) + cpa.mask_clr = __pgprot(_PAGE_RW); + cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); retval = __change_page_attr_set_clr(&cpa, 0); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b492521..b0965b2 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -233,7 +233,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * phys_efi_set_virtual_address_map(). */ pfn = pa_memmap >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX)) { + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) { pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); return 1; } @@ -262,7 +262,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) pfn = md->phys_addr >> PAGE_SHIFT; npages = md->num_pages; - if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, 0)) { + if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, _PAGE_RW)) { pr_err("Failed to map 1:1 memory\n"); return 1; } @@ -279,7 +279,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) text = __pa(_text); pfn = text >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, 0)) { + if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) { pr_err("Failed to map kernel text 1:1\n"); return 1; } @@ -294,7 +294,7 @@ void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) static void __init __map_region(efi_memory_desc_t *md, u64 va) { - unsigned long flags = 0; + unsigned long flags = _PAGE_RW; unsigned long pfn; pgd_t *pgd = efi_pgd; -- cgit v0.10.2 From 6d0cc887d571e96f928be83f094322451fd4bf6f Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 17 Feb 2016 12:36:05 +0000 Subject: x86/efi: Map EFI_MEMORY_{XP,RO} memory region bits to EFI page tables Now that we have EFI memory region bits that indicate which regions do not need execute permission or read/write permission in the page tables, let's use them. We also check for EFI_NX_PE_DATA and only enforce the restrictive mappings if it's present (to allow us to ignore buggy firmware that sets bits it didn't mean to and to preserve backwards compatibility). Instead of assuming that firmware would set appropriate attributes in memory descriptor like EFI_MEMORY_RO for code and EFI_MEMORY_XP for data, we can expect some firmware out there which might only set *type* in memory descriptor to be EFI_RUNTIME_SERVICES_CODE or EFI_RUNTIME_SERVICES_DATA leaving away attribute. This will lead to improper mappings of EFI runtime regions. In order to avoid it, we check attribute and type of memory descriptor to update mappings and moreover Windows works this way. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Kees Cook Cc: Lee, Chun-Yi Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-13-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8fd9e63..7bb206f 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -141,7 +141,7 @@ extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pa extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); -extern void __init efi_runtime_mkexec(void); +extern void __init efi_runtime_update_mappings(void); extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index e80826e..994a7df8 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -934,7 +934,6 @@ static void __init __efi_enter_virtual_mode(void) } efi_sync_low_kernel_mappings(); - efi_dump_pagetable(); if (efi_is_native()) { status = phys_efi_set_virtual_address_map( @@ -972,7 +971,13 @@ static void __init __efi_enter_virtual_mode(void) efi.set_virtual_address_map = NULL; - efi_runtime_mkexec(); + /* + * Apply more restrictive page table mapping attributes now that + * SVAM() has been called and the firmware has performed all + * necessary relocation fixups for the new virtual addresses. + */ + efi_runtime_update_mappings(); + efi_dump_pagetable(); /* * We mapped the descriptor array into the EFI pagetable above diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 58d669b..338402b 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -90,7 +90,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) __flush_tlb_all(); } -void __init efi_runtime_mkexec(void) +void __init efi_runtime_update_mappings(void) { if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b0965b2..40d2f44 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -393,13 +393,50 @@ void __init parse_efi_setup(u64 phys_addr, u32 data_len) efi_setup = phys_addr + sizeof(struct setup_data); } -void __init efi_runtime_mkexec(void) +void __init efi_runtime_update_mappings(void) { - if (!efi_enabled(EFI_OLD_MEMMAP)) + unsigned long pfn; + pgd_t *pgd = efi_pgd; + efi_memory_desc_t *md; + void *p; + + if (efi_enabled(EFI_OLD_MEMMAP)) { + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); + return; + } + + if (!efi_enabled(EFI_NX_PE_DATA)) return; - if (__supported_pte_mask & _PAGE_NX) - runtime_code_page_mkexec(); + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + unsigned long pf = 0; + md = p; + + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + + if (!(md->attribute & EFI_MEMORY_WB)) + pf |= _PAGE_PCD; + + if ((md->attribute & EFI_MEMORY_XP) || + (md->type == EFI_RUNTIME_SERVICES_DATA)) + pf |= _PAGE_NX; + + if (!(md->attribute & EFI_MEMORY_RO) && + (md->type != EFI_RUNTIME_SERVICES_CODE)) + pf |= _PAGE_RW; + + /* Update the 1:1 mapping */ + pfn = md->phys_addr >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + + if (kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } } void __init efi_dump_pagetable(void) -- cgit v0.10.2 From 2ad510dc372c2caac9aada9ff6dd10e787616e1d Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Wed, 17 Feb 2016 12:36:06 +0000 Subject: x86/efi: Only map kernel text for EFI mixed mode The correct symbol to use when figuring out the size of the kernel text is '_etext', not '_end' which is the symbol for the entire kernel image includes data and debug sections. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1455712566-16727-14-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 40d2f44..49e4dd4 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -275,7 +275,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.phys_stack = virt_to_phys(page_address(page)); efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ - npages = (_end - _text) >> PAGE_SHIFT; + npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); pfn = text >> PAGE_SHIFT; -- cgit v0.10.2 From d367cef0a7f0c6ee86e997c0cb455b21b3c6b9ba Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 14 Mar 2016 10:33:01 +0000 Subject: x86/mm/pat: Fix boot crash when 1GB pages are not supported by the CPU Scott reports that with the new separate EFI page tables he's seeing the following error on boot, caused by setting reserved bits in the page table structures (fault code is PF_RSVD | PF_PROT), swapper/0: Corrupted page table at address 17b102020 PGD 17b0e5063 PUD 1400000e3 Bad pagetable: 0009 [#1] SMP On first inspection the PUD is using a 1GB page size (_PAGE_PSE) and looks fine but that's only true if support for 1GB PUD pages ("pdpe1gb") is present in the CPU. Scott's Intel Celeron N2820 does not have that feature and so the _PAGE_PSE bit is reserved. Fix this issue by making the 1GB mapping code in conditional on "cpu_has_gbpages". This issue didn't come up in the past because the required mapping for the faulting address (0x17b102020) will already have been setup by the kernel in early boot before we got to efi_map_regions(), but we no longer use the standard kernel page tables during EFI calls. Reported-by: Scott Ashcroft Tested-by: Scott Ashcroft Signed-off-by: Matt Fleming Acked-by: Borislav Petkov Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Ben Hutchings Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Matthew Garrett Cc: Peter Zijlstra Cc: Raphael Hertzog Cc: Roger Shimizu Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1457951581-27353-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 14c38ae..fcf8e29 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1055,7 +1055,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* * Map everything starting from the Gb boundary, possibly with 1G pages */ - while (end - start >= PUD_SIZE) { + while (cpu_has_gbpages && end - start >= PUD_SIZE) { set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | massage_pgprot(pud_pgprot))); -- cgit v0.10.2