From a5cab83cd3d2d75d3893276cb5f27e163484ef04 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Mar 2016 15:26:53 +1100 Subject: powerpc: Create a helper for getting the kernel toc value Move the logic to work out the kernel toc pointer into a header. This is a good cleanup, and also means we can use it elsewhere in future. Reviewed-by: Kamalesh Babulal Reviewed-by: Torsten Duwe Reviewed-by: Balbir Singh Signed-off-by: Michael Ellerman Tested-by: Kamalesh Babulal diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index a5e930a..abf5866 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -22,6 +22,18 @@ static inline int in_kernel_text(unsigned long addr) return 0; } +static inline unsigned long kernel_toc_addr(void) +{ + /* Defined by the linker, see vmlinux.lds.S */ + extern unsigned long __toc_start; + + /* + * The TOC register (r2) points 32kB into the TOC, so that 64kB of + * the TOC can be addressed using a single machine instruction. + */ + return (unsigned long)(&__toc_start) + 0x8000UL; +} + static inline int overlaps_interrupt_vector_text(unsigned long start, unsigned long end) { diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 01ea0ed..93dae29 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -17,10 +17,6 @@ #include #include -/* This symbol is provided by the linker - let it fill in the paca - * field correctly */ -extern unsigned long __toc_start; - #ifdef CONFIG_PPC_BOOK3S /* @@ -149,11 +145,6 @@ EXPORT_SYMBOL(paca); void __init initialise_paca(struct paca_struct *new_paca, int cpu) { - /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB - * of the TOC can be addressed using a single machine instruction. - */ - unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; - #ifdef CONFIG_PPC_BOOK3S new_paca->lppaca_ptr = new_lppaca(cpu); #else @@ -161,7 +152,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #endif new_paca->lock_token = 0x8000; new_paca->paca_index = cpu; - new_paca->kernel_toc = kernel_toc; + new_paca->kernel_toc = kernel_toc_addr(); new_paca->kernelbase = (unsigned long) _stext; /* Only set MSR:IR/DR when MMU is initialized */ new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR); -- cgit v0.10.2 From 136cd3450af8092f30d0e289806f08ac2aeee38f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Mar 2016 15:26:54 +1100 Subject: powerpc/module: Only try to generate the ftrace_caller() stub once Currently we generate the module stub for ftrace_caller() at the bottom of apply_relocate_add(). However apply_relocate_add() is potentially called more than once per module, which means we will try to generate the ftrace_caller() stub multiple times. Although the current code deals with that correctly, ie. it only generates a stub the first time, it would be clearer to only try to generate the stub once. Note also on first reading it may appear that we generate a different stub for each section that requires relocation, but that is not the case. The code in stub_for_addr() that searches for an existing stub uses sechdrs[me->arch.stubs_section], ie. the single stub section for this module. A cleaner approach is to only generate the ftrace_caller() stub once, from module_finalize(). Although the original code didn't check to see if the stub was actually generated correctly, it seems prudent to add a check, so do that. And an additional benefit is we can clean the ifdefs up a little. 
Finally we must propagate the const'ness of some of the pointers passed to module_finalize(), but that is also an improvement. Reviewed-by: Balbir Singh Reviewed-by: Torsten Duwe Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index dcfcad1..74d25a7 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -82,6 +82,15 @@ bool is_module_trampoline(u32 *insns); int module_trampoline_target(struct module *mod, u32 *trampoline, unsigned long *target); +#ifdef CONFIG_DYNAMIC_FTRACE +int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs); +#else +static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) +{ + return 0; +} +#endif + struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, struct exception_table_entry *finish); diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 9547381..d1f1b35 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -47,6 +47,11 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { const Elf_Shdr *sect; + int rc; + + rc = module_finalize_ftrace(me, sechdrs); + if (rc) + return rc; /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 2c01665..5a7a78f 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -181,7 +181,7 @@ static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) /* Set up a trampoline in the PLT to bounce us to the distant function */ static uint32_t do_plt_call(void *location, Elf32_Addr val, - Elf32_Shdr *sechdrs, + const Elf32_Shdr *sechdrs, struct module *mod) { struct ppc_plt_entry *entry; @@ -294,11 +294,19 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, return -ENOEXEC; } } + + return 0; +} + #ifdef CONFIG_DYNAMIC_FTRACE - module->arch.tramp = - do_plt_call(module->core_layout.base, - (unsigned long)ftrace_caller, - sechdrs, module); -#endif +int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) +{ + module->arch.tramp = do_plt_call(module->core_layout.base, + (unsigned long)ftrace_caller, + sechdrs, module); + if (!module->arch.tramp) + return -ENOENT; + return 0; } +#endif diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ac64ffd..599c753 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -413,7 +413,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, /* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the value maximum span in an instruction which uses a signed offset) */ -static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) +static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) { return sechdrs[me->arch.toc_section].sh_addr + 0x8000; } @@ -426,7 +426,7 @@ static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) #define PPC_HA(v) PPC_HI ((v) + 0x8000) /* Patch stub to reference function and correct r2 value. 
*/ -static inline int create_stub(Elf64_Shdr *sechdrs, +static inline int create_stub(const Elf64_Shdr *sechdrs, struct ppc64_stub_entry *entry, unsigned long addr, struct module *me) @@ -452,7 +452,7 @@ static inline int create_stub(Elf64_Shdr *sechdrs, /* Create stub to jump to function described in this OPD/ptr: we need the stub to set up the TOC ptr (r2) for the function. */ -static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, +static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, unsigned long addr, struct module *me) { @@ -693,12 +693,18 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } } + return 0; +} + #ifdef CONFIG_DYNAMIC_FTRACE - me->arch.toc = my_r2(sechdrs, me); - me->arch.tramp = stub_for_addr(sechdrs, - (unsigned long)ftrace_caller, - me); -#endif +int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) +{ + mod->arch.toc = my_r2(sechdrs, mod); + mod->arch.tramp = stub_for_addr(sechdrs, (unsigned long)ftrace_caller, mod); + + if (!mod->arch.tramp) + return -ENOENT; return 0; } +#endif -- cgit v0.10.2 From f17c4e01e906c568fb03a2e6c2b99e4ee4587fbf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Mar 2016 15:26:55 +1100 Subject: powerpc/module: Mark module stubs with a magic value When a module is loaded, calls out to the kernel go via a stub which is generated at runtime. One of these stubs is used to call _mcount(), which is the default target of tracing calls generated by the compiler with -pg. If dynamic ftrace is enabled (which it typically is), another stub is used to call ftrace_caller(), which is the target of tracing calls when ftrace is actually active. ftrace then wants to disable the calls to _mcount() at module startup, and enable/disable the calls to ftrace_caller() when enabling/disabling tracing - all of these it does by patching the code. As part of that code patching, the ftrace code wants to confirm that the branch it is about to modify is in fact a call to a module stub which calls _mcount() or ftrace_caller(). Currently it does that by inspecting the instructions and confirming they are what it expects. Although that works, the code to do it is pretty intricate because it requires lots of knowledge about the exact format of the stub. We can make that process easier by marking the generated stubs with a magic value, and then looking for that magic value. Although this is not as rigorous as the current method, I believe it is sufficient in practice.
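As an illustration of the scheme, the check reduces to writing a known constant into the generated stub and testing for that constant before patching. Below is a minimal stand-alone sketch of the idea; the struct layout mirrors ppc64_stub_entry from the diff below, but the helper names are illustrative rather than the kernel's:

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#define STUB_MAGIC	0x73747562	/* "stub" in ASCII, as in the patch */

struct example_stub {
	uint32_t jump[7];	/* runtime-generated branch code */
	uint32_t magic;		/* tag written when the stub is created */
	uint64_t funcdata;	/* target address / function descriptor */
};

/* Creation side: generate the code, then tag the stub. */
static void example_create_stub(struct example_stub *stub, const uint32_t *insns)
{
	memcpy(stub->jump, insns, sizeof(stub->jump));
	stub->magic = STUB_MAGIC;
}

/* Checking side: a single compare replaces the per-instruction mask test. */
static bool example_is_stub(const struct example_stub *stub)
{
	return stub->magic == STUB_MAGIC;
}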
Reviewed-by: Balbir Singh Reviewed-by: Torsten Duwe Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 74d25a7..5b6b5a4 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -78,8 +78,7 @@ struct mod_arch_specific { # endif /* MODULE */ #endif -bool is_module_trampoline(u32 *insns); -int module_trampoline_target(struct module *mod, u32 *trampoline, +int module_trampoline_target(struct module *mod, unsigned long trampoline, unsigned long *target); #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 44d4d8e..4505cbf 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -106,10 +106,9 @@ static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; - unsigned long entry, ptr; + unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - void *tramp; + unsigned int op; /* read where this goes */ if (probe_kernel_read(&op, (void *)ip, sizeof(int))) @@ -122,14 +121,9 @@ __ftrace_make_nop(struct module *mod, } /* lets find where the pointer goes */ - tramp = (void *)find_bl_target(ip, op); - - pr_devel("ip:%lx jumps to %p", ip, tramp); + tramp = find_bl_target(ip, op); - if (!is_module_trampoline(tramp)) { - pr_err("Not a trampoline\n"); - return -EINVAL; - } + pr_devel("ip:%lx jumps to %lx", ip, tramp); if (module_trampoline_target(mod, tramp, &ptr)) { pr_err("Failed to get trampoline target\n"); diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 599c753..9629966 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -96,6 +96,8 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) } #endif +#define STUB_MAGIC 0x73747562 /* stub */ + /* Like PPC32, we need little trampolines to do > 24-bit jumps (into the kernel itself). But on PPC64, these need to be used for every jump, actually, to reset r2 (TOC+0x8000). */ @@ -105,7 +107,8 @@ struct ppc64_stub_entry * need 6 instructions on ABIv2 but we always allocate 7 so * so we don't have to modify the trampoline load instruction. 
*/ u32 jump[7]; - u32 unused; + /* Used by ftrace to identify stubs */ + u32 magic; /* Data for the above code */ func_desc_t funcdata; }; @@ -139,70 +142,39 @@ static u32 ppc64_stub_insns[] = { }; #ifdef CONFIG_DYNAMIC_FTRACE - -static u32 ppc64_stub_mask[] = { - 0xffff0000, - 0xffff0000, - 0xffffffff, - 0xffffffff, -#if !defined(_CALL_ELF) || _CALL_ELF != 2 - 0xffffffff, -#endif - 0xffffffff, - 0xffffffff -}; - -bool is_module_trampoline(u32 *p) +int module_trampoline_target(struct module *mod, unsigned long addr, + unsigned long *target) { - unsigned int i; - u32 insns[ARRAY_SIZE(ppc64_stub_insns)]; - - BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask)); + struct ppc64_stub_entry *stub; + func_desc_t funcdata; + u32 magic; - if (probe_kernel_read(insns, p, sizeof(insns))) + if (!within_module_core(addr, mod)) { + pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name); return -EFAULT; - - for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { - u32 insna = insns[i]; - u32 insnb = ppc64_stub_insns[i]; - u32 mask = ppc64_stub_mask[i]; - - if ((insna & mask) != (insnb & mask)) - return false; } - return true; -} + stub = (struct ppc64_stub_entry *)addr; -int module_trampoline_target(struct module *mod, u32 *trampoline, - unsigned long *target) -{ - u32 buf[2]; - u16 upper, lower; - long offset; - void *toc_entry; - - if (probe_kernel_read(buf, trampoline, sizeof(buf))) + if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { + pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; + } - upper = buf[0] & 0xffff; - lower = buf[1] & 0xffff; - - /* perform the addis/addi, both signed */ - offset = ((short)upper << 16) + (short)lower; + if (magic != STUB_MAGIC) { + pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; + } - /* - * Now get the address this trampoline jumps to. This - * is always 32 bytes into our trampoline stub. - */ - toc_entry = (void *)mod->arch.toc + offset + 32; + if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { + pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; + } - if (probe_kernel_read(target, toc_entry, sizeof(*target))) - return -EFAULT; + *target = stub_func_addr(funcdata); return 0; } - #endif /* Count how many different 24-bit relocations (different symbol, @@ -447,6 +419,8 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, entry->jump[0] |= PPC_HA(reladdr); entry->jump[1] |= PPC_LO(reladdr); entry->funcdata = func_desc(addr); + entry->magic = STUB_MAGIC; + return 1; } -- cgit v0.10.2 From 336a7b5dd80a2a7b463dc8ede4862425940edeb5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Mar 2016 15:26:56 +1100 Subject: powerpc/module: Create a special stub for ftrace_caller() In order to support the new -mprofile-kernel ABI, we need to be able to call from the module back to ftrace_caller() (in the kernel) without using the module's r2. That is because the function in this module which is calling ftrace_caller() may not have setup r2, if it doesn't otherwise need it (ie. it accesses no globals). To make that work we add a new stub which is used for calling ftrace_caller(), which uses the kernel toc instead of the module toc. 
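The stub can only be generated if ftrace_caller() lies within a signed 32-bit offset of the kernel TOC, because the address is formed with an addis/addi pair. A rough, stand-alone sketch of that range check and split follows; the helper name is hypothetical, and the real logic is create_ftrace_stub() in the diff below:

#include <stdio.h>

static int example_toc_offset(unsigned long target, unsigned long kernel_toc)
{
	long reladdr = (long)(target - kernel_toc);

	/* The addis/addi pair can only encode a signed 32-bit displacement. */
	if (reladdr > 0x7FFFFFFFL || reladdr < -0x80000000L)
		return -1;

	/* Split the offset as the stub does, cf. PPC_HA()/PPC_LO() */
	printf("addis imm = %#lx, addi imm = %#lx\n",
	       (((unsigned long)reladdr + 0x8000) >> 16) & 0xffff,
	       (unsigned long)reladdr & 0xffff);
	return 0;
}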
Reviewed-by: Balbir Singh Reviewed-by: Torsten Duwe Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 9629966..76c0963 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -31,6 +31,7 @@ #include #include #include +#include /* FIXME: We don't do .init separately. To do this, we'd need to have a separate r2 value in the init and core section, and stub between @@ -671,10 +672,76 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE + +#ifdef CC_USING_MPROFILE_KERNEL + +#define PACATOC offsetof(struct paca_struct, kernel_toc) + +/* + * For mprofile-kernel we use a special stub for ftrace_caller() because we + * can't rely on r2 containing this module's TOC when we enter the stub. + * + * That can happen if the function calling us didn't need to use the toc. In + * that case it won't have setup r2, and the r2 value will be either the + * kernel's toc, or possibly another modules toc. + * + * To deal with that this stub uses the kernel toc, which is always accessible + * via the paca (in r13). The target (ftrace_caller()) is responsible for + * saving and restoring the toc before returning. + */ +static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me) +{ + struct ppc64_stub_entry *entry; + unsigned int i, num_stubs; + static u32 stub_insns[] = { + 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */ + 0x3d8c0000, /* addis r12,r12, */ + 0x398c0000, /* addi r12,r12, */ + 0x7d8903a6, /* mtctr r12 */ + 0x4e800420, /* bctr */ + }; + long reladdr; + + num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*entry); + + /* Find the next available stub entry */ + entry = (void *)sechdrs[me->arch.stubs_section].sh_addr; + for (i = 0; i < num_stubs && stub_func_addr(entry->funcdata); i++, entry++); + + if (i >= num_stubs) { + pr_err("%s: Unable to find a free slot for ftrace stub.\n", me->name); + return 0; + } + + memcpy(entry->jump, stub_insns, sizeof(stub_insns)); + + /* Stub uses address relative to kernel toc (from the paca) */ + reladdr = (unsigned long)ftrace_caller - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("%s: Address of ftrace_caller out of range of kernel_toc.\n", me->name); + return 0; + } + + entry->jump[1] |= PPC_HA(reladdr); + entry->jump[2] |= PPC_LO(reladdr); + + /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */ + entry->funcdata = func_desc((unsigned long)ftrace_caller); + entry->magic = STUB_MAGIC; + + return (unsigned long)entry; +} +#else +static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me) +{ + return stub_for_addr(sechdrs, (unsigned long)ftrace_caller, me); +} +#endif + int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { mod->arch.toc = my_r2(sechdrs, mod); - mod->arch.tramp = stub_for_addr(sechdrs, (unsigned long)ftrace_caller, mod); + mod->arch.tramp = create_ftrace_stub(sechdrs, mod); if (!mod->arch.tramp) return -ENOENT; -- cgit v0.10.2 From c96f83856f56a66a5d7fd730958bec80c9b5a020 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Thu, 3 Mar 2016 15:26:57 +1100 Subject: powerpc/ftrace: Use generic ftrace_modify_all_code() Convert powerpc's arch_ftrace_update_code() from its own version to use the generic default functionality (without stop_machine -- our instructions are properly aligned and the replacements atomic). With this we gain error checking and the much-needed function_trace_op handling. 
Reviewed-by: Balbir Singh Reviewed-by: Kamalesh Babulal Signed-off-by: Torsten Duwe Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 4505cbf..62899fb 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -449,20 +449,13 @@ void ftrace_replace_code(int enable) } } +/* + * Use the default ftrace_modify_all_code, but without + * stop_machine(). + */ void arch_ftrace_update_code(int command) { - if (command & FTRACE_UPDATE_CALLS) - ftrace_replace_code(1); - else if (command & FTRACE_DISABLE_CALLS) - ftrace_replace_code(0); - - if (command & FTRACE_UPDATE_TRACE_FUNC) - ftrace_update_ftrace_func(ftrace_trace_function); - - if (command & FTRACE_START_FUNC_RET) - ftrace_enable_ftrace_graph_caller(); - else if (command & FTRACE_STOP_FUNC_RET) - ftrace_disable_ftrace_graph_caller(); + ftrace_modify_all_code(command); } int __init ftrace_dyn_arch_init(void) -- cgit v0.10.2 From 9a7841ae8d6ce9b7a7cf879c9968fcf4c9545563 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Thu, 3 Mar 2016 15:26:58 +1100 Subject: powerpc/ftrace: Use $(CC_FLAGS_FTRACE) when disabling ftrace Rather than open-coding -pg whereever we want to disable ftrace, use the existing $(CC_FLAGS_FTRACE) variable. This has the advantage that it will work in future when we use a different set of flags to enable ftrace. Signed-off-by: Torsten Duwe Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 794f22a..2da380f 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -16,14 +16,14 @@ endif ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code -CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) # do not trace tracer code -CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) # timers used by tracing -CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) endif obj-y := cputable.o ptrace.o syscalls.o \ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index a47e142..4513d1a 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -6,8 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -CFLAGS_REMOVE_code-patching.o = -pg -CFLAGS_REMOVE_feature-fixups.o = -pg +CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE) obj-y += string.o alloc.o crtsavres.o ppc_ksyms.o code-patching.o \ feature-fixups.o diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index 52c6ce1..1eb7b45 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -2,7 +2,7 @@ CFLAGS_bootx_init.o += -fPIC ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code -CFLAGS_REMOVE_bootx_init.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_bootx_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) endif obj-y += pic.o setup.o time.o feature.o pci.o \ -- cgit v0.10.2 From 
153086644fd1fb07fb3af84d9f11542a19b1e8b6 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Thu, 3 Mar 2016 15:26:59 +1100 Subject: powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI The gcc switch -mprofile-kernel defines a new ABI for calling _mcount() very early in the function with minimal overhead. Although mprofile-kernel has been available since GCC 3.4, there were bugs which were only fixed recently. Currently it is known to work in GCC 4.9, 5 and 6. Additionally there are two possible code sequences generated by the flag, the first uses mflr/std/bl and the second is optimised to omit the std. Currently only gcc 6 has the optimised sequence. This patch supports both sequences. Initial work started by Vojtech Pavlik, used with permission. Key changes: - rework _mcount() to work for both the old and new ABIs. - implement new versions of ftrace_caller() and ftrace_graph_caller() which deal with the new ABI. - updates to __ftrace_make_nop() to recognise the new mcount calling sequence. - updates to __ftrace_make_call() to recognise the nop'ed sequence. - implement ftrace_modify_call(). - updates to the module loader to surpress the toc save in the module stub when calling mcount with the new ABI. Reviewed-by: Balbir Singh Signed-off-by: Torsten Duwe Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 840a550..994c60a 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -99,4 +99,25 @@ static inline unsigned long ppc_global_function_entry(void *func) #endif } +#ifdef CONFIG_PPC64 +/* + * Some instruction encodings commonly used in dynamic ftracing + * and function live patching. + */ + +/* This must match the definition of STK_GOT in */ +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define R2_STACK_OFFSET 24 +#else +#define R2_STACK_OFFSET 40 +#endif + +#define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \ + ___PPC_RA(__REG_R1) | R2_STACK_OFFSET) + +/* usually preceded by a mflr r0 */ +#define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \ + ___PPC_RA(__REG_R1) | PPC_LR_STKOFF) +#endif /* CONFIG_PPC64 */ + #endif /* _ASM_POWERPC_CODE_PATCHING_H */ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index ef89b14..50ca758 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -46,6 +46,8 @@ extern void _mcount(void); #ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_ADDR ((unsigned long)ftrace_caller) +# define FTRACE_REGS_ADDR FTRACE_ADDR static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* reloction of mcount call site is the same as the address */ @@ -58,6 +60,9 @@ struct dyn_arch_ftrace { #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif #endif #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0d525ce..ec7f8aa 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1143,8 +1143,12 @@ _GLOBAL(enter_prom) #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) - blr + mflr r12 + mtctr r12 + mtlr r0 + bctr +#ifndef CC_USING_MPROFILE_KERNEL _GLOBAL_TOC(ftrace_caller) /* Taken from output of objdump from lib64/glibc */ mflr r3 @@ -1166,6 +1170,115 @@ _GLOBAL(ftrace_graph_stub) ld r0, 128(r1) mtlr r0 addi r1, 
r1, 112 + +#else /* CC_USING_MPROFILE_KERNEL */ +/* + * + * ftrace_caller() is the function that replaces _mcount() when ftrace is + * active. + * + * We arrive here after a function A calls function B, and we are the trace + * function for B. When we enter r1 points to A's stack frame, B has not yet + * had a chance to allocate one yet. + * + * Additionally r2 may point either to the TOC for A, or B, depending on + * whether B did a TOC setup sequence before calling us. + * + * On entry the LR points back to the _mcount() call site, and r0 holds the + * saved LR as it was on entry to B, ie. the original return address at the + * call site in A. + * + * Our job is to save the register state into a struct pt_regs (on the stack) + * and then arrange for the ftrace function to be called. + */ +_GLOBAL(ftrace_caller) + /* Save the original return address in A's stack frame */ + std r0,LRSAVE(r1) + + /* Create our stack frame + pt_regs */ + stdu r1,-SWITCH_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ + SAVE_8GPRS(0,r1) + SAVE_8GPRS(8,r1) + SAVE_8GPRS(16,r1) + SAVE_8GPRS(24,r1) + + /* Load special regs for save below */ + mfmsr r8 + mfctr r9 + mfxer r10 + mfcr r11 + + /* Get the _mcount() call site out of LR */ + mflr r7 + /* Save it as pt_regs->nip & pt_regs->link */ + std r7, _NIP(r1) + std r7, _LINK(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2,PACATOC(r13) /* get kernel TOC in r2 */ + + addis r3,r2,function_trace_op@toc@ha + addi r3,r3,function_trace_op@toc@l + ld r5,0(r3) + + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Save special regs */ + std r8, _MSR(r1) + std r9, _CTR(r1) + std r10, _XER(r1) + std r11, _CCR(r1) + + /* Load &pt_regs in r6 for call below */ + addi r6, r1 ,STACK_FRAME_OVERHEAD + + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + + /* Load ctr with the possibly modified NIP */ + ld r3, _NIP(r1) + mtctr r3 + + /* Restore gprs */ + REST_8GPRS(0,r1) + REST_8GPRS(8,r1) + REST_8GPRS(16,r1) + REST_8GPRS(24,r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + /* Pop our stack frame */ + addi r1, r1, SWITCH_FRAME_SIZE + + /* Restore original LR for return to B */ + ld r0, LRSAVE(r1) + mtlr r0 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + stdu r1, -112(r1) +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) + addi r1, r1, 112 +#endif + + ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */ + mtlr r0 + bctr /* jump after _mcount site */ +#endif /* CC_USING_MPROFILE_KERNEL */ + _GLOBAL(ftrace_stub) blr #else @@ -1198,6 +1311,7 @@ _GLOBAL(ftrace_stub) #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifndef CC_USING_MPROFILE_KERNEL _GLOBAL(ftrace_graph_caller) /* load r4 with local address */ ld r4, 128(r1) @@ -1222,6 +1336,56 @@ _GLOBAL(ftrace_graph_caller) addi r1, r1, 112 blr +#else /* CC_USING_MPROFILE_KERNEL */ +_GLOBAL(ftrace_graph_caller) + /* with -mprofile-kernel, parameter regs are still alive at _mcount */ + std r10, 104(r1) + std r9, 96(r1) + std r8, 88(r1) + std r7, 80(r1) + std r6, 72(r1) + std r5, 64(r1) + std r4, 56(r1) + std r3, 48(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2, PACATOC(r13) /* get kernel TOC in r2 */ + + mfctr r4 /* ftrace_caller has moved local addr here */ + std r4, 40(r1) + mflr r3 /* ftrace_caller has restored LR from stack */ + 
subi r4, r4, MCOUNT_INSN_SIZE + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR to this. + */ + mtlr r3 + + ld r0, 40(r1) + mtctr r0 + ld r10, 104(r1) + ld r9, 96(r1) + ld r8, 88(r1) + ld r7, 80(r1) + ld r6, 72(r1) + ld r5, 64(r1) + ld r4, 56(r1) + ld r3, 48(r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + addi r1, r1, 112 + mflr r0 + std r0, LRSAVE(r1) + bctr +#endif /* CC_USING_MPROFILE_KERNEL */ + _GLOBAL(return_to_handler) /* need to save return values */ std r4, -32(r1) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 62899fb..9dac18da 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -61,8 +61,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) return -EFAULT; /* Make sure it is what we expect it to be */ - if (replaced != old) + if (replaced != old) { + pr_err("%p: replaced (%#x) != old (%#x)", + (void *)ip, replaced, old); return -EINVAL; + } /* replace the text with the new text */ if (patch_instruction((unsigned int *)ip, new)) @@ -108,11 +111,13 @@ __ftrace_make_nop(struct module *mod, { unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - unsigned int op; + unsigned int op, pop; /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) + if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + pr_err("Fetching opcode failed.\n"); return -EFAULT; + } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { @@ -152,10 +157,42 @@ __ftrace_make_nop(struct module *mod, * * Use a b +8 to jump over the load. */ - op = 0x48000008; /* b +8 */ - if (patch_instruction((unsigned int *)ip, op)) + pop = PPC_INST_BRANCH | 8; /* b +8 */ + + /* + * Check what is in the next instruction. We can see ld r2,40(r1), but + * on first pass after boot we will see mflr r0. + */ + if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) { + pr_err("Fetching op failed.\n"); + return -EFAULT; + } + + if (op != PPC_INST_LD_TOC) { + unsigned int inst; + + if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) { + pr_err("Fetching instruction at %lx failed.\n", ip - 4); + return -EFAULT; + } + + /* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */ + if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) { + pr_err("Unexpected instructions around bl _mcount\n" + "when enabling dynamic ftrace!\t" + "(%08x,bl,%08x)\n", inst, op); + return -EINVAL; + } + + /* When using -mkernel_profile there is no load to jump over */ + pop = PPC_INST_NOP; + } + + if (patch_instruction((unsigned int *)ip, pop)) { + pr_err("Patching NOP failed.\n"); return -EPERM; + } return 0; } @@ -281,16 +318,15 @@ int ftrace_make_nop(struct module *mod, #ifdef CONFIG_MODULES #ifdef CONFIG_PPC64 +/* + * Examine the existing instructions for __ftrace_make_call. + * They should effectively be a NOP, and follow formal constraints, + * depending on the ABI. Return false if they don't. + */ +#ifndef CC_USING_MPROFILE_KERNEL static int -__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) { - unsigned int op[2]; - void *ip = (void *)rec->ip; - - /* read where this goes */ - if (probe_kernel_read(op, ip, sizeof(op))) - return -EFAULT; - /* * We expect to see: * @@ -300,8 +336,34 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * The load offset is different depending on the ABI. For simplicity * just mask it out when doing the compare. 
*/ - if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) { - pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]); + if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000)) + return 0; + return 1; +} +#else +static int +expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) +{ + /* look for patched "NOP" on ppc64 with -mprofile-kernel */ + if (op0 != PPC_INST_NOP) + return 0; + return 1; +} +#endif + +static int +__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned int op[2]; + void *ip = (void *)rec->ip; + + /* read where this goes */ + if (probe_kernel_read(op, ip, sizeof(op))) + return -EFAULT; + + if (!expected_nop_sequence(ip, op[0], op[1])) { + pr_err("Unexpected call sequence at %p: %x %x\n", + ip, op[0], op[1]); return -EINVAL; } @@ -324,7 +386,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return 0; } -#else + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return ftrace_make_call(rec, addr); +} +#endif + +#else /* !CONFIG_PPC64: */ static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 76c0963..848b474 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -42,7 +42,6 @@ --RR. */ #if defined(_CALL_ELF) && _CALL_ELF == 2 -#define R2_STACK_OFFSET 24 /* An address is simply the address of the function. */ typedef unsigned long func_desc_t; @@ -74,7 +73,6 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); } #else -#define R2_STACK_OFFSET 40 /* An address is address of the OPD entry, which contains address of fn. */ typedef struct ppc64_opd_entry func_desc_t; @@ -451,17 +449,60 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, return (unsigned long)&stubs[i]; } +#ifdef CC_USING_MPROFILE_KERNEL +static bool is_early_mcount_callsite(u32 *instruction) +{ + /* + * Check if this is one of the -mprofile-kernel sequences. + */ + if (instruction[-1] == PPC_INST_STD_LR && + instruction[-2] == PPC_INST_MFLR) + return true; + + if (instruction[-1] == PPC_INST_MFLR) + return true; + + return false; +} + +/* + * In case of _mcount calls, do not save the current callee's TOC (in r2) into + * the original caller's stack frame. If we did we would clobber the saved TOC + * value of the original caller. + */ +static void squash_toc_save_inst(const char *name, unsigned long addr) +{ + struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr; + + /* Only for calls to _mcount */ + if (strcmp("_mcount", name) != 0) + return; + + stub->jump[2] = PPC_INST_NOP; +} +#else +static void squash_toc_save_inst(const char *name, unsigned long addr) { } + +/* without -mprofile-kernel, mcount calls are never early */ +static bool is_early_mcount_callsite(u32 *instruction) +{ + return false; +} +#endif + /* We expect a noop next: if it is, replace it with instruction to restore r2. 
*/ static int restore_r2(u32 *instruction, struct module *me) { if (*instruction != PPC_INST_NOP) { + if (is_early_mcount_callsite(instruction - 1)) + return 1; pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; } /* ld r2,R2_STACK_OFFSET(r1) */ - *instruction = 0xe8410000 | R2_STACK_OFFSET; + *instruction = PPC_INST_LD_TOC; return 1; } @@ -586,6 +627,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return -ENOENT; if (!restore_r2((u32 *)location + 1, me)) return -ENOEXEC; + + squash_toc_save_inst(strtab + sym->st_name, value); } else value += local_entry_offset(sym); -- cgit v0.10.2 From 8c50b72a3b4f1f7cdfdfebd233b1cbd121262e65 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Thu, 3 Mar 2016 15:27:00 +1100 Subject: powerpc/ftrace: Add Kconfig & Make glue for mprofile-kernel Firstly we add logic to Kconfig to allow a user to choose if they want mprofile-kernel. This has to be user-selectable because only some current toolchains support it. If we enabled it unconditionally we would prevent some users from building the kernel entirely. Arguably it would be nice if we could detect if mprofile-kernel was available, and use it then. However that would violate the principle of least surprise because a user having choosen options such as live patching, would then see them quietly disabled at build time. We also make the user selectable option negative, ie. it disables when selected, so that allyesconfig continues to build on old toolchains. Once we've decided we do want to use mprofile-kernel, we then add a script which checks it actually works. That is because there are versions of gcc that accept the flag but don't generate correct code. Due to the way kconfig works, we can't error out when we detect a non-working toolchain. If we did a user would never be able to modify their config and run oldconfig - because the check would block oldconfig from running. Instead we emit a warning and add a bogus flag to CFLAGS so that the build will fail. Signed-off-by: Torsten Duwe Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e4824fd..91da283 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -94,6 +94,7 @@ config PPC select OF_RESERVED_MEM select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select SYSCTL_EXCEPTION_TRACE @@ -373,6 +374,24 @@ config PPC_TRANSACTIONAL_MEM ---help--- Support user-mode Transactional Memory on POWERPC. +config DISABLE_MPROFILE_KERNEL + bool "Disable use of mprofile-kernel for kernel tracing" + depends on PPC64 && CPU_LITTLE_ENDIAN + default y + help + Selecting this options disables use of the mprofile-kernel ABI for + kernel tracing. That will cause options such as live patching + (CONFIG_LIVEPATCH) which depend on CONFIG_DYNAMIC_FTRACE_WITH_REGS to + be disabled also. + + If you have a toolchain which supports mprofile-kernel, then you can + enable this. Otherwise leave it disabled. If you're not sure, say + "N". 
+ +config MPROFILE_KERNEL + depends on PPC64 && CPU_LITTLE_ENDIAN + def_bool !DISABLE_MPROFILE_KERNEL + config IOMMU_HELPER def_bool PPC64 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 96efd82..f4e49a4 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -133,6 +133,21 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif +ifdef CONFIG_MPROFILE_KERNEL + ifeq ($(shell $(srctree)/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh $(CC) -I$(srctree)/include -D__KERNEL__),OK) + CC_FLAGS_FTRACE := -pg -mprofile-kernel + KBUILD_CPPFLAGS += -DCC_USING_MPROFILE_KERNEL + else + # If the user asked for mprofile-kernel but the toolchain doesn't + # support it, emit a warning and deliberately break the build later + # with mprofile-kernel-not-supported. We would prefer to make this an + # error right here, but then the user would never be able to run + # oldconfig to change their configuration. + $(warning Compiler does not support mprofile-kernel, set CONFIG_DISABLE_MPROFILE_KERNEL) + CC_FLAGS_FTRACE := -mprofile-kernel-not-supported + endif +endif + CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell) CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4) CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5) diff --git a/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh b/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh new file mode 100755 index 0000000..c658d8c --- /dev/null +++ b/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -e +set -o pipefail + +# To debug, uncomment the following line +# set -x + +# Test whether the compile option -mprofile-kernel exists and generates +# profiling code (ie. a call to _mcount()). +echo "int func() { return 0; }" | \ + $* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \ + grep -q "_mcount" + +# Test whether the notrace attribute correctly suppresses calls to _mcount(). + +echo -e "#include \nnotrace int func() { return 0; }" | \ + $* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \ + grep -q "_mcount" && \ + exit 1 + +echo "OK" +exit 0 -- cgit v0.10.2 From 8d98e96b345e13659aa42bdc611368863ce65cbe Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 22 Mar 2016 20:03:15 -0400 Subject: Elf: add livepatch-specific Elf constants Livepatch manages its own relocation sections and symbols in order to be able to reuse module loader code to write relocations. This removes livepatch's dependence on separate "dynrela" sections to write relocations and also allows livepatch to patch modules that are not yet loaded. The livepatch Elf relocation section flag (SHF_RELA_LIVEPATCH), and symbol section index (SHN_LIVEPATCH) allow both livepatch and the module loader to identify livepatch relocation sections and livepatch symbols. Livepatch relocation sections are marked with SHF_RELA_LIVEPATCH to indicate to the module loader that it should not apply that relocation section and that livepatch will handle them. The SHN_LIVEPATCH shndx marks symbols that will be resolved by livepatch. The module loader ignores these symbols and does not attempt to resolve them. The values of these Elf constants were selected from OS-specific ranges according to the definitions from glibc.
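For illustration, this is how an Elf consumer can use the two new constants to tell livepatch relocation sections and symbols apart from ordinary ones; the helper names here are hypothetical, and the corresponding kernel checks appear in the module loader patches that follow:

#include <elf.h>
#include <stdbool.h>

#ifndef SHF_RELA_LIVEPATCH
#define SHF_RELA_LIVEPATCH	0x00100000
#endif
#ifndef SHN_LIVEPATCH
#define SHN_LIVEPATCH		0xff20
#endif

/* Relocation sections livepatch applies itself; the module loader skips them. */
static bool example_is_klp_rela_section(const Elf64_Shdr *shdr)
{
	return (shdr->sh_flags & SHF_RELA_LIVEPATCH) != 0;
}

/* Symbols the module loader leaves unresolved; livepatch resolves them later. */
static bool example_is_klp_symbol(const Elf64_Sym *sym)
{
	return sym->st_shndx == SHN_LIVEPATCH;
}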
Signed-off-by: Jessica Yu Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 71e1d0e..cb4a72f 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -282,16 +282,18 @@ typedef struct elf64_phdr { #define SHT_HIUSER 0xffffffff /* sh_flags */ -#define SHF_WRITE 0x1 -#define SHF_ALLOC 0x2 -#define SHF_EXECINSTR 0x4 -#define SHF_MASKPROC 0xf0000000 +#define SHF_WRITE 0x1 +#define SHF_ALLOC 0x2 +#define SHF_EXECINSTR 0x4 +#define SHF_RELA_LIVEPATCH 0x00100000 +#define SHF_MASKPROC 0xf0000000 /* special section indexes */ #define SHN_UNDEF 0 #define SHN_LORESERVE 0xff00 #define SHN_LOPROC 0xff00 #define SHN_HIPROC 0xff1f +#define SHN_LIVEPATCH 0xff20 #define SHN_ABS 0xfff1 #define SHN_COMMON 0xfff2 #define SHN_HIRESERVE 0xffff -- cgit v0.10.2 From 1ce15ef4f60529cf1313f80f4338c88bd65cc572 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 22 Mar 2016 20:03:16 -0400 Subject: module: preserve Elf information for livepatch modules For livepatch modules, copy Elf section, symbol, and string information from the load_info struct in the module loader. Persist copies of the original symbol table and string table. Livepatch manages its own relocation sections in order to reuse module loader code to write relocations. Livepatch modules must preserve Elf information such as section indices in order to apply livepatch relocation sections using the module loader's apply_relocate_add() function. In order to apply livepatch relocation sections, livepatch modules must keep a complete copy of their original symbol table in memory. Normally, a stripped down copy of a module's symbol table (containing only "core" symbols) is made available through module->core_symtab. But for livepatch modules, the symbol table copied into memory on module load must be exactly the same as the symbol table produced when the patch module was compiled. This is because the relocations in each livepatch relocation section refer to their respective symbols with their symbol indices, and the original symbol indices (and thus the symtab ordering) must be preserved in order for apply_relocate_add() to find the right symbol. Signed-off-by: Jessica Yu Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Acked-by: Rusty Russell Reviewed-by: Rusty Russell Signed-off-by: Jiri Kosina diff --git a/include/linux/module.h b/include/linux/module.h index 2bb0c30..3daf2b3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -330,6 +330,15 @@ struct mod_kallsyms { char *strtab; }; +#ifdef CONFIG_LIVEPATCH +struct klp_modinfo { + Elf_Ehdr hdr; + Elf_Shdr *sechdrs; + char *secstrings; + unsigned int symndx; +}; +#endif + struct module { enum module_state state; @@ -456,7 +465,11 @@ struct module { #endif #ifdef CONFIG_LIVEPATCH + bool klp; /* Is this a livepatch module? */ bool klp_alive; + + /* Elf information */ + struct klp_modinfo *klp_info; #endif #ifdef CONFIG_MODULE_UNLOAD @@ -630,6 +643,18 @@ static inline bool module_requested_async_probing(struct module *module) return module && module->async_probe_requested; } +#ifdef CONFIG_LIVEPATCH +static inline bool is_livepatch_module(struct module *mod) +{ + return mod->klp; +} +#else /* !CONFIG_LIVEPATCH */ +static inline bool is_livepatch_module(struct module *mod) +{ + return false; +} +#endif /* CONFIG_LIVEPATCH */ + #else /* !CONFIG_MODULES... */ /* Given an address, look for it in the exception tables. 
*/ diff --git a/kernel/module.c b/kernel/module.c index 041200c..5f71aa6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1973,6 +1973,83 @@ static void module_enable_nx(const struct module *mod) { } static void module_disable_nx(const struct module *mod) { } #endif +#ifdef CONFIG_LIVEPATCH +/* + * Persist Elf information about a module. Copy the Elf header, + * section header table, section string table, and symtab section + * index from info to mod->klp_info. + */ +static int copy_module_elf(struct module *mod, struct load_info *info) +{ + unsigned int size, symndx; + int ret; + + size = sizeof(*mod->klp_info); + mod->klp_info = kmalloc(size, GFP_KERNEL); + if (mod->klp_info == NULL) + return -ENOMEM; + + /* Elf header */ + size = sizeof(mod->klp_info->hdr); + memcpy(&mod->klp_info->hdr, info->hdr, size); + + /* Elf section header table */ + size = sizeof(*info->sechdrs) * info->hdr->e_shnum; + mod->klp_info->sechdrs = kmalloc(size, GFP_KERNEL); + if (mod->klp_info->sechdrs == NULL) { + ret = -ENOMEM; + goto free_info; + } + memcpy(mod->klp_info->sechdrs, info->sechdrs, size); + + /* Elf section name string table */ + size = info->sechdrs[info->hdr->e_shstrndx].sh_size; + mod->klp_info->secstrings = kmalloc(size, GFP_KERNEL); + if (mod->klp_info->secstrings == NULL) { + ret = -ENOMEM; + goto free_sechdrs; + } + memcpy(mod->klp_info->secstrings, info->secstrings, size); + + /* Elf symbol section index */ + symndx = info->index.sym; + mod->klp_info->symndx = symndx; + + /* + * For livepatch modules, core_kallsyms.symtab is a complete + * copy of the original symbol table. Adjust sh_addr to point + * to core_kallsyms.symtab since the copy of the symtab in module + * init memory is freed at the end of do_init_module(). + */ + mod->klp_info->sechdrs[symndx].sh_addr = \ + (unsigned long) mod->core_kallsyms.symtab; + + return 0; + +free_sechdrs: + kfree(mod->klp_info->sechdrs); +free_info: + kfree(mod->klp_info); + return ret; +} + +static void free_module_elf(struct module *mod) +{ + kfree(mod->klp_info->sechdrs); + kfree(mod->klp_info->secstrings); + kfree(mod->klp_info); +} +#else /* !CONFIG_LIVEPATCH */ +static int copy_module_elf(struct module *mod, struct load_info *info) +{ + return 0; +} + +static void free_module_elf(struct module *mod) +{ +} +#endif /* CONFIG_LIVEPATCH */ + void __weak module_memfree(void *module_region) { vfree(module_region); @@ -2011,6 +2088,9 @@ static void free_module(struct module *mod) /* Free any allocated parameters. */ destroy_params(mod->kp, mod->num_kp); + if (is_livepatch_module(mod)) + free_module_elf(mod); + /* Now we can delete it from the lists */ mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ @@ -2126,6 +2206,10 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) (long)sym[i].st_value); break; + case SHN_LIVEPATCH: + /* Livepatch symbols are resolved by livepatch */ + break; + case SHN_UNDEF: ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. 
*/ @@ -2174,6 +2258,10 @@ static int apply_relocations(struct module *mod, const struct load_info *info) if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC)) continue; + /* Livepatch relocation sections are applied by livepatch */ + if (info->sechdrs[i].sh_flags & SHF_RELA_LIVEPATCH) + continue; + if (info->sechdrs[i].sh_type == SHT_REL) err = apply_relocate(info->sechdrs, info->strtab, info->index.sym, i, mod); @@ -2469,7 +2557,7 @@ static void layout_symtab(struct module *mod, struct load_info *info) /* Compute total space required for the core symbols' strtab. */ for (ndst = i = 0; i < nsrc; i++) { - if (i == 0 || + if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { strtab_size += strlen(&info->strtab[src[i].st_name])+1; @@ -2528,7 +2616,7 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs; src = mod->kallsyms->symtab; for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) { - if (i == 0 || + if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { dst[ndst] = src[i]; @@ -2667,6 +2755,26 @@ static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned l return 0; } +#ifdef CONFIG_LIVEPATCH +static int find_livepatch_modinfo(struct module *mod, struct load_info *info) +{ + mod->klp = get_modinfo(info, "livepatch") ? true : false; + + return 0; +} +#else /* !CONFIG_LIVEPATCH */ +static int find_livepatch_modinfo(struct module *mod, struct load_info *info) +{ + if (get_modinfo(info, "livepatch")) { + pr_err("%s: module is marked as livepatch module, but livepatch support is disabled", + mod->name); + return -ENOEXEC; + } + + return 0; +} +#endif /* CONFIG_LIVEPATCH */ + /* Sets info->hdr and info->len. */ static int copy_module_from_user(const void __user *umod, unsigned long len, struct load_info *info) @@ -2821,6 +2929,10 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) "is unknown, you have been warned.\n", mod->name); } + err = find_livepatch_modinfo(mod, info); + if (err) + return err; + /* Set up license info based on the info section */ set_license(mod, get_modinfo(info, "license")); @@ -3494,6 +3606,12 @@ static int load_module(struct load_info *info, const char __user *uargs, if (err < 0) goto coming_cleanup; + if (is_livepatch_module(mod)) { + err = copy_module_elf(mod, info); + if (err < 0) + goto sysfs_cleanup; + } + /* Get rid of temporary copy. */ free_copy(info); @@ -3502,11 +3620,12 @@ static int load_module(struct load_info *info, const char __user *uargs, return do_init_module(mod); + sysfs_cleanup: + mod_sysfs_teardown(mod); coming_cleanup: blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); klp_module_going(mod); - bug_cleanup: /* module_bug_cleanup needs module_mutex protection */ mutex_lock(&module_mutex); -- cgit v0.10.2 From f31e0960f395df0c9197de53674365e2a28a129b Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 22 Mar 2016 20:03:17 -0400 Subject: module: s390: keep mod_arch_specific for livepatch modules Livepatch needs to utilize the symbol information contained in the mod_arch_specific struct in order to be able to call the s390 apply_relocate_add() function to apply relocations. Keep a reference to syminfo if the module is a livepatch module. 
Remove the redundant vfree() in module_finalize() since module_arch_freeing_init() (which also frees those structures) is called in do_init_module(). If the module isn't a livepatch module, we free the structures in module_arch_freeing_init() as usual. Signed-off-by: Jessica Yu Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Acked-by: Heiko Carstens Signed-off-by: Jiri Kosina diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 7873e17..fbc0789 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -51,6 +51,10 @@ void *module_alloc(unsigned long size) void module_arch_freeing_init(struct module *mod) { + if (is_livepatch_module(mod) && + mod->state == MODULE_STATE_LIVE) + return; + vfree(mod->arch.syminfo); mod->arch.syminfo = NULL; } @@ -425,7 +429,5 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *me) { jump_label_apply_nops(me); - vfree(me->arch.syminfo); - me->arch.syminfo = NULL; return 0; } -- cgit v0.10.2 From 425595a7fc2096ab46c741b5ed5372c5ab5bbeac Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 22 Mar 2016 20:03:18 -0400 Subject: livepatch: reuse module loader code to write relocations Reuse module loader code to write relocations, thereby eliminating the need for architecture specific relocation code in livepatch. Specifically, reuse the apply_relocate_add() function in the module loader to write relocations instead of duplicating functionality in livepatch's arch-dependent klp_write_module_reloc() function. In order to accomplish this, livepatch modules manage their own relocation sections (marked with the SHF_RELA_LIVEPATCH section flag) and livepatch-specific symbols (marked with SHN_LIVEPATCH symbol section index). To apply livepatch relocation sections, livepatch symbols referenced by relocs are resolved and then apply_relocate_add() is called to apply those relocations. In addition, remove x86 livepatch relocation code and the s390 klp_write_module_reloc() function stub. They are no longer needed since relocation work has been offloaded to the module loader. Lastly, mark the module as a livepatch module so that the module loader can appropriately identify and initialize it.
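Livepatch symbol names encode the object name, symbol name, and position directly in the symbol string. A small stand-alone sketch of parsing that format, using the same sscanf() pattern as klp_resolve_symbols() in the diff below (the sample symbol name is only an example):

#include <stdio.h>

int main(void)
{
	/* Format: .klp.sym.objname.symname,sympos */
	const char *name = ".klp.sym.vmlinux.printk,0";
	char objname[56], symname[128];	/* bounded by MODULE_NAME_LEN and KSYM_NAME_LEN */
	unsigned long sympos;

	if (sscanf(name, ".klp.sym.%55[^.].%127[^,],%lu",
		   objname, symname, &sympos) == 3)
		printf("object=%s symbol=%s sympos=%lu\n",
		       objname, symname, sympos);

	return 0;
}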
Signed-off-by: Jessica Yu Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Acked-by: Heiko Carstens # for s390 changes Signed-off-by: Jiri Kosina diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index d5427c7..2c12137 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -24,13 +24,6 @@ static inline int klp_check_compiler_support(void) return 0; } -static inline int klp_write_module_reloc(struct module *mod, unsigned long - type, unsigned long loc, unsigned long value) -{ - /* not supported yet */ - return -ENOSYS; -} - static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->psw.addr = ip; diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index 7e68f95..a7f9181 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -32,8 +32,6 @@ static inline int klp_check_compiler_support(void) #endif return 0; } -int klp_write_module_reloc(struct module *mod, unsigned long type, - unsigned long loc, unsigned long value); static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b1b78ff..c5e9a5c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -67,7 +67,6 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-y += apic/ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_LIVEPATCH) += livepatch.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c deleted file mode 100644 index 92fc1a5..0000000 --- a/arch/x86/kernel/livepatch.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * livepatch.c - x86-specific Kernel Live Patching Core - * - * Copyright (C) 2014 Seth Jennings - * Copyright (C) 2014 SUSE - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#include -#include -#include -#include - -/** - * klp_write_module_reloc() - write a relocation in a module - * @mod: module in which the section to be modified is found - * @type: ELF relocation type (see asm/elf.h) - * @loc: address that the relocation should be written to - * @value: relocation value (sym address + addend) - * - * This function writes a relocation to the specified location for - * a particular module. 
- */ -int klp_write_module_reloc(struct module *mod, unsigned long type, - unsigned long loc, unsigned long value) -{ - size_t size = 4; - unsigned long val; - unsigned long core = (unsigned long)mod->core_layout.base; - unsigned long core_size = mod->core_layout.size; - - switch (type) { - case R_X86_64_NONE: - return 0; - case R_X86_64_64: - val = value; - size = 8; - break; - case R_X86_64_32: - val = (u32)value; - break; - case R_X86_64_32S: - val = (s32)value; - break; - case R_X86_64_PC32: - val = (u32)(value - loc); - break; - default: - /* unsupported relocation type */ - return -EINVAL; - } - - if (loc < core || loc >= core + core_size) - /* loc does not point to any symbol inside the module */ - return -EINVAL; - - return probe_kernel_write((void *)loc, &val, size); -} diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index bd830d5..0933ca4 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -65,27 +65,8 @@ struct klp_func { }; /** - * struct klp_reloc - relocation structure for live patching - * @loc: address where the relocation will be written - * @sympos: position in kallsyms to disambiguate symbols (optional) - * @type: ELF relocation type - * @name: name of the referenced symbol (for lookup/verification) - * @addend: offset from the referenced symbol - * @external: symbol is either exported or within the live patch module itself - */ -struct klp_reloc { - unsigned long loc; - unsigned long sympos; - unsigned long type; - const char *name; - int addend; - int external; -}; - -/** * struct klp_object - kernel object structure for live patching * @name: module name (or NULL for vmlinux) - * @relocs: relocation entries to be applied at load time * @funcs: function entries for functions to be patched in the object * @kobj: kobject for sysfs resources * @mod: kernel module associated with the patched object @@ -95,7 +76,6 @@ struct klp_reloc { struct klp_object { /* external */ const char *name; - struct klp_reloc *relocs; struct klp_func *funcs; /* internal */ diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index d68fbf6..eb5db6e 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include /** @@ -204,75 +206,109 @@ static int klp_find_object_symbol(const char *objname, const char *name, return -EINVAL; } -/* - * external symbols are located outside the parent object (where the parent - * object is either vmlinux or the kmod being patched). - */ -static int klp_find_external_symbol(struct module *pmod, const char *name, - unsigned long *addr) +static int klp_resolve_symbols(Elf_Shdr *relasec, struct module *pmod) { - const struct kernel_symbol *sym; - - /* first, check if it's an exported symbol */ - preempt_disable(); - sym = find_symbol(name, NULL, NULL, true, true); - if (sym) { - *addr = sym->value; - preempt_enable(); - return 0; - } - preempt_enable(); + int i, cnt, vmlinux, ret; + char objname[MODULE_NAME_LEN]; + char symname[KSYM_NAME_LEN]; + char *strtab = pmod->core_kallsyms.strtab; + Elf_Rela *relas; + Elf_Sym *sym; + unsigned long sympos, addr; /* - * Check if it's in another .o within the patch module. This also - * checks that the external symbol is unique. + * Since the field widths for objname and symname in the sscanf() + * call are hard-coded and correspond to MODULE_NAME_LEN and + * KSYM_NAME_LEN respectively, we must make sure that MODULE_NAME_LEN + * and KSYM_NAME_LEN have the values we expect them to have. 
+ * + * Because the value of MODULE_NAME_LEN can differ among architectures, + * we use the smallest/strictest upper bound possible (56, based on + * the current definition of MODULE_NAME_LEN) to prevent overflows. */ - return klp_find_object_symbol(pmod->name, name, 0, addr); + BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 128); + + relas = (Elf_Rela *) relasec->sh_addr; + /* For each rela in this klp relocation section */ + for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) { + sym = pmod->core_kallsyms.symtab + ELF_R_SYM(relas[i].r_info); + if (sym->st_shndx != SHN_LIVEPATCH) { + pr_err("symbol %s is not marked as a livepatch symbol", + strtab + sym->st_name); + return -EINVAL; + } + + /* Format: .klp.sym.objname.symname,sympos */ + cnt = sscanf(strtab + sym->st_name, + ".klp.sym.%55[^.].%127[^,],%lu", + objname, symname, &sympos); + if (cnt != 3) { + pr_err("symbol %s has an incorrectly formatted name", + strtab + sym->st_name); + return -EINVAL; + } + + /* klp_find_object_symbol() treats a NULL objname as vmlinux */ + vmlinux = !strcmp(objname, "vmlinux"); + ret = klp_find_object_symbol(vmlinux ? NULL : objname, + symname, sympos, &addr); + if (ret) + return ret; + + sym->st_value = addr; + } + + return 0; } static int klp_write_object_relocations(struct module *pmod, struct klp_object *obj) { - int ret = 0; - unsigned long val; - struct klp_reloc *reloc; + int i, cnt, ret = 0; + const char *objname, *secname; + char sec_objname[MODULE_NAME_LEN]; + Elf_Shdr *sec; if (WARN_ON(!klp_is_object_loaded(obj))) return -EINVAL; - if (WARN_ON(!obj->relocs)) - return -EINVAL; + objname = klp_is_module(obj) ? obj->name : "vmlinux"; module_disable_ro(pmod); + /* For each klp relocation section */ + for (i = 1; i < pmod->klp_info->hdr.e_shnum; i++) { + sec = pmod->klp_info->sechdrs + i; + secname = pmod->klp_info->secstrings + sec->sh_name; + if (!(sec->sh_flags & SHF_RELA_LIVEPATCH)) + continue; - for (reloc = obj->relocs; reloc->name; reloc++) { - /* discover the address of the referenced symbol */ - if (reloc->external) { - if (reloc->sympos > 0) { - pr_err("non-zero sympos for external reloc symbol '%s' is not supported\n", - reloc->name); - ret = -EINVAL; - goto out; - } - ret = klp_find_external_symbol(pmod, reloc->name, &val); - } else - ret = klp_find_object_symbol(obj->name, - reloc->name, - reloc->sympos, - &val); + /* + * Format: .klp.rela.sec_objname.section_name + * See comment in klp_resolve_symbols() for an explanation + * of the selected field width value. 
+ */ + cnt = sscanf(secname, ".klp.rela.%55[^.]", sec_objname); + if (cnt != 1) { + pr_err("section %s has an incorrectly formatted name", + secname); + ret = -EINVAL; + break; + } + + if (strcmp(objname, sec_objname)) + continue; + + ret = klp_resolve_symbols(sec, pmod); if (ret) - goto out; + break; - ret = klp_write_module_reloc(pmod, reloc->type, reloc->loc, - val + reloc->addend); - if (ret) { - pr_err("relocation failed for symbol '%s' at 0x%016lx (%d)\n", - reloc->name, val, ret); - goto out; - } + ret = apply_relocate_add(pmod->klp_info->sechdrs, + pmod->core_kallsyms.strtab, + pmod->klp_info->symndx, i, pmod); + if (ret) + break; } -out: module_enable_ro(pmod); return ret; } @@ -703,11 +739,9 @@ static int klp_init_object_loaded(struct klp_patch *patch, struct klp_func *func; int ret; - if (obj->relocs) { - ret = klp_write_object_relocations(patch->mod, obj); - if (ret) - return ret; - } + ret = klp_write_object_relocations(patch->mod, obj); + if (ret) + return ret; klp_for_each_func(obj, func) { ret = klp_find_object_symbol(obj->name, func->old_name, @@ -842,6 +876,12 @@ int klp_register_patch(struct klp_patch *patch) { int ret; + if (!is_livepatch_module(patch->mod)) { + pr_err("module %s is not marked as a livepatch module", + patch->mod->name); + return -EINVAL; + } + if (!klp_initialized()) return -ENODEV; diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c index fb8c861..e34f871 100644 --- a/samples/livepatch/livepatch-sample.c +++ b/samples/livepatch/livepatch-sample.c @@ -89,3 +89,4 @@ static void livepatch_exit(void) module_init(livepatch_init); module_exit(livepatch_exit); MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); -- cgit v0.10.2 From 3b812ecce736432e6b55e77028ea387eb1517d24 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 22 Mar 2016 20:03:19 -0400 Subject: Documentation: livepatch: outline Elf format and requirements for patch modules Document livepatch module requirements and the special Elf constants patch modules use. Signed-off-by: Jessica Yu Acked-by: Miroslav Benes Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina diff --git a/Documentation/livepatch/module-elf-format.txt b/Documentation/livepatch/module-elf-format.txt new file mode 100644 index 0000000..eedbdcf --- /dev/null +++ b/Documentation/livepatch/module-elf-format.txt @@ -0,0 +1,311 @@ +=========================== +Livepatch module Elf format +=========================== + +This document outlines the Elf format requirements that livepatch modules must follow. + +----------------- +Table of Contents +----------------- +0. Background and motivation +1. Livepatch modinfo field +2. Livepatch relocation sections + 2.1 What are livepatch relocation sections? + 2.2 Livepatch relocation section format + 2.2.1 Required flags + 2.2.2 Required name format + 2.2.3 Example livepatch relocation section names + 2.2.4 Example `readelf --sections` output + 2.2.5 Example `readelf --relocs` output +3. Livepatch symbols + 3.1 What are livepatch symbols? + 3.2 A livepatch module's symbol table + 3.3 Livepatch symbol format + 3.3.1 Required flags + 3.3.2 Required name format + 3.3.3 Example livepatch symbol names + 3.3.4 Example `readelf --symbols` output +4. Symbol table and Elf section access + +---------------------------- +0. Background and motivation +---------------------------- + +Formerly, livepatch required separate architecture-specific code to write +relocations. 
However, arch-specific code to write relocations already +exists in the module loader, so this former approach produced redundant +code. So, instead of duplicating code and re-implementing what the module +loader can already do, livepatch leverages existing code in the module +loader to perform all the arch-specific relocation work. Specifically, +livepatch reuses the apply_relocate_add() function in the module loader to +write relocations. The patch module Elf format described in this document +enables livepatch to do this. The hope is that this will make +livepatch more easily portable to other architectures and reduce the amount +of arch-specific code required to port livepatch to a particular +architecture. + +Since apply_relocate_add() requires access to a module's section header +table, symbol table, and relocation section indices, Elf information is +preserved for livepatch modules (see section 4). Livepatch manages its own +relocation sections and symbols, which are described in this document. The +Elf constants used to mark livepatch symbols and relocation sections were +selected from OS-specific ranges according to the definitions from glibc. + +0.1 Why does livepatch need to write its own relocations? +--------------------------------------------------------- +A typical livepatch module contains patched versions of functions that can +reference non-exported global symbols and non-included local symbols. +Relocations referencing these types of symbols cannot be left as-is +since the kernel module loader cannot resolve them and will therefore +reject the livepatch module. Furthermore, we cannot apply relocations that +affect modules not yet loaded at patch module load time (e.g. a patch to a +driver that is not loaded). Formerly, livepatch solved this problem by +embedding special "dynrela" (dynamic rela) sections in the resulting patch +module Elf output. Using these dynrela sections, livepatch could resolve +symbols while taking into account their scope and what module each symbol +belongs to, and then manually apply the dynamic relocations. However, this +approach required livepatch to supply arch-specific code in order to write +these relocations. In the new format, livepatch manages its own SHT_RELA +relocation sections in place of dynrela sections, and the symbols that the +relas reference are special livepatch symbols (see sections 2 and 3). The +arch-specific livepatch relocation code is replaced by a call to +apply_relocate_add(). + +================================ +PATCH MODULE FORMAT REQUIREMENTS +================================ + +-------------------------- +1. Livepatch modinfo field +-------------------------- + +Livepatch modules are required to have the "livepatch" modinfo attribute. +See the sample livepatch module in samples/livepatch/ for how this is done. + +Users can identify livepatch modules by using the 'modinfo' command +and looking for the presence of the "livepatch" field. This field is also +used by the kernel module loader to identify livepatch modules. + +Example modinfo output: +----------------------- +% modinfo livepatch-meminfo.ko +filename: livepatch-meminfo.ko +livepatch: Y +license: GPL +depends: +vermagic: 4.3.0+ SMP mod_unload + +-------------------------------- +2. Livepatch relocation sections +-------------------------------- + +------------------------------------------- +2.1 What are livepatch relocation sections?
+------------------------------------------- +A livepatch module manages its own Elf relocation sections to apply +relocations to modules as well as to the kernel (vmlinux) at the +appropriate time. For example, if a patch module patches a driver that is +not currently loaded, livepatch will apply the corresponding livepatch +relocation section(s) to the driver once it loads. + +Each "object" (e.g. vmlinux, or a module) within a patch module may have +multiple livepatch relocation sections associated with it (e.g. patches to +multiple functions within the same object). There is a 1-1 correspondence +between a livepatch relocation section and the target section (usually the +text section of a function) to which the relocation(s) apply. It is +also possible for a livepatch module to have no livepatch relocation +sections, as in the case of the sample livepatch module (see +samples/livepatch). + +Since Elf information is preserved for livepatch modules (see Section 4), a +livepatch relocation section can be applied simply by passing in the +appropriate section index to apply_relocate_add(), which then uses it to +access the relocation section and apply the relocations. + +Every symbol referenced by a rela in a livepatch relocation section is a +livepatch symbol. These must be resolved before livepatch can call +apply_relocate_add(). See Section 3 for more information. + +--------------------------------------- +2.2 Livepatch relocation section format +--------------------------------------- + +2.2.1 Required flags +-------------------- +Livepatch relocation sections must be marked with the SHF_RELA_LIVEPATCH +section flag. See include/uapi/linux/elf.h for the definition. The module +loader recognizes this flag and will avoid applying those relocation sections +at patch module load time. These sections must also be marked with SHF_ALLOC, +so that the module loader doesn't discard them on module load (i.e. they will +be copied into memory along with the other SHF_ALLOC sections). + +2.2.2 Required name format +-------------------------- +The name of a livepatch relocation section must conform to the following format: + +.klp.rela.objname.section_name +^ ^^ ^ ^ ^ +|________||_____| |__________| + [A] [B] [C] + +[A] The relocation section name is prefixed with the string ".klp.rela." +[B] The name of the object (i.e. "vmlinux" or name of module) to + which the relocation section belongs follows immediately after the prefix. +[C] The actual name of the section to which this relocation section applies. 
+ +2.2.3 Example livepatch relocation section names: +------------------------------------------------- +.klp.rela.ext4.text.ext4_attr_store +.klp.rela.vmlinux.text.cmdline_proc_show + +2.2.4 Example `readelf --sections` output for a patch +module that patches vmlinux and modules 9p, btrfs, ext4: +-------------------------------------------------------- + Section Headers: + [Nr] Name Type Address Off Size ES Flg Lk Inf Al + [ snip ] + [29] .klp.rela.9p.text.caches.show RELA 0000000000000000 002d58 0000c0 18 AIo 64 9 8 + [30] .klp.rela.btrfs.text.btrfs.feature.attr.show RELA 0000000000000000 002e18 000060 18 AIo 64 11 8 + [ snip ] + [34] .klp.rela.ext4.text.ext4.attr.store RELA 0000000000000000 002fd8 0000d8 18 AIo 64 13 8 + [35] .klp.rela.ext4.text.ext4.attr.show RELA 0000000000000000 0030b0 000150 18 AIo 64 15 8 + [36] .klp.rela.vmlinux.text.cmdline.proc.show RELA 0000000000000000 003200 000018 18 AIo 64 17 8 + [37] .klp.rela.vmlinux.text.meminfo.proc.show RELA 0000000000000000 003218 0000f0 18 AIo 64 19 8 + [ snip ] ^ ^ + | | + [*] [*] +[*] Livepatch relocation sections are SHT_RELA sections but with a few special +characteristics. Notice that they are marked SHF_ALLOC ("A") so that they will +not be discarded when the module is loaded into memory, as well as with the +SHF_RELA_LIVEPATCH flag ("o" - for OS-specific). + +2.2.5 Example `readelf --relocs` output for a patch module: +----------------------------------------------------------- +Relocation section '.klp.rela.btrfs.text.btrfs_feature_attr_show' at offset 0x2ba0 contains 4 entries: + Offset Info Type Symbol's Value Symbol's Name + Addend +000000000000001f 0000005e00000002 R_X86_64_PC32 0000000000000000 .klp.sym.vmlinux.printk,0 - 4 +0000000000000028 0000003d0000000b R_X86_64_32S 0000000000000000 .klp.sym.btrfs.btrfs_ktype,0 + 0 +0000000000000036 0000003b00000002 R_X86_64_PC32 0000000000000000 .klp.sym.btrfs.can_modify_feature.isra.3,0 - 4 +000000000000004c 0000004900000002 R_X86_64_PC32 0000000000000000 .klp.sym.vmlinux.snprintf,0 - 4 +[ snip ] ^ + | + [*] +[*] Every symbol referenced by a relocation is a livepatch symbol. + +-------------------- +3. Livepatch symbols +-------------------- + +------------------------------- +3.1 What are livepatch symbols? +------------------------------- +Livepatch symbols are symbols referred to by livepatch relocation sections. +These are symbols accessed from new versions of functions for patched +objects, whose addresses cannot be resolved by the module loader (because +they are local or unexported global syms). Since the module loader only +resolves exported syms, and not every symbol referenced by the new patched +functions is exported, livepatch symbols were introduced. They are used +also in cases where we cannot immediately know the address of a symbol when +a patch module loads. For example, this is the case when livepatch patches +a module that is not loaded yet. In this case, the relevant livepatch +symbols are resolved simply when the target module loads. In any case, for +any livepatch relocation section, all livepatch symbols referenced by that +section must be resolved before livepatch can call apply_relocate_add() for +that reloc section. + +Livepatch symbols must be marked with SHN_LIVEPATCH so that the module +loader can identify and ignore them. Livepatch modules keep these symbols +in their symbol tables, and the symbol table is made accessible through +module->symtab. 
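Similarly, and again only as a sketch (the helper is hypothetical; the real parsing is done by klp_resolve_symbols() earlier in this series), a livepatch symbol name of the form described in section 3.3.2 below can be split with the same sscanf() pattern, where 55 and 127 correspond to MODULE_NAME_LEN - 1 and KSYM_NAME_LEN - 1:

#include <linux/errno.h>
#include <linux/kallsyms.h>	/* KSYM_NAME_LEN */
#include <linux/kernel.h>
#include <linux/module.h>	/* MODULE_NAME_LEN */

/* Sketch: split ".klp.sym.vmlinux.printk,0" into "vmlinux", "printk" and 0. */
static int parse_klp_sym_name(const char *name, char objname[MODULE_NAME_LEN],
			      char symname[KSYM_NAME_LEN],
			      unsigned long *sympos)
{
	if (sscanf(name, ".klp.sym.%55[^.].%127[^,],%lu",
		   objname, symname, sympos) != 3)
		return -EINVAL;

	return 0;
}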
+ +------------------------------------- +3.2 A livepatch module's symbol table +------------------------------------- +Normally, a stripped-down copy of a module's symbol table (containing only +"core" symbols) is made available through module->symtab (see layout_symtab() +in kernel/module.c). For livepatch modules, the symbol table copied into memory +on module load must be exactly the same as the symbol table produced when the +patch module was compiled. This is because the relocations in each livepatch +relocation section refer to their respective symbols with their symbol indices, +and the original symbol indices (and thus the symtab ordering) must be +preserved in order for apply_relocate_add() to find the right symbol. + +For example, take this particular rela from a livepatch module: +Relocation section '.klp.rela.btrfs.text.btrfs_feature_attr_show' at offset 0x2ba0 contains 4 entries: + Offset Info Type Symbol's Value Symbol's Name + Addend +000000000000001f 0000005e00000002 R_X86_64_PC32 0000000000000000 .klp.sym.vmlinux.printk,0 - 4 + +This rela refers to the symbol '.klp.sym.vmlinux.printk,0', and the symbol index is encoded +in 'Info'. Here its symbol index is 0x5e, which is 94 in decimal. +In this patch module's corresponding symbol table, symbol index 94 refers to that very symbol: +[ snip ] +94: 0000000000000000 0 NOTYPE GLOBAL DEFAULT OS [0xff20] .klp.sym.vmlinux.printk,0 +[ snip ] + +--------------------------- +3.3 Livepatch symbol format +--------------------------- + +3.3.1 Required flags +-------------------- +Livepatch symbols must have their section index marked as SHN_LIVEPATCH, so +that the module loader can identify them and not attempt to resolve them. +See include/uapi/linux/elf.h for the actual definitions. + +3.3.2 Required name format +-------------------------- +Livepatch symbol names must conform to the following format: + +.klp.sym.objname.symbol_name,sympos +^ ^^ ^ ^ ^ ^ +|_______||_____| |_________| | + [A] [B] [C] [D] + +[A] The symbol name is prefixed with the string ".klp.sym." +[B] The name of the object (i.e. "vmlinux" or name of module) to + which the symbol belongs follows immediately after the prefix. +[C] The actual name of the symbol. +[D] The position of the symbol in the object (according to kallsyms). + This is used to differentiate duplicate symbols within the same + object. The symbol position is expressed numerically (0, 1, 2...). + The symbol position of a unique symbol is 0. + +3.3.3 Example livepatch symbol names: +------------------------------------- +.klp.sym.vmlinux.snprintf,0 +.klp.sym.vmlinux.printk,0 +.klp.sym.btrfs.btrfs_ktype,0 + +3.3.4 Example `readelf --symbols` output for a patch module: +------------------------------------------------------------ +Symbol table '.symtab' contains 127 entries: + Num: Value Size Type Bind Vis Ndx Name + [ snip ] + 73: 0000000000000000 0 NOTYPE GLOBAL DEFAULT OS [0xff20] .klp.sym.vmlinux.snprintf,0 + 74: 0000000000000000 0 NOTYPE GLOBAL DEFAULT OS [0xff20] .klp.sym.vmlinux.capable,0 + 75: 0000000000000000 0 NOTYPE GLOBAL DEFAULT OS [0xff20] .klp.sym.vmlinux.find_next_bit,0 + 76: 0000000000000000 0 NOTYPE GLOBAL DEFAULT OS [0xff20] .klp.sym.vmlinux.si_swapinfo,0 + [ snip ] ^ + | + [*] +[*] Note that the 'Ndx' (Section index) for these symbols is SHN_LIVEPATCH (0xff20). + "OS" means OS-specific. + +-------------------------------------- +4.
Symbol table and Elf section access +-------------------------------------- +A livepatch module's symbol table is accessible through module->symtab. + +Since apply_relocate_add() requires access to a module's section headers, +symbol table, and relocation section indices, Elf information is preserved for +livepatch modules and is made accessible by the module loader through +module->klp_info, which is a klp_modinfo struct. When a livepatch module loads, +this struct is filled in by the module loader. Its fields are documented below: + +struct klp_modinfo { + Elf_Ehdr hdr; /* Elf header */ + Elf_Shdr *sechdrs; /* Section header table */ + char *secstrings; /* String table for the section headers */ + unsigned int symndx; /* The symbol table section index */ +}; -- cgit v0.10.2 From 61bf12d3304d78ff499245ea995858c3bedb162e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 7 Apr 2016 18:06:25 +0200 Subject: livepatch: robustify klp_register_patch() API error checking Commit 425595a7fc20 ("livepatch: reuse module loader code to write relocations") adds a possibility of dereferencing pointers supplied by the consumer of the livepatch API before sanity (NULL) checking them (patch and patch->mod). Spotted by the smatch tool. Reported-by: Dan Carpenter Acked-by: Josh Poimboeuf Acked-by: Jessica Yu Signed-off-by: Jiri Kosina diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index eb5db6e..28c37fa 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -876,6 +876,9 @@ int klp_register_patch(struct klp_patch *patch) { int ret; + if (!patch || !patch->mod) + return -EINVAL; + if (!is_livepatch_module(patch->mod)) { pr_err("module %s is not marked as a livepatch module", patch->mod->name); @@ -885,9 +888,6 @@ int klp_register_patch(struct klp_patch *patch) if (!klp_initialized()) return -ENODEV; - if (!patch || !patch->mod) - return -EINVAL; - /* * A reference is taken on the patch module to prevent it from being * unloaded. Right now, we don't allow patch modules to unload since -- cgit v0.10.2 From 04cf31a759ef575f750a63777cee95500e410994 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Mar 2016 22:04:01 +1100 Subject: ftrace: Make ftrace_location_range() global In order to support live patching on powerpc we would like to call ftrace_location_range(), so make it global. Signed-off-by: Torsten Duwe Signed-off-by: Balbir Singh Signed-off-by: Michael Ellerman diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 81de712..3481a8e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -455,6 +455,7 @@ int ftrace_update_record(struct dyn_ftrace *rec, int enable); int ftrace_test_record(struct dyn_ftrace *rec, int enable); void ftrace_run_stop_machine(int command); unsigned long ftrace_location(unsigned long ip); +unsigned long ftrace_location_range(unsigned long start, unsigned long end); unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec); unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eca592f..e1b3f23 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1533,7 +1533,19 @@ static int ftrace_cmp_recs(const void *a, const void *b) return 0; } -static unsigned long ftrace_location_range(unsigned long start, unsigned long end) +/** + * ftrace_location_range - return the first address of a traced location + * if it touches the given ip range + * @start: start of range to search. + * @end: end of range to search (inclusive).
@end points to the last byte + * to check. + * + * Returns rec->ip if the related ftrace location is at least partly within + * the given address range. That is, the first address of the instruction + * that is either a NOP or a call to the function tracer. It checks the ftrace + * internal tables to determine if the address belongs to a traced location or not. + */ +unsigned long ftrace_location_range(unsigned long start, unsigned long end) { struct ftrace_page *pg; struct dyn_ftrace *rec; -- cgit v0.10.2 From 28e7cbd3e0f5fefec892842d1391ebd508fdb5ce Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Mar 2016 22:04:02 +1100 Subject: livepatch: Allow architectures to specify an alternate ftrace location When livepatch tries to patch a function it takes the function address and asks ftrace to install the livepatch handler at that location. ftrace will look for an mcount call site at that exact address. On powerpc the mcount location is not the first instruction of the function, and in fact it's not at a constant offset from the start of the function. To accommodate this add a hook which arch code can override to customise the behaviour. Signed-off-by: Torsten Duwe Signed-off-by: Balbir Singh Signed-off-by: Petr Mladek Signed-off-by: Michael Ellerman diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index bc2c85c..35ee2c5 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -298,6 +298,19 @@ unlock: rcu_read_unlock(); } +/* + * Convert a function address into the appropriate ftrace location. + * + * Usually this is just the address of the function, but on some architectures + * it's more complicated so allow them to provide a custom behaviour. + */ +#ifndef klp_get_ftrace_location +static unsigned long klp_get_ftrace_location(unsigned long faddr) +{ + return faddr; +} +#endif + static void klp_disable_func(struct klp_func *func) { struct klp_ops *ops; @@ -312,8 +325,14 @@ static void klp_disable_func(struct klp_func *func) return; if (list_is_singular(&ops->func_stack)) { + unsigned long ftrace_loc; + + ftrace_loc = klp_get_ftrace_location(func->old_addr); + if (WARN_ON(!ftrace_loc)) + return; + WARN_ON(unregister_ftrace_function(&ops->fops)); - WARN_ON(ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0)); + WARN_ON(ftrace_set_filter_ip(&ops->fops, ftrace_loc, 1, 0)); list_del_rcu(&func->stack_node); list_del(&ops->node); @@ -338,6 +357,15 @@ static int klp_enable_func(struct klp_func *func) ops = klp_find_ops(func->old_addr); if (!ops) { + unsigned long ftrace_loc; + + ftrace_loc = klp_get_ftrace_location(func->old_addr); + if (!ftrace_loc) { + pr_err("failed to find location for function '%s'\n", + func->old_name); + return -EINVAL; + } + ops = kzalloc(sizeof(*ops), GFP_KERNEL); if (!ops) return -ENOMEM; @@ -352,7 +380,7 @@ static int klp_enable_func(struct klp_func *func) INIT_LIST_HEAD(&ops->func_stack); list_add_rcu(&func->stack_node, &ops->func_stack); - ret = ftrace_set_filter_ip(&ops->fops, func->old_addr, 0, 0); + ret = ftrace_set_filter_ip(&ops->fops, ftrace_loc, 0, 0); if (ret) { pr_err("failed to set ftrace filter for function '%s' (%d)\n", func->old_name, ret); @@ -363,7 +391,7 @@ static int klp_enable_func(struct klp_func *func) if (ret) { pr_err("failed to register ftrace handler for function '%s' (%d)\n", func->old_name, ret); - ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0); + ftrace_set_filter_ip(&ops->fops, ftrace_loc, 1, 0); goto err; } -- cgit v0.10.2 From f63e6d89876034c21ecd18bb1cd0d6c5b8da11a5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman
Date: Thu, 24 Mar 2016 22:04:03 +1100 Subject: powerpc/livepatch: Add livepatch header Add the powerpc-specific livepatch definitions. In particular we provide a non-default implementation of klp_get_ftrace_location(). This is required because the location of the mcount call is not constant when using -mprofile-kernel (which we always do for live patching). Signed-off-by: Torsten Duwe Signed-off-by: Balbir Singh Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h new file mode 100644 index 0000000..ad36e8e --- /dev/null +++ b/arch/powerpc/include/asm/livepatch.h @@ -0,0 +1,54 @@ +/* + * livepatch.h - powerpc-specific Kernel Live Patching Core + * + * Copyright (C) 2015-2016, SUSE, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef _ASM_POWERPC_LIVEPATCH_H +#define _ASM_POWERPC_LIVEPATCH_H + +#include +#include + +#ifdef CONFIG_LIVEPATCH +static inline int klp_check_compiler_support(void) +{ + return 0; +} + +static inline int klp_write_module_reloc(struct module *mod, unsigned long + type, unsigned long loc, unsigned long value) +{ + /* This requires infrastructure changes; we need the loadinfos. */ + return -ENOSYS; +} + +static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +{ + regs->nip = ip; +} + +#define klp_get_ftrace_location klp_get_ftrace_location +static inline unsigned long klp_get_ftrace_location(unsigned long faddr) +{ + /* + * Live patch works only with -mprofile-kernel on PPC. In this case, + * the ftrace location is always within the first 16 bytes. + */ + return ftrace_location_range(faddr, faddr + 16); +} +#endif /* CONFIG_LIVEPATCH */ + +#endif /* _ASM_POWERPC_LIVEPATCH_H */ -- cgit v0.10.2 From 5d31a96e6c0187f2c5d7004e005fd094a1277e9e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Mar 2016 22:04:04 +1100 Subject: powerpc/livepatch: Add livepatch stack to struct thread_info In order to support live patching we need to maintain an alternate stack of TOC & LR values. We use the base of the stack for this, and store the "live patch stack pointer" in struct thread_info. Unlike the other fields of thread_info, we can not statically initialise that value, so it must be done at run time. This patch just adds the code to support that; it is not enabled until the next patch, which actually adds live patch support.
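To picture what that run-time initialisation does, here is an illustrative-only sketch of the pointer arithmetic (the helper name is invented; the real code is klp_init_thread_info() in the diff below):

#include <linux/thread_info.h>

/*
 * Sketch: the livepatch stack starts just past struct thread_info and the
 * STACK_END_MAGIC word that sits at the very bottom of each kernel stack,
 * and grows upward from there.
 */
static inline unsigned long *livepatch_stack_base(struct thread_info *ti)
{
	return (unsigned long *)(ti + 1) + 1;
}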
Signed-off-by: Michael Ellerman Acked-by: Balbir Singh diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index ad36e8e..a402f7f 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -49,6 +49,14 @@ static inline unsigned long klp_get_ftrace_location(unsigned long faddr) */ return ftrace_location_range(faddr, faddr + 16); } + +static inline void klp_init_thread_info(struct thread_info *ti) +{ + /* + 1 to account for STACK_END_MAGIC */ + ti->livepatch_sp = (unsigned long *)(ti + 1) + 1; +} +#else +static void klp_init_thread_info(struct thread_info *ti) { } #endif /* CONFIG_LIVEPATCH */ #endif /* _ASM_POWERPC_LIVEPATCH_H */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 7efee4a..8febc3f 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -43,7 +43,9 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => BUG */ unsigned long local_flags; /* private flags for thread */ - +#ifdef CONFIG_LIVEPATCH + unsigned long *livepatch_sp; +#endif /* low level flags - has atomic operations done on it */ unsigned long flags ____cacheline_aligned_in_smp; }; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 290559d..3cb46a3 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -66,6 +66,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 #include @@ -607,10 +608,12 @@ void irq_ctx_init(void) memset((void *)softirq_ctx[i], 0, THREAD_SIZE); tp = softirq_ctx[i]; tp->cpu = i; + klp_init_thread_info(tp); memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); tp = hardirq_ctx[i]; tp->cpu = i; + klp_init_thread_info(tp); } } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index dccc87e..a38ce49 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -55,6 +55,8 @@ #include #endif #include +#include + #include #include @@ -1267,13 +1269,15 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, extern void ret_from_kernel_thread(void); void (*f)(void); unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; + struct thread_info *ti = task_thread_info(p); + + klp_init_thread_info(ti); /* Copy registers */ sp -= sizeof(struct pt_regs); childregs = (struct pt_regs *) sp; if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ - struct thread_info *ti = (void *)task_stack_page(p); memset(childregs, 0, sizeof(struct pt_regs)); childregs->gpr[1] = sp + sizeof(struct pt_regs); /* function */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5c03a6a..e37b92e 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -69,6 +69,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -670,16 +671,16 @@ static void __init emergency_stack_init(void) limit = min(safe_stack_limit(), ppc64_rma_size); for_each_possible_cpu(i) { - unsigned long sp; - sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); - sp += THREAD_SIZE; - paca[i].emergency_sp = __va(sp); + struct thread_info *ti; + ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + klp_init_thread_info(ti); + paca[i].emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 /* emergency stack for machine check exception handling. 
*/ - sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); - sp += THREAD_SIZE; - paca[i].mc_emergency_sp = __va(sp); + ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + klp_init_thread_info(ti); + paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE; #endif } } @@ -703,6 +704,8 @@ void __init setup_arch(char **cmdline_p) if (ppc_md.panic) setup_panic(); + klp_init_thread_info(&init_thread_info); + init_mm.start_code = (unsigned long)_stext; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; -- cgit v0.10.2 From 85baa095497f3e590df9f6c8932121f123efca5c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Mar 2016 22:04:05 +1100 Subject: powerpc/livepatch: Add live patching support on ppc64le Add the kconfig logic & assembly support for handling live patched functions. This depends on DYNAMIC_FTRACE_WITH_REGS, which in turn depends on the new -mprofile-kernel ftrace ABI, which is currently only supported on ppc64le. Live patching is handled by a special ftrace handler. This means it runs from ftrace_caller(). The live patch handler modifies the NIP so as to redirect the return from ftrace_caller() to the new patched function. However, there is one particularly tricky case we need to handle. If a function A calls another function B, and it is known at link time that they share the same TOC, then A will not save or restore its TOC, and will call the local entry point of B. When we live patch B, we replace it with a new function C, which may not have the same TOC as A. At live patch time it's too late to modify A to do the TOC save/restore, so the live patching code must interpose itself between A and C, and do the TOC save/restore that A omitted. An additional complication is that the livepatch code can not create a stack frame in order to save the TOC. That is because if C takes > 8 arguments, or is varargs, A will have written the arguments for C in A's stack frame. To solve this, we introduce a "livepatch stack" which grows upward from the base of the regular stack, and is used to store the TOC & LR when calling a live patched function. When the patched function returns, we retrieve the real LR & TOC from the livepatch stack, restore them, and pop the livepatch "stack frame".
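Before reading the assembly below, it may help to picture one entry on that livepatch stack. The struct here is purely illustrative (struct and field names are invented); the real code works with raw offsets of -24, -16 and -8 from the livepatch stack pointer:

/* Sketch: one 24-byte livepatch stack entry, lowest address first. */
struct klp_stack_entry {
	unsigned long toc;	/* caller A's r2 (TOC), restored after C returns */
	unsigned long lr;	/* real return address back into A */
	unsigned long magic;	/* STACK_END_MAGIC, sanity-checked on return */
};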
Signed-off-by: Michael Ellerman Reviewed-by: Torsten Duwe Reviewed-by: Balbir Singh diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 91da283..926c0ea 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -159,6 +159,7 @@ config PPC select ARCH_HAS_DEVMEM_IS_ALLOWED select HAVE_ARCH_SECCOMP_FILTER select ARCH_HAS_UBSAN_SANITIZE_ALL + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN @@ -1110,3 +1111,5 @@ config PPC_LIB_RHEAP bool source "arch/powerpc/kvm/Kconfig" + +source "kernel/livepatch/Kconfig" diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 07cebc3..723efac 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -86,6 +86,10 @@ int main(void) DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_LIVEPATCH + DEFINE(TI_livepatch_sp, offsetof(struct thread_info, livepatch_sp)); +#endif + DEFINE(KSP, offsetof(struct thread_struct, ksp)); DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); #ifdef CONFIG_BOOKE diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index ec7f8aa..47dbede 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -1224,6 +1225,9 @@ _GLOBAL(ftrace_caller) addi r3,r3,function_trace_op@toc@l ld r5,0(r3) +#ifdef CONFIG_LIVEPATCH + mr r14,r7 /* remember old NIP */ +#endif /* Calculate ip from nip-4 into r3 for call below */ subi r3, r7, MCOUNT_INSN_SIZE @@ -1248,6 +1252,9 @@ ftrace_call: /* Load ctr with the possibly modified NIP */ ld r3, _NIP(r1) mtctr r3 +#ifdef CONFIG_LIVEPATCH + cmpd r14,r3 /* has NIP been altered? */ +#endif /* Restore gprs */ REST_8GPRS(0,r1) @@ -1265,6 +1272,11 @@ ftrace_call: ld r0, LRSAVE(r1) mtlr r0 +#ifdef CONFIG_LIVEPATCH + /* Based on the cmpd above, if the NIP was altered handle livepatch */ + bne- livepatch_handler +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER stdu r1, -112(r1) .globl ftrace_graph_call @@ -1281,6 +1293,91 @@ _GLOBAL(ftrace_graph_stub) _GLOBAL(ftrace_stub) blr + +#ifdef CONFIG_LIVEPATCH + /* + * This function runs in the mcount context, between two functions. As + * such it can only clobber registers which are volatile and used in + * function linkage. + * + * We get here when a function A, calls another function B, but B has + * been live patched with a new function C. + * + * On entry: + * - we have no stack frame and can not allocate one + * - LR points back to the original caller (in A) + * - CTR holds the new NIP in C + * - r0 & r12 are free + * + * r0 can't be used as the base register for a DS-form load or store, so + * we temporarily shuffle r1 (stack pointer) into r0 and then put it back. + */ +livepatch_handler: + CURRENT_THREAD_INFO(r12, r1) + + /* Save stack pointer into r0 */ + mr r0, r1 + + /* Allocate 3 x 8 bytes */ + ld r1, TI_livepatch_sp(r12) + addi r1, r1, 24 + std r1, TI_livepatch_sp(r12) + + /* Save toc & real LR on livepatch stack */ + std r2, -24(r1) + mflr r12 + std r12, -16(r1) + + /* Store stack end marker */ + lis r12, STACK_END_MAGIC@h + ori r12, r12, STACK_END_MAGIC@l + std r12, -8(r1) + + /* Restore real stack pointer */ + mr r1, r0 + + /* Put ctr in r12 for global entry and branch there */ + mfctr r12 + bctrl + + /* + * Now we are returning from the patched function to the original + * caller A. We are free to use r0 and r12, and we can use r2 until we + * restore it. 
+ */ + + CURRENT_THREAD_INFO(r12, r1) + + /* Save stack pointer into r0 */ + mr r0, r1 + + ld r1, TI_livepatch_sp(r12) + + /* Check stack marker hasn't been trashed */ + lis r2, STACK_END_MAGIC@h + ori r2, r2, STACK_END_MAGIC@l + ld r12, -8(r1) +1: tdne r12, r2 + EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 + + /* Restore LR & toc from livepatch stack */ + ld r12, -16(r1) + mtlr r12 + ld r2, -24(r1) + + /* Pop livepatch stack frame */ + CURRENT_THREAD_INFO(r12, r0) + subi r1, r1, 24 + std r1, TI_livepatch_sp(r12) + + /* Restore real stack pointer */ + mr r1, r0 + + /* Return to original caller of live patched function */ + blr +#endif + + #else _GLOBAL_TOC(_mcount) /* Taken from output of objdump from lib64/glibc */ -- cgit v0.10.2 From 5e4e38446a62a4f50d77b0dd11d4b379dee08988 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 25 Apr 2016 17:14:35 +0200 Subject: livepatch: Add some basic livepatch documentation livepatch framework deserves some documentation, definitely. This is an attempt to provide some basic info. I hope that it will be useful for both LivePatch producers and also potential developers of the framework itself. [jkosina@suse.cz: - incorporated feedback (grammar fixes) from Chris J Arges - s/LivePatch/livepatch in changelog as pointed out by Josh Poimboeuf - incorporated part of feedback (grammar fixes / reformulations) from Balbir Singh ] Acked-by: Jessica Yu Signed-off-by: Petr Mladek Signed-off-by: Jiri Kosina diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt new file mode 100644 index 0000000..6c43f6e --- /dev/null +++ b/Documentation/livepatch/livepatch.txt @@ -0,0 +1,394 @@ +========= +Livepatch +========= + +This document outlines basic information about kernel livepatching. + +Table of Contents: + +1. Motivation +2. Kprobes, Ftrace, Livepatching +3. Consistency model +4. Livepatch module + 4.1. New functions + 4.2. Metadata + 4.3. Livepatch module handling +5. Livepatch life-cycle + 5.1. Registration + 5.2. Enabling + 5.3. Disabling + 5.4. Unregistration +6. Sysfs +7. Limitations + + +1. Motivation +============= + +There are many situations where users are reluctant to reboot a system. It may +be because their system is performing complex scientific computations or under +heavy load during peak usage. In addition to keeping systems up and running, +users want to also have a stable and secure system. Livepatching gives users +both by allowing for function calls to be redirected; thus, fixing critical +functions without a system reboot. + + +2. Kprobes, Ftrace, Livepatching +================================ + +There are multiple mechanisms in the Linux kernel that are directly related +to redirection of code execution; namely: kernel probes, function tracing, +and livepatching: + + + The kernel probes are the most generic. The code can be redirected by + putting a breakpoint instruction instead of any instruction. + + + The function tracer calls the code from a predefined location that is + close to the function entry point. This location is generated by the + compiler using the '-pg' gcc option. + + + Livepatching typically needs to redirect the code at the very beginning + of the function entry before the function parameters or the stack + are in any way modified. + +All three approaches need to modify the existing code at runtime. Therefore +they need to be aware of each other and not step over each other's toes. +Most of these problems are solved by using the dynamic ftrace framework as +a base. 
A Kprobe is registered as a ftrace handler when the function entry +is probed; see CONFIG_KPROBES_ON_FTRACE. Also an alternative function from +a live patch is called with the help of a custom ftrace handler. But there are +some limitations, see below. + + +3. Consistency model +==================== + +Functions are there for a reason. They take some input parameters, get or +release locks, read, process, and even write some data in a defined way, +and have return values. In other words, each function has a defined semantic. + +Many fixes do not change the semantic of the modified functions. For +example, they add a NULL pointer or a boundary check, fix a race by adding +a missing memory barrier, or add some locking around a critical section. +Most of these changes are self-contained and the function presents itself +the same way to the rest of the system. In this case, the functions might +be updated independently one by one. + +But there are more complex fixes. For example, a patch might change +ordering of locking in multiple functions at the same time. Or a patch +might exchange the meaning of some temporary structures and update +all the relevant functions. In this case, the affected unit +(thread, whole kernel) needs to start using all new versions of +the functions at the same time. Also the switch must happen only +when it is safe to do so, e.g. when the affected locks are released +or no data are stored in the modified structures at the moment. + +The theory about how to apply functions in a safe way is rather complex. +The aim is to define a so-called consistency model. It attempts to define +conditions when the new implementation could be used so that the system +stays consistent. The theory is not yet finished. See the discussion at +http://thread.gmane.org/gmane.linux.kernel/1823033/focus=1828189 + +The current consistency model is very simple. It guarantees that either +the old or the new function is called. But various functions get redirected +one by one without any synchronization. + +In other words, the current implementation _never_ modifies the behavior +in the middle of the call. It is because it does _not_ rewrite the entire +function in memory. Instead, the function gets redirected at the +very beginning. But this redirection is used immediately even when +some other functions from the same patch have not been redirected yet. + +See also the section "Limitations" below. + + +4. Livepatch module +=================== + +Livepatches are distributed using kernel modules, see +samples/livepatch/livepatch-sample.c. + +The module includes a new implementation of functions that we want +to replace. In addition, it defines some structures describing the +relation between the original and the new implementation. Then there +is code that makes the kernel start using the new code when the livepatch +module is loaded. Also there is code that cleans up before the +livepatch module is removed. All this is explained in more detail in +the next sections. + + +4.1. New functions +------------------ + +New versions of functions are typically just copied from the original +sources. A good practice is to add a prefix to the names so that they +can be distinguished from the original ones, e.g. in a backtrace. Also +they can be declared as static because they are not called directly +and do not need the global visibility. + +The patch contains only functions that are really modified.
But they +might want to access functions or data from the original source file +that may only be locally accessible. This can be solved by a special +relocation section in the generated livepatch module; see +Documentation/livepatch/module-elf-format.txt for more details. + + +4.2. Metadata +------------ + +The patch is described by several structures that split the information +into three levels: + + + struct klp_func is defined for each patched function. It describes + the relation between the original and the new implementation of a + particular function. + + The structure includes the name, as a string, of the original function. + The function address is found via kallsyms at runtime. + + Then it includes the address of the new function. It is defined + directly by assigning the function pointer. Note that the new + function is typically defined in the same source file. + + As an optional parameter, the symbol position in the kallsyms database can + be used to disambiguate functions of the same name. This is not the + absolute position in the database, but rather the order in which it has been + found for a particular object (vmlinux or a kernel module). Note that + kallsyms allows for searching symbols according to the object name. + + + struct klp_object defines an array of patched functions (struct + klp_func) in the same object, where the object is either vmlinux + (NULL) or a module name. + + The structure helps to group and handle functions for each object + together. Note that patched modules might be loaded later than + the patch itself and the relevant functions might be patched + only when they are available. + + + + struct klp_patch defines an array of patched objects (struct + klp_object). + + This structure handles all patched functions consistently and eventually, + synchronously. The whole patch is applied only when all patched + symbols are found. The only exceptions are symbols from objects + (kernel modules) that have not been loaded yet. Also, if a more complex + consistency model is supported, then a selected unit (thread, + kernel as a whole) will see the new code from the entire patch + only when it is in a safe state. + + +4.3. Livepatch module handling +------------------------------ + +The usual behavior is that the new functions will get used when +the livepatch module is loaded. For this, the module init() function +has to register the patch (struct klp_patch) and enable it. See the +section "Livepatch life-cycle" below for more details about these +two operations. A condensed example module is sketched at the end of +this document. + +Module removal is only safe when there are no users of the underlying +functions. The immediate consistency model is not able to detect this; +therefore livepatch modules cannot be removed. See "Limitations" below. + +5. Livepatch life-cycle +======================= + +Livepatching defines four basic operations that define the life cycle of each +live patch: registration, enabling, disabling and unregistration. There are +several reasons why it is done this way. + +First, the patch is applied only when all patched symbols for already +loaded objects are found. The error handling is much easier if this +check is done before particular functions get redirected. + +Second, the immediate consistency model does not guarantee that no one is +sleeping in the new code after the patch is reverted. This means that the new +code needs to stay around "forever". If the code is there, one could apply it +again.
Therefore it makes sense to separate +the operations that might be done +once and those that need to be repeated when the patch is enabled (applied) +again. + +Third, it might take some time until the entire system is migrated +when a more complex consistency model is used. The patch revert might +block the livepatch module removal for too long. Therefore it is useful +to revert the patch using a separate operation that might be called +explicitly. But it does not make sense to remove all information +until the livepatch module is really removed. + + +5.1. Registration +----------------- + +Each patch first has to be registered using klp_register_patch(). This makes +the patch known to the livepatch framework. It also does some preliminary +computing and checks. + +In particular, the patch is added into the list of known patches. The +addresses of the patched functions are found according to their names. +The special relocations, mentioned in the section "New functions", are +applied. The relevant entries are created under +/sys/kernel/livepatch/. The patch is rejected when any operation +fails. + + +5.2. Enabling +------------- + +Registered patches might be enabled either by calling klp_enable_patch() or +by writing '1' to /sys/kernel/livepatch//enabled. The system will +start using the new implementation of the patched functions at this stage. + +In particular, if an original function is patched for the first time, a +function-specific struct klp_ops is created and a universal ftrace handler +is registered. + +Functions might be patched multiple times. The ftrace handler is registered +only once for the given function. Further patches just add an entry to the +list (see field `func_stack`) of the struct klp_ops. The last added +entry is chosen by the ftrace handler and becomes the active function +replacement. + +Note that the patches might be enabled in a different order than they were +registered. + + +5.3. Disabling +-------------- + +Enabled patches might get disabled either by calling klp_disable_patch() or +by writing '0' to /sys/kernel/livepatch//enabled. At this stage +either the code from the previously enabled patch or even the original +code gets used. + +Here all the functions (struct klp_func) associated with the to-be-disabled +patch are removed from the corresponding struct klp_ops. The ftrace handler +is unregistered and the struct klp_ops is freed when the func_stack list +becomes empty. + +Patches must be disabled in exactly the reverse order in which they were +enabled. This makes the problem and the implementation much easier. + + +5.4. Unregistration +------------------- + +Disabled patches might be unregistered by calling klp_unregister_patch(). +This can be done only when the patch is disabled and the code is no longer +used. It must be called before the livepatch module gets unloaded. + +At this stage, all the relevant sysfs entries are removed and the patch +is removed from the list of known patches. + + +6. Sysfs +======== + +Information about the registered patches can be found under +/sys/kernel/livepatch. The patches can be enabled and disabled +by writing to the 'enabled' files there. + +See Documentation/ABI/testing/sysfs-kernel-livepatch for more details. + + +7. Limitations +============== + +The current Livepatch implementation has several limitations: + + + + The patch must not change the semantic of the patched functions. + + The current implementation guarantees only that either the old + or the new function is called. The functions are patched one + by one.
It means that the patch must _not_ change the semantic + of the function. + + + + Data structures can not be patched. + + There is no support for versioning data structures or for migrating + one structure into another. Also, the simple consistency model does + not allow switching more functions atomically. + + Once there is a more complex consistency model, it will be possible to + use some workarounds. For example, it will be possible to use a hole + for a new member because the data structure is aligned. Or it will + be possible to use an existing member for something else. + + There are no plans to add more generic support for modified structures + at the moment. + + + + Only functions that can be traced can be patched. + + Livepatch is based on the dynamic ftrace. In particular, functions + implementing ftrace or the livepatch ftrace handler cannot be + patched. Otherwise, the code would end up in an infinite loop. A + potential mistake is prevented by marking the problematic functions + with "notrace". + + + + Anything inlined into __schedule() can not be patched. + + The switch_to macro is inlined into __schedule(). It switches the + context between two processes in the middle of the macro. It does + not save RIP in the x86_64 version (contrary to the 32-bit version). Instead, + the currently used __schedule()/switch_to() handles both processes. + + Now, let's have two different tasks. One calls the original + __schedule(); its registers are stored in a defined order, it + goes to sleep in the switch_to macro, and some other task is restored + using the original __schedule(). Then there is the second task, which + calls patched__schedule(); it goes to sleep there and the first task + is picked by the patched__schedule(). Its RSP is restored and now + the registers should be restored as well. But the order is different + in the new patched__schedule(), so... + + There is work in progress to remove this limitation. + + + + Livepatch modules can not be removed. + + The current implementation just redirects the functions at the very + beginning. It does not check if the functions are in use. In other + words, it knows when the functions get called but it does not + know when the functions return. Therefore it can not decide when + the livepatch module can be safely removed. + + This will most likely get solved once a more complex consistency model + is supported. The idea is that a safe state for patching should also + mean a safe state for removing the patch. + + Note that the patch itself might get disabled by writing zero + to /sys/kernel/livepatch//enabled. This causes the new + code to no longer get called. But it does not guarantee + that no one is sleeping anywhere in the new code. + + + + Livepatch works reliably only when the dynamic ftrace is located at + the very beginning of the function. + + The function needs to be redirected before the stack or the function + parameters are modified in any way. For example, livepatch requires + using the -fentry gcc compiler option on x86_64. + + One exception is the PPC port. It uses relative addressing and TOC. + Each function has to handle TOC and save LR before it can call + the ftrace handler. This operation has to be reverted on return. + Fortunately, the generic ftrace code has the same problem and all + this is handled at the ftrace level. + + + + Kretprobes using the ftrace framework conflict with the patched + functions. + + Both kretprobes and livepatches use a ftrace handler that modifies + the return address.
The first user wins. Either the probe or the patch + is rejected when the handler is already in use by the other. + + + + Kprobes in the original function are ignored when the code is + redirected to the new implementation. + + There is work in progress to add warnings about this situation. diff --git a/MAINTAINERS b/MAINTAINERS index 94ea42b..c6baaa6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6605,6 +6605,7 @@ F: kernel/livepatch/ F: include/linux/livepatch.h F: arch/x86/include/asm/livepatch.h F: arch/x86/kernel/livepatch.c +F: Documentation/livepatch/ F: Documentation/ABI/testing/sysfs-kernel-livepatch F: samples/livepatch/ L: live-patching@vger.kernel.org -- cgit v0.10.2
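To tie the documentation above back to code, here is a condensed sketch of a patch module in the spirit of samples/livepatch/livepatch-sample.c; it is abridged for illustration rather than copied verbatim, and, like the sample, it patches vmlinux's cmdline_proc_show():

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <linux/livepatch.h>

/* New implementation that replaces the original cmdline_proc_show(). */
static int livepatch_cmdline_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%s\n", "this has been live patched");
	return 0;
}

static struct klp_func funcs[] = {
	{
		.old_name = "cmdline_proc_show",
		.new_func = livepatch_cmdline_proc_show,
	}, { }
};

static struct klp_object objs[] = {
	{
		/* name == NULL means the patched object is vmlinux */
		.funcs = funcs,
	}, { }
};

static struct klp_patch patch = {
	.mod = THIS_MODULE,
	.objs = objs,
};

static int livepatch_init(void)
{
	int ret;

	/* Registration: add the patch to the framework, resolve symbols. */
	ret = klp_register_patch(&patch);
	if (ret)
		return ret;

	/* Enabling: redirect the patched functions via ftrace. */
	ret = klp_enable_patch(&patch);
	if (ret) {
		WARN_ON(klp_unregister_patch(&patch));
		return ret;
	}

	return 0;
}

static void livepatch_exit(void)
{
	/* Disable and unregister in the reverse order (see section 5). */
	WARN_ON(klp_disable_patch(&patch));
	WARN_ON(klp_unregister_patch(&patch));
}

module_init(livepatch_init);
module_exit(livepatch_exit);
MODULE_LICENSE("GPL");
MODULE_INFO(livepatch, "Y");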