summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/osf_sys.c36
-rw-r--r--arch/alpha/kernel/sys_titan.c1
-rw-r--r--arch/ia64/include/asm/xen/hypercall.h2
-rw-r--r--arch/ia64/xen/suspend.c9
-rw-r--r--arch/mips/Kconfig4
-rw-r--r--arch/mips/alchemy/mtx-1/board_setup.c4
-rw-r--r--arch/mips/alchemy/mtx-1/platform.c9
-rw-r--r--arch/mips/alchemy/xxs1500/board_setup.c4
-rw-r--r--arch/mips/include/asm/perf_event.h12
-rw-r--r--arch/mips/kernel/ftrace.c179
-rw-r--r--arch/mips/kernel/perf_event.c345
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c4
-rw-r--r--arch/mips/kernel/signal.c2
-rw-r--r--arch/mips/kernel/signal32.c2
-rw-r--r--arch/mips/kernel/smp.c31
-rw-r--r--arch/mips/kernel/syscall.c5
-rw-r--r--arch/mips/kernel/vpe.c4
-rw-r--r--arch/mips/loongson/Kconfig5
-rw-r--r--arch/mips/loongson/common/cmdline.c5
-rw-r--r--arch/mips/loongson/common/machtype.c3
-rw-r--r--arch/mips/math-emu/ieee754int.h4
-rw-r--r--arch/mips/mm/init.c2
-rw-r--r--arch/mips/mm/tlbex.c2
-rw-r--r--arch/mips/pci/ops-pmcmsp.c4
-rw-r--r--arch/mips/pmc-sierra/Kconfig4
-rw-r--r--arch/mips/pmc-sierra/msp71xx/msp_time.c2
-rw-r--r--arch/mn10300/include/asm/atomic.h2
-rw-r--r--arch/mn10300/include/asm/uaccess.h5
-rw-r--r--arch/mn10300/mm/cache-inv-icache.c4
-rw-r--r--arch/parisc/hpux/sys_hpux.c65
-rw-r--r--arch/powerpc/include/asm/lppaca.h16
-rw-r--r--arch/powerpc/kernel/paca.c14
-rw-r--r--arch/powerpc/mm/numa.c3
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c2
-rw-r--r--arch/powerpc/platforms/iseries/dt.c6
-rw-r--r--arch/powerpc/platforms/iseries/setup.c1
-rw-r--r--arch/um/drivers/mconsole_kern.c21
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c7
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/include/asm/ce4100.h6
-rw-r--r--arch/x86/include/asm/unistd_32.h4
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h15
-rw-r--r--arch/x86/include/asm/xen/page.h47
-rw-r--r--arch/x86/include/asm/xen/pci.h8
-rw-r--r--arch/x86/kernel/check.c8
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c2
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--arch/x86/mm/fault.c14
-rw-r--r--arch/x86/mm/init_64.c6
-rw-r--r--arch/x86/mm/numa_64.c6
-rw-r--r--arch/x86/mm/pageattr.c18
-rw-r--r--arch/x86/mm/pgtable.c11
-rw-r--r--arch/x86/pci/ce4100.c7
-rw-r--r--arch/x86/pci/xen.c159
-rw-r--r--arch/x86/platform/ce4100/ce4100.c2
-rw-r--r--arch/x86/platform/uv/tlb_uv.c4
-rw-r--r--arch/x86/xen/Kconfig8
-rw-r--r--arch/x86/xen/enlighten.c8
-rw-r--r--arch/x86/xen/mmu.c82
-rw-r--r--arch/x86/xen/p2m.c330
-rw-r--r--arch/x86/xen/setup.c68
-rw-r--r--arch/x86/xen/smp.c38
-rw-r--r--arch/x86/xen/suspend.c8
-rw-r--r--arch/x86/xen/time.c4
-rw-r--r--arch/x86/xen/xen-ops.h2
67 files changed, 1120 insertions, 585 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index fe698b5..376f221 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -230,44 +230,24 @@ linux_to_osf_statfs(struct kstatfs *linux_stat, struct osf_statfs __user *osf_st
return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0;
}
-static int
-do_osf_statfs(struct path *path, struct osf_statfs __user *buffer,
- unsigned long bufsiz)
+SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
+ struct osf_statfs __user *, buffer, unsigned long, bufsiz)
{
struct kstatfs linux_stat;
- int error = vfs_statfs(path, &linux_stat);
+ int error = user_statfs(pathname, &linux_stat);
if (!error)
error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
return error;
}
-SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
- struct osf_statfs __user *, buffer, unsigned long, bufsiz)
-{
- struct path path;
- int retval;
-
- retval = user_path(pathname, &path);
- if (!retval) {
- retval = do_osf_statfs(&path, buffer, bufsiz);
- path_put(&path);
- }
- return retval;
-}
-
SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd,
struct osf_statfs __user *, buffer, unsigned long, bufsiz)
{
- struct file *file;
- int retval;
-
- retval = -EBADF;
- file = fget(fd);
- if (file) {
- retval = do_osf_statfs(&file->f_path, buffer, bufsiz);
- fput(file);
- }
- return retval;
+ struct kstatfs linux_stat;
+ int error = fd_statfs(fd, &linux_stat);
+ if (!error)
+ error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
+ return error;
}
/*
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index f6c108a..8c13a0c 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -149,6 +149,7 @@ static int
titan_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
bool force)
{
+ unsigned int irq = d->irq;
spin_lock(&titan_irq_lock);
titan_cpu_set_irq_affinity(irq - 16, *affinity);
titan_update_irq_hw(titan_cached_irq_mask);
diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h
index 96fc623..ed28bcd 100644
--- a/arch/ia64/include/asm/xen/hypercall.h
+++ b/arch/ia64/include/asm/xen/hypercall.h
@@ -107,7 +107,7 @@ extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
static inline int
xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
{
- return _hypercall2(int, sched_op_new, cmd, arg);
+ return _hypercall2(int, sched_op, cmd, arg);
}
static inline long
diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c
index fd66b04..419c862 100644
--- a/arch/ia64/xen/suspend.c
+++ b/arch/ia64/xen/suspend.c
@@ -37,19 +37,14 @@ xen_mm_unpin_all(void)
/* nothing */
}
-void xen_pre_device_suspend(void)
-{
- /* nothing */
-}
-
void
-xen_pre_suspend()
+xen_arch_pre_suspend()
{
/* nothing */
}
void
-xen_post_suspend(int suspend_cancelled)
+xen_arch_post_suspend(int suspend_cancelled)
{
if (suspend_cancelled)
return;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f5ecc05..d889835 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -4,6 +4,7 @@ config MIPS
select HAVE_GENERIC_DMA_COHERENT
select HAVE_IDE
select HAVE_OPROFILE
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select HAVE_ARCH_KGDB
@@ -208,6 +209,7 @@ config MACH_JZ4740
select ARCH_REQUIRE_GPIOLIB
select SYS_HAS_EARLY_PRINTK
select HAVE_PWM
+ select HAVE_CLK
config LASAT
bool "LASAT Networks platforms"
@@ -333,6 +335,8 @@ config PNX8550_STB810
config PMC_MSP
bool "PMC-Sierra MSP chipsets"
depends on EXPERIMENTAL
+ select CEVT_R4K
+ select CSRC_R4K
select DMA_NONCOHERENT
select SWAP_IO_SPACE
select NO_EXCEPT_FILL
diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c
index 6398fa9..40b84b9 100644
--- a/arch/mips/alchemy/mtx-1/board_setup.c
+++ b/arch/mips/alchemy/mtx-1/board_setup.c
@@ -54,8 +54,8 @@ int mtx1_pci_idsel(unsigned int devsel, int assert);
static void mtx1_reset(char *c)
{
- /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */
- au_writel(0x00000000, 0xAE00001C);
+ /* Jump to the reset vector */
+ __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
}
static void mtx1_power_off(void)
diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c
index e30e42a..956f946 100644
--- a/arch/mips/alchemy/mtx-1/platform.c
+++ b/arch/mips/alchemy/mtx-1/platform.c
@@ -28,6 +28,8 @@
#include <linux/mtd/physmap.h>
#include <mtd/mtd-abi.h>
+#include <asm/mach-au1x00/au1xxx_eth.h>
+
static struct gpio_keys_button mtx1_gpio_button[] = {
{
.gpio = 207,
@@ -140,10 +142,17 @@ static struct __initdata platform_device * mtx1_devs[] = {
&mtx1_mtd,
};
+static struct au1000_eth_platform_data mtx1_au1000_eth0_pdata = {
+ .phy_search_highest_addr = 1,
+ .phy1_search_mac0 = 1,
+};
+
static int __init mtx1_register_devices(void)
{
int rc;
+ au1xxx_override_eth_cfg(0, &mtx1_au1000_eth0_pdata);
+
rc = gpio_request(mtx1_gpio_button[0].gpio,
mtx1_gpio_button[0].desc);
if (rc < 0) {
diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c
index b43c918..80c521e 100644
--- a/arch/mips/alchemy/xxs1500/board_setup.c
+++ b/arch/mips/alchemy/xxs1500/board_setup.c
@@ -36,8 +36,8 @@
static void xxs1500_reset(char *c)
{
- /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */
- au_writel(0x00000000, 0xAE00001C);
+ /* Jump to the reset vector */
+ __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
}
static void xxs1500_power_off(void)
diff --git a/arch/mips/include/asm/perf_event.h b/arch/mips/include/asm/perf_event.h
index e00007c..d0c7749 100644
--- a/arch/mips/include/asm/perf_event.h
+++ b/arch/mips/include/asm/perf_event.h
@@ -11,15 +11,5 @@
#ifndef __MIPS_PERF_EVENT_H__
#define __MIPS_PERF_EVENT_H__
-
-/*
- * MIPS performance counters do not raise NMI upon overflow, a regular
- * interrupt will be signaled. Hence we can do the pending perf event
- * work at the tail of the irq handler.
- */
-static inline void
-set_perf_event_pending(void)
-{
-}
-
+/* Leave it empty here. The file is required by linux/perf_event.h */
#endif /* __MIPS_PERF_EVENT_H__ */
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 5a84a1f..94ca2b0 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -17,29 +17,13 @@
#include <asm/cacheflush.h>
#include <asm/uasm.h>
-/*
- * If the Instruction Pointer is in module space (0xc0000000), return true;
- * otherwise, it is in kernel space (0x80000000), return false.
- *
- * FIXME: This will not work when the kernel space and module space are the
- * same. If they are the same, we need to modify scripts/recordmcount.pl,
- * ftrace_make_nop/call() and the other related parts to ensure the
- * enabling/disabling of the calling site to _mcount is right for both kernel
- * and module.
- */
-
-static inline int in_module(unsigned long ip)
-{
- return ip & 0x40000000;
-}
+#include <asm-generic/sections.h>
#ifdef CONFIG_DYNAMIC_FTRACE
#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */
#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */
-#define INSN_B_1F_4 0x10000004 /* b 1f; offset = 4 */
-#define INSN_B_1F_5 0x10000005 /* b 1f; offset = 5 */
#define INSN_NOP 0x00000000 /* nop */
#define INSN_JAL(addr) \
((unsigned int)(JAL | (((addr) >> 2) & ADDR_MASK)))
@@ -69,6 +53,20 @@ static inline void ftrace_dyn_arch_init_insns(void)
#endif
}
+/*
+ * Check if the address is in kernel space
+ *
+ * Clone core_kernel_text() from kernel/extable.c, but doesn't call
+ * init_kernel_text() for Ftrace doesn't trace functions in init sections.
+ */
+static inline int in_kernel_space(unsigned long ip)
+{
+ if (ip >= (unsigned long)_stext &&
+ ip <= (unsigned long)_etext)
+ return 1;
+ return 0;
+}
+
static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
{
int faulted;
@@ -84,6 +82,42 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
return 0;
}
+/*
+ * The details about the calling site of mcount on MIPS
+ *
+ * 1. For kernel:
+ *
+ * move at, ra
+ * jal _mcount --> nop
+ *
+ * 2. For modules:
+ *
+ * 2.1 For KBUILD_MCOUNT_RA_ADDRESS and CONFIG_32BIT
+ *
+ * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
+ * addiu v1, v1, low_16bit_of_mcount
+ * move at, ra
+ * move $12, ra_address
+ * jalr v1
+ * sub sp, sp, 8
+ * 1: offset = 5 instructions
+ * 2.2 For the Other situations
+ *
+ * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
+ * addiu v1, v1, low_16bit_of_mcount
+ * move at, ra
+ * jalr v1
+ * nop | move $12, ra_address | sub sp, sp, 8
+ * 1: offset = 4 instructions
+ */
+
+#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
+#define MCOUNT_OFFSET_INSNS 5
+#else
+#define MCOUNT_OFFSET_INSNS 4
+#endif
+#define INSN_B_1F (0x10000000 | MCOUNT_OFFSET_INSNS)
+
int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
@@ -91,39 +125,11 @@ int ftrace_make_nop(struct module *mod,
unsigned long ip = rec->ip;
/*
- * We have compiled module with -mlong-calls, but compiled the kernel
- * without it, we need to cope with them respectively.
+ * If ip is in kernel space, no long call, otherwise, long call is
+ * needed.
*/
- if (in_module(ip)) {
-#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
- /*
- * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
- * addiu v1, v1, low_16bit_of_mcount
- * move at, ra
- * move $12, ra_address
- * jalr v1
- * sub sp, sp, 8
- * 1: offset = 5 instructions
- */
- new = INSN_B_1F_5;
-#else
- /*
- * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
- * addiu v1, v1, low_16bit_of_mcount
- * move at, ra
- * jalr v1
- * nop | move $12, ra_address | sub sp, sp, 8
- * 1: offset = 4 instructions
- */
- new = INSN_B_1F_4;
-#endif
- } else {
- /*
- * move at, ra
- * jal _mcount --> nop
- */
- new = INSN_NOP;
- }
+ new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F;
+
return ftrace_modify_code(ip, new);
}
@@ -132,8 +138,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
unsigned int new;
unsigned long ip = rec->ip;
- /* ip, module: 0xc0000000, kernel: 0x80000000 */
- new = in_module(ip) ? insn_lui_v1_hi16_mcount : insn_jal_ftrace_caller;
+ new = in_kernel_space(ip) ? insn_jal_ftrace_caller :
+ insn_lui_v1_hi16_mcount;
return ftrace_modify_code(ip, new);
}
@@ -190,29 +196,25 @@ int ftrace_disable_ftrace_graph_caller(void)
#define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */
#define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */
-unsigned long ftrace_get_parent_addr(unsigned long self_addr,
- unsigned long parent,
- unsigned long parent_addr,
- unsigned long fp)
+unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long
+ old_parent_ra, unsigned long parent_ra_addr, unsigned long fp)
{
- unsigned long sp, ip, ra;
+ unsigned long sp, ip, tmp;
unsigned int code;
int faulted;
/*
- * For module, move the ip from calling site of mcount to the
- * instruction "lui v1, hi_16bit_of_mcount"(offset is 20), but for
- * kernel, move to the instruction "move ra, at"(offset is 12)
+ * For module, move the ip from the return address after the
+ * instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for
+ * kernel, move after the instruction "move ra, at"(offset is 16)
*/
- ip = self_addr - (in_module(self_addr) ? 20 : 12);
+ ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24);
/*
* search the text until finding the non-store instruction or "s{d,w}
* ra, offset(sp)" instruction
*/
do {
- ip -= 4;
-
/* get the code at "ip": code = *(unsigned int *)ip; */
safe_load_code(code, ip, faulted);
@@ -224,18 +226,20 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
* store the ra on the stack
*/
if ((code & S_R_SP) != S_R_SP)
- return parent_addr;
+ return parent_ra_addr;
- } while (((code & S_RA_SP) != S_RA_SP));
+ /* Move to the next instruction */
+ ip -= 4;
+ } while ((code & S_RA_SP) != S_RA_SP);
sp = fp + (code & OFFSET_MASK);
- /* ra = *(unsigned long *)sp; */
- safe_load_stack(ra, sp, faulted);
+ /* tmp = *(unsigned long *)sp; */
+ safe_load_stack(tmp, sp, faulted);
if (unlikely(faulted))
return 0;
- if (ra == parent)
+ if (tmp == old_parent_ra)
return sp;
return 0;
}
@@ -246,21 +250,21 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
* Hook the return address and push it in the stack of return addrs
* in current thread info.
*/
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
unsigned long fp)
{
- unsigned long old;
+ unsigned long old_parent_ra;
struct ftrace_graph_ent trace;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
- int faulted;
+ int faulted, insns;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
/*
- * "parent" is the stack address saved the return address of the caller
- * of _mcount.
+ * "parent_ra_addr" is the stack address saved the return address of
+ * the caller of _mcount.
*
* if the gcc < 4.5, a leaf function does not save the return address
* in the stack address, so, we "emulate" one in _mcount's stack space,
@@ -275,37 +279,44 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
* do it in ftrace_graph_caller of mcount.S.
*/
- /* old = *parent; */
- safe_load_stack(old, parent, faulted);
+ /* old_parent_ra = *parent_ra_addr; */
+ safe_load_stack(old_parent_ra, parent_ra_addr, faulted);
if (unlikely(faulted))
goto out;
#ifndef KBUILD_MCOUNT_RA_ADDRESS
- parent = (unsigned long *)ftrace_get_parent_addr(self_addr, old,
- (unsigned long)parent, fp);
+ parent_ra_addr = (unsigned long *)ftrace_get_parent_ra_addr(self_ra,
+ old_parent_ra, (unsigned long)parent_ra_addr, fp);
/*
* If fails when getting the stack address of the non-leaf function's
* ra, stop function graph tracer and return
*/
- if (parent == 0)
+ if (parent_ra_addr == 0)
goto out;
#endif
- /* *parent = return_hooker; */
- safe_store_stack(return_hooker, parent, faulted);
+ /* *parent_ra_addr = return_hooker; */
+ safe_store_stack(return_hooker, parent_ra_addr, faulted);
if (unlikely(faulted))
goto out;
- if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) ==
- -EBUSY) {
- *parent = old;
+ if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp)
+ == -EBUSY) {
+ *parent_ra_addr = old_parent_ra;
return;
}
- trace.func = self_addr;
+ /*
+ * Get the recorded ip of the current mcount calling site in the
+ * __mcount_loc section, which will be used to filter the function
+ * entries configured through the tracing/set_graph_function interface.
+ */
+
+ insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
+ trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
current->curr_ret_stack--;
- *parent = old;
+ *parent_ra_addr = old_parent_ra;
}
return;
out:
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index 2b7f3f7..a824485 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -161,41 +161,6 @@ mipspmu_event_set_period(struct perf_event *event,
return ret;
}
-static int mipspmu_enable(struct perf_event *event)
-{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- int idx;
- int err = 0;
-
- /* To look for a free counter for this event. */
- idx = mipspmu->alloc_counter(cpuc, hwc);
- if (idx < 0) {
- err = idx;
- goto out;
- }
-
- /*
- * If there is an event in the counter we are going to use then
- * make sure it is disabled.
- */
- event->hw.idx = idx;
- mipspmu->disable_event(idx);
- cpuc->events[idx] = event;
-
- /* Set the period for the event. */
- mipspmu_event_set_period(event, hwc, idx);
-
- /* Enable the event. */
- mipspmu->enable_event(hwc, idx);
-
- /* Propagate our changes to the userspace mapping. */
- perf_event_update_userpage(event);
-
-out:
- return err;
-}
-
static void mipspmu_event_update(struct perf_event *event,
struct hw_perf_event *hwc,
int idx)
@@ -204,7 +169,7 @@ static void mipspmu_event_update(struct perf_event *event,
unsigned long flags;
int shift = 64 - TOTAL_BITS;
s64 prev_raw_count, new_raw_count;
- s64 delta;
+ u64 delta;
again:
prev_raw_count = local64_read(&hwc->prev_count);
@@ -231,32 +196,90 @@ again:
return;
}
-static void mipspmu_disable(struct perf_event *event)
+static void mipspmu_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!mipspmu)
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+
+ /* Set the period for the event. */
+ mipspmu_event_set_period(event, hwc, hwc->idx);
+
+ /* Enable the event. */
+ mipspmu->enable_event(hwc, hwc->idx);
+}
+
+static void mipspmu_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!mipspmu)
+ return;
+
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ /* We are working on a local event. */
+ mipspmu->disable_event(hwc->idx);
+ barrier();
+ mipspmu_event_update(event, hwc, hwc->idx);
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ }
+}
+
+static int mipspmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
+ int idx;
+ int err = 0;
+ perf_pmu_disable(event->pmu);
- WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
+ /* To look for a free counter for this event. */
+ idx = mipspmu->alloc_counter(cpuc, hwc);
+ if (idx < 0) {
+ err = idx;
+ goto out;
+ }
- /* We are working on a local event. */
+ /*
+ * If there is an event in the counter we are going to use then
+ * make sure it is disabled.
+ */
+ event->hw.idx = idx;
mipspmu->disable_event(idx);
+ cpuc->events[idx] = event;
- barrier();
-
- mipspmu_event_update(event, hwc, idx);
- cpuc->events[idx] = NULL;
- clear_bit(idx, cpuc->used_mask);
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ if (flags & PERF_EF_START)
+ mipspmu_start(event, PERF_EF_RELOAD);
+ /* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);
+
+out:
+ perf_pmu_enable(event->pmu);
+ return err;
}
-static void mipspmu_unthrottle(struct perf_event *event)
+static void mipspmu_del(struct perf_event *event, int flags)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
- mipspmu->enable_event(hwc, hwc->idx);
+ WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
+
+ mipspmu_stop(event, PERF_EF_UPDATE);
+ cpuc->events[idx] = NULL;
+ clear_bit(idx, cpuc->used_mask);
+
+ perf_event_update_userpage(event);
}
static void mipspmu_read(struct perf_event *event)
@@ -270,12 +293,17 @@ static void mipspmu_read(struct perf_event *event)
mipspmu_event_update(event, hwc, hwc->idx);
}
-static struct pmu pmu = {
- .enable = mipspmu_enable,
- .disable = mipspmu_disable,
- .unthrottle = mipspmu_unthrottle,
- .read = mipspmu_read,
-};
+static void mipspmu_enable(struct pmu *pmu)
+{
+ if (mipspmu)
+ mipspmu->start();
+}
+
+static void mipspmu_disable(struct pmu *pmu)
+{
+ if (mipspmu)
+ mipspmu->stop();
+}
static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmu_reserve_mutex);
@@ -318,6 +346,82 @@ static void mipspmu_free_irq(void)
perf_irq = save_perf_irq;
}
+/*
+ * mipsxx/rm9000/loongson2 have different performance counters, they have
+ * specific low-level init routines.
+ */
+static void reset_counters(void *arg);
+static int __hw_perf_event_init(struct perf_event *event);
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ if (atomic_dec_and_mutex_lock(&active_events,
+ &pmu_reserve_mutex)) {
+ /*
+ * We must not call the destroy function with interrupts
+ * disabled.
+ */
+ on_each_cpu(reset_counters,
+ (void *)(long)mipspmu->num_counters, 1);
+ mipspmu_free_irq();
+ mutex_unlock(&pmu_reserve_mutex);
+ }
+}
+
+static int mipspmu_event_init(struct perf_event *event)
+{
+ int err = 0;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ break;
+
+ default:
+ return -ENOENT;
+ }
+
+ if (!mipspmu || event->cpu >= nr_cpumask_bits ||
+ (event->cpu >= 0 && !cpu_online(event->cpu)))
+ return -ENODEV;
+
+ if (!atomic_inc_not_zero(&active_events)) {
+ if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
+ atomic_dec(&active_events);
+ return -ENOSPC;
+ }
+
+ mutex_lock(&pmu_reserve_mutex);
+ if (atomic_read(&active_events) == 0)
+ err = mipspmu_get_irq();
+
+ if (!err)
+ atomic_inc(&active_events);
+ mutex_unlock(&pmu_reserve_mutex);
+ }
+
+ if (err)
+ return err;
+
+ err = __hw_perf_event_init(event);
+ if (err)
+ hw_perf_event_destroy(event);
+
+ return err;
+}
+
+static struct pmu pmu = {
+ .pmu_enable = mipspmu_enable,
+ .pmu_disable = mipspmu_disable,
+ .event_init = mipspmu_event_init,
+ .add = mipspmu_add,
+ .del = mipspmu_del,
+ .start = mipspmu_start,
+ .stop = mipspmu_stop,
+ .read = mipspmu_read,
+};
+
static inline unsigned int
mipspmu_perf_event_encode(const struct mips_perf_event *pev)
{
@@ -382,8 +486,9 @@ static int validate_event(struct cpu_hw_events *cpuc,
{
struct hw_perf_event fake_hwc = event->hw;
- if (event->pmu && event->pmu != &pmu)
- return 0;
+ /* Allow mixed event group. So return 1 to pass validation. */
+ if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
+ return 1;
return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0;
}
@@ -409,73 +514,6 @@ static int validate_group(struct perf_event *event)
return 0;
}
-/*
- * mipsxx/rm9000/loongson2 have different performance counters, they have
- * specific low-level init routines.
- */
-static void reset_counters(void *arg);
-static int __hw_perf_event_init(struct perf_event *event);
-
-static void hw_perf_event_destroy(struct perf_event *event)
-{
- if (atomic_dec_and_mutex_lock(&active_events,
- &pmu_reserve_mutex)) {
- /*
- * We must not call the destroy function with interrupts
- * disabled.
- */
- on_each_cpu(reset_counters,
- (void *)(long)mipspmu->num_counters, 1);
- mipspmu_free_irq();
- mutex_unlock(&pmu_reserve_mutex);
- }
-}
-
-const struct pmu *hw_perf_event_init(struct perf_event *event)
-{
- int err = 0;
-
- if (!mipspmu || event->cpu >= nr_cpumask_bits ||
- (event->cpu >= 0 && !cpu_online(event->cpu)))
- return ERR_PTR(-ENODEV);
-
- if (!atomic_inc_not_zero(&active_events)) {
- if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
- atomic_dec(&active_events);
- return ERR_PTR(-ENOSPC);
- }
-
- mutex_lock(&pmu_reserve_mutex);
- if (atomic_read(&active_events) == 0)
- err = mipspmu_get_irq();
-
- if (!err)
- atomic_inc(&active_events);
- mutex_unlock(&pmu_reserve_mutex);
- }
-
- if (err)
- return ERR_PTR(err);
-
- err = __hw_perf_event_init(event);
- if (err)
- hw_perf_event_destroy(event);
-
- return err ? ERR_PTR(err) : &pmu;
-}
-
-void hw_perf_enable(void)
-{
- if (mipspmu)
- mipspmu->start();
-}
-
-void hw_perf_disable(void)
-{
- if (mipspmu)
- mipspmu->stop();
-}
-
/* This is needed by specific irq handlers in perf_event_*.c */
static void
handle_associated_event(struct cpu_hw_events *cpuc,
@@ -496,21 +534,13 @@ handle_associated_event(struct cpu_hw_events *cpuc,
#include "perf_event_mipsxx.c"
/* Callchain handling code. */
-static inline void
-callchain_store(struct perf_callchain_entry *entry,
- u64 ip)
-{
- if (entry->nr < PERF_MAX_STACK_DEPTH)
- entry->ip[entry->nr++] = ip;
-}
/*
* Leave userspace callchain empty for now. When we find a way to trace
* the user stack callchains, we add here.
*/
-static void
-perf_callchain_user(struct pt_regs *regs,
- struct perf_callchain_entry *entry)
+void perf_callchain_user(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
{
}
@@ -523,23 +553,21 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
while (!kstack_end(sp)) {
addr = *sp++;
if (__kernel_text_address(addr)) {
- callchain_store(entry, addr);
+ perf_callchain_store(entry, addr);
if (entry->nr >= PERF_MAX_STACK_DEPTH)
break;
}
}
}
-static void
-perf_callchain_kernel(struct pt_regs *regs,
- struct perf_callchain_entry *entry)
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
{
unsigned long sp = regs->regs[29];
#ifdef CONFIG_KALLSYMS
unsigned long ra = regs->regs[31];
unsigned long pc = regs->cp0_epc;
- callchain_store(entry, PERF_CONTEXT_KERNEL);
if (raw_show_trace || !__kernel_text_address(pc)) {
unsigned long stack_page =
(unsigned long)task_stack_page(current);
@@ -549,53 +577,12 @@ perf_callchain_kernel(struct pt_regs *regs,
return;
}
do {
- callchain_store(entry, pc);
+ perf_callchain_store(entry, pc);
if (entry->nr >= PERF_MAX_STACK_DEPTH)
break;
pc = unwind_stack(current, &sp, pc, &ra);
} while (pc);
#else
- callchain_store(entry, PERF_CONTEXT_KERNEL);
save_raw_perf_callchain(entry, sp);
#endif
}
-
-static void
-perf_do_callchain(struct pt_regs *regs,
- struct perf_callchain_entry *entry)
-{
- int is_user;
-
- if (!regs)
- return;
-
- is_user = user_mode(regs);
-
- if (!current || !current->pid)
- return;
-
- if (is_user && current->state != TASK_RUNNING)
- return;
-
- if (!is_user) {
- perf_callchain_kernel(regs, entry);
- if (current->mm)
- regs = task_pt_regs(current);
- else
- regs = NULL;
- }
- if (regs)
- perf_callchain_user(regs, entry);
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-
-struct perf_callchain_entry *
-perf_callchain(struct pt_regs *regs)
-{
- struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
-
- entry->nr = 0;
- perf_do_callchain(regs, entry);
- return entry;
-}
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 183e0d2..d9a7db7 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -696,7 +696,7 @@ static int mipsxx_pmu_handle_shared_irq(void)
* interrupt, not NMI.
*/
if (handled == IRQ_HANDLED)
- perf_event_do_pending();
+ irq_work_run();
#ifdef CONFIG_MIPS_MT_SMP
read_unlock(&pmuint_rwlock);
@@ -1045,6 +1045,8 @@ init_hw_perf_events(void)
"CPU, irq %d%s\n", mipspmu->name, counters, irq,
irq < 0 ? " (share with timer interrupt)" : "");
+ perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+
return 0;
}
early_initcall(init_hw_perf_events);
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 5922342..dbbe0ce 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -84,7 +84,7 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
static int protected_restore_fp_context(struct sigcontext __user *sc)
{
- int err, tmp;
+ int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index a0ed0e0..aae9866 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -115,7 +115,7 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
{
- int err, tmp;
+ int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 383aeb9..32a2561 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -193,6 +193,22 @@ void __devinit smp_prepare_boot_cpu(void)
*/
static struct task_struct *cpu_idle_thread[NR_CPUS];
+struct create_idle {
+ struct work_struct work;
+ struct task_struct *idle;
+ struct completion done;
+ int cpu;
+};
+
+static void __cpuinit do_fork_idle(struct work_struct *work)
+{
+ struct create_idle *c_idle =
+ container_of(work, struct create_idle, work);
+
+ c_idle->idle = fork_idle(c_idle->cpu);
+ complete(&c_idle->done);
+}
+
int __cpuinit __cpu_up(unsigned int cpu)
{
struct task_struct *idle;
@@ -203,8 +219,19 @@ int __cpuinit __cpu_up(unsigned int cpu)
* Linux can schedule processes on this slave.
*/
if (!cpu_idle_thread[cpu]) {
- idle = fork_idle(cpu);
- cpu_idle_thread[cpu] = idle;
+ /*
+ * Schedule work item to avoid forking user task
+ * Ported from arch/x86/kernel/smpboot.c
+ */
+ struct create_idle c_idle = {
+ .cpu = cpu,
+ .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
+ };
+
+ INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
+ schedule_work(&c_idle.work);
+ wait_for_completion(&c_idle.done);
+ idle = cpu_idle_thread[cpu] = c_idle.idle;
if (IS_ERR(idle))
panic(KERN_ERR "Fork failed for CPU %d", cpu);
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 1dc6edf..58beabf 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -383,12 +383,11 @@ save_static_function(sys_sysmips);
static int __used noinline
_sys_sysmips(nabi_no_regargs struct pt_regs regs)
{
- long cmd, arg1, arg2, arg3;
+ long cmd, arg1, arg2;
cmd = regs.regs[4];
arg1 = regs.regs[5];
arg2 = regs.regs[6];
- arg3 = regs.regs[7];
switch (cmd) {
case MIPS_ATOMIC_SET:
@@ -405,7 +404,7 @@ _sys_sysmips(nabi_no_regargs struct pt_regs regs)
if (arg1 & 2)
set_thread_flag(TIF_LOGADE);
else
- clear_thread_flag(TIF_FIXADE);
+ clear_thread_flag(TIF_LOGADE);
return 0;
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 6a1fdfe..ab52b7c 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -148,9 +148,9 @@ struct {
spinlock_t tc_list_lock;
struct list_head tc_list; /* Thread contexts */
} vpecontrol = {
- .vpe_list_lock = SPIN_LOCK_UNLOCKED,
+ .vpe_list_lock = __SPIN_LOCK_UNLOCKED(vpe_list_lock),
.vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list),
- .tc_list_lock = SPIN_LOCK_UNLOCKED,
+ .tc_list_lock = __SPIN_LOCK_UNLOCKED(tc_list_lock),
.tc_list = LIST_HEAD_INIT(vpecontrol.tc_list)
};
diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson/Kconfig
index 6e1b77f..aca93ee 100644
--- a/arch/mips/loongson/Kconfig
+++ b/arch/mips/loongson/Kconfig
@@ -1,6 +1,7 @@
+if MACH_LOONGSON
+
choice
prompt "Machine Type"
- depends on MACH_LOONGSON
config LEMOTE_FULOONG2E
bool "Lemote Fuloong(2e) mini-PC"
@@ -87,3 +88,5 @@ config LOONGSON_UART_BASE
config LOONGSON_MC146818
bool
default n
+
+endif # MACH_LOONGSON
diff --git a/arch/mips/loongson/common/cmdline.c b/arch/mips/loongson/common/cmdline.c
index 1a06def..353e1d2 100644
--- a/arch/mips/loongson/common/cmdline.c
+++ b/arch/mips/loongson/common/cmdline.c
@@ -44,10 +44,5 @@ void __init prom_init_cmdline(void)
strcat(arcs_cmdline, " ");
}
- if ((strstr(arcs_cmdline, "console=")) == NULL)
- strcat(arcs_cmdline, " console=ttyS0,115200");
- if ((strstr(arcs_cmdline, "root=")) == NULL)
- strcat(arcs_cmdline, " root=/dev/hda1");
-
prom_init_machtype();
}
diff --git a/arch/mips/loongson/common/machtype.c b/arch/mips/loongson/common/machtype.c
index 81fbe6b..2efd5d9 100644
--- a/arch/mips/loongson/common/machtype.c
+++ b/arch/mips/loongson/common/machtype.c
@@ -41,7 +41,7 @@ void __weak __init mach_prom_init_machtype(void)
void __init prom_init_machtype(void)
{
- char *p, str[MACHTYPE_LEN];
+ char *p, str[MACHTYPE_LEN + 1];
int machtype = MACH_LEMOTE_FL2E;
mips_machtype = LOONGSON_MACHTYPE;
@@ -53,6 +53,7 @@ void __init prom_init_machtype(void)
}
p += strlen("machtype=");
strncpy(str, p, MACHTYPE_LEN);
+ str[MACHTYPE_LEN] = '\0';
p = strstr(str, " ");
if (p)
*p = '\0';
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 2701d95..2a7d43f 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -70,7 +70,7 @@
#define COMPXSP \
- unsigned xm; int xe; int xs; int xc
+ unsigned xm; int xe; int xs __maybe_unused; int xc
#define COMPYSP \
unsigned ym; int ye; int ys; int yc
@@ -104,7 +104,7 @@
#define COMPXDP \
-u64 xm; int xe; int xs; int xc
+u64 xm; int xe; int xs __maybe_unused; int xc
#define COMPYDP \
u64 ym; int ye; int ys; int yc
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 2efcbd2..279599e 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -324,7 +324,7 @@ int page_is_ram(unsigned long pagenr)
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
- unsigned long lastpfn;
+ unsigned long lastpfn __maybe_unused;
pagetable_init();
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 083d341..04f9e17 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -109,6 +109,8 @@ static bool scratchpad_available(void)
static int scratchpad_offset(int i)
{
BUG();
+ /* Really unreachable, but evidently some GCC want this. */
+ return 0;
}
#endif
/*
diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c
index b7c03d8..68798f8 100644
--- a/arch/mips/pci/ops-pmcmsp.c
+++ b/arch/mips/pci/ops-pmcmsp.c
@@ -308,7 +308,7 @@ static struct resource pci_mem_resource = {
* RETURNS: PCIBIOS_SUCCESSFUL - success
*
****************************************************************************/
-static int bpci_interrupt(int irq, void *dev_id)
+static irqreturn_t bpci_interrupt(int irq, void *dev_id)
{
struct msp_pci_regs *preg = (void *)PCI_BASE_REG;
unsigned int stat = preg->if_status;
@@ -326,7 +326,7 @@ static int bpci_interrupt(int irq, void *dev_id)
/* write to clear all asserted interrupts */
preg->if_status = stat;
- return PCIBIOS_SUCCESSFUL;
+ return IRQ_HANDLED;
}
/*****************************************************************************
diff --git a/arch/mips/pmc-sierra/Kconfig b/arch/mips/pmc-sierra/Kconfig
index c139988..8d79849 100644
--- a/arch/mips/pmc-sierra/Kconfig
+++ b/arch/mips/pmc-sierra/Kconfig
@@ -4,15 +4,11 @@ choice
config PMC_MSP4200_EVAL
bool "PMC-Sierra MSP4200 Eval Board"
- select CEVT_R4K
- select CSRC_R4K
select IRQ_MSP_SLP
select HW_HAS_PCI
config PMC_MSP4200_GW
bool "PMC-Sierra MSP4200 VoIP Gateway"
- select CEVT_R4K
- select CSRC_R4K
select IRQ_MSP_SLP
select HW_HAS_PCI
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_time.c b/arch/mips/pmc-sierra/msp71xx/msp_time.c
index cca64e1..01df84c 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_time.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_time.c
@@ -81,7 +81,7 @@ void __init plat_time_init(void)
mips_hpt_frequency = cpu_rate/2;
}
-unsigned int __init get_c0_compare_int(void)
+unsigned int __cpuinit get_c0_compare_int(void)
{
return MSP_INT_VPE0_TIMER;
}
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 92d2f92..9d773a6 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -139,7 +139,7 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
* Atomically reads the value of @v. Note that the guaranteed
* useful range of an atomic_t is only 24 bits.
*/
-#define atomic_read(v) ((v)->counter)
+#define atomic_read(v) (ACCESS_ONCE((v)->counter))
/**
* atomic_set - set atomic variable
diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h
index 679dee0..3d6e60d 100644
--- a/arch/mn10300/include/asm/uaccess.h
+++ b/arch/mn10300/include/asm/uaccess.h
@@ -160,9 +160,10 @@ struct __large_struct { unsigned long buf[100]; };
#define __get_user_check(x, ptr, size) \
({ \
+ const __typeof__(ptr) __guc_ptr = (ptr); \
int _e; \
- if (likely(__access_ok((unsigned long) (ptr), (size)))) \
- _e = __get_user_nocheck((x), (ptr), (size)); \
+ if (likely(__access_ok((unsigned long) __guc_ptr, (size)))) \
+ _e = __get_user_nocheck((x), __guc_ptr, (size)); \
else { \
_e = -EFAULT; \
(x) = (__typeof__(x))0; \
diff --git a/arch/mn10300/mm/cache-inv-icache.c b/arch/mn10300/mm/cache-inv-icache.c
index a8933a6..a6b63dd 100644
--- a/arch/mn10300/mm/cache-inv-icache.c
+++ b/arch/mn10300/mm/cache-inv-icache.c
@@ -69,7 +69,7 @@ static void flush_icache_page_range(unsigned long start, unsigned long end)
/* invalidate the icache coverage on that region */
mn10300_local_icache_inv_range2(addr + off, size);
- smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end);
+ smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
}
/**
@@ -101,7 +101,7 @@ void flush_icache_range(unsigned long start, unsigned long end)
* directly */
start_page = (start >= 0x80000000UL) ? start : 0x80000000UL;
mn10300_icache_inv_range(start_page, end);
- smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end);
+ smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
if (start_page == start)
goto done;
end = start_page;
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 3039408..6ab9580 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -185,26 +185,21 @@ struct hpux_statfs {
int16_t f_pad;
};
-static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf)
+static int do_statfs_hpux(struct kstatfs *st, struct hpux_statfs __user *p)
{
- struct kstatfs st;
- int retval;
-
- retval = vfs_statfs(path, &st);
- if (retval)
- return retval;
-
- memset(buf, 0, sizeof(*buf));
- buf->f_type = st.f_type;
- buf->f_bsize = st.f_bsize;
- buf->f_blocks = st.f_blocks;
- buf->f_bfree = st.f_bfree;
- buf->f_bavail = st.f_bavail;
- buf->f_files = st.f_files;
- buf->f_ffree = st.f_ffree;
- buf->f_fsid[0] = st.f_fsid.val[0];
- buf->f_fsid[1] = st.f_fsid.val[1];
-
+ struct hpux_statfs buf;
+ memset(&buf, 0, sizeof(buf));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+ buf.f_bfree = st->f_bfree;
+ buf.f_bavail = st->f_bavail;
+ buf.f_files = st->f_files;
+ buf.f_ffree = st->f_ffree;
+ buf.f_fsid[0] = st->f_fsid.val[0];
+ buf.f_fsid[1] = st->f_fsid.val[1];
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
return 0;
}
@@ -212,35 +207,19 @@ static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf)
asmlinkage long hpux_statfs(const char __user *pathname,
struct hpux_statfs __user *buf)
{
- struct path path;
- int error;
-
- error = user_path(pathname, &path);
- if (!error) {
- struct hpux_statfs tmp;
- error = do_statfs_hpux(&path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_put(&path);
- }
+ struct kstatfs st;
+ int error = user_statfs(pathname, &st);
+ if (!error)
+ error = do_statfs_hpux(&st, buf);
return error;
}
asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf)
{
- struct file *file;
- struct hpux_statfs tmp;
- int error;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = do_statfs_hpux(&file->f_path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
- out:
+ struct kstatfs st;
+ int error = fd_statfs(fd, &st);
+ if (!error)
+ error = do_statfs_hpux(&st, buf);
return error;
}
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 380d48b..26b8c80 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -33,9 +33,25 @@
//
//----------------------------------------------------------------------------
#include <linux/cache.h>
+#include <linux/threads.h>
#include <asm/types.h>
#include <asm/mmu.h>
+/*
+ * We only have to have statically allocated lppaca structs on
+ * legacy iSeries, which supports at most 64 cpus.
+ */
+#ifdef CONFIG_PPC_ISERIES
+#if NR_CPUS < 64
+#define NR_LPPACAS NR_CPUS
+#else
+#define NR_LPPACAS 64
+#endif
+#else /* not iSeries */
+#define NR_LPPACAS 1
+#endif
+
+
/* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k
* alignment is sufficient to prevent this */
struct lppaca {
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index ebf9846..f4adf89 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -27,20 +27,6 @@ extern unsigned long __toc_start;
#ifdef CONFIG_PPC_BOOK3S
/*
- * We only have to have statically allocated lppaca structs on
- * legacy iSeries, which supports at most 64 cpus.
- */
-#ifdef CONFIG_PPC_ISERIES
-#if NR_CPUS < 64
-#define NR_LPPACAS NR_CPUS
-#else
-#define NR_LPPACAS 64
-#endif
-#else /* not iSeries */
-#define NR_LPPACAS 1
-#endif
-
-/*
* The structure which the hypervisor knows about - this structure
* should not cross a page boundary. The vpa_init/register_vpa call
* is now known to fail if the lppaca structure crosses a page
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index fd48123..0dc95c0 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1516,7 +1516,8 @@ int start_topology_update(void)
{
int rc = 0;
- if (firmware_has_feature(FW_FEATURE_VPHN) &&
+ /* Disabled until races with load balancing are fixed */
+ if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
get_lppaca()->shared_proc) {
vphn_enabled = 1;
setup_cpu_associativity_change_counters();
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 187a7d3..a3d2ce5 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
if (!IS_ERR(tmp)) {
struct nameidata nd;
- ret = path_lookup(tmp, LOOKUP_PARENT, &nd);
+ ret = kern_path_parent(tmp, &nd);
if (!ret) {
nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE;
ret = spufs_create(&nd, flags, mode, neighbor);
diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c
index fdb7384..f0491cc 100644
--- a/arch/powerpc/platforms/iseries/dt.c
+++ b/arch/powerpc/platforms/iseries/dt.c
@@ -242,8 +242,8 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
pft_size[0] = 0; /* NUMA CEC cookie, 0 for non NUMA */
pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE);
- for (i = 0; i < NR_CPUS; i++) {
- if (lppaca_of(i).dyn_proc_status >= 2)
+ for (i = 0; i < NR_LPPACAS; i++) {
+ if (lppaca[i].dyn_proc_status >= 2)
continue;
snprintf(p, 32 - (p - buf), "@%d", i);
@@ -251,7 +251,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
dt_prop_str(dt, "device_type", device_type_cpu);
- index = lppaca_of(i).dyn_hv_phys_proc_index;
+ index = lppaca[i].dyn_hv_phys_proc_index;
d = &xIoHriProcessorVpd[index];
dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024);
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index b086341..2946ae1 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -680,6 +680,7 @@ void * __init iSeries_early_setup(void)
* on but calling this function multiple times is fine.
*/
identify_cpu(0, mfspr(SPRN_PVR));
+ initialise_paca(&boot_paca, 0);
powerpc_firmware_features |= FW_FEATURE_ISERIES;
powerpc_firmware_features |= FW_FEATURE_LPAR;
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 975613b..c70e047 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -124,35 +124,18 @@ void mconsole_log(struct mc_request *req)
#if 0
void mconsole_proc(struct mc_request *req)
{
- struct nameidata nd;
struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
struct file *file;
- int n, err;
+ int n;
char *ptr = req->request.data, *buf;
mm_segment_t old_fs = get_fs();
ptr += strlen("proc");
ptr = skip_spaces(ptr);
- err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd);
- if (err) {
- mconsole_reply(req, "Failed to look up file", 1, 0);
- goto out;
- }
-
- err = may_open(&nd.path, MAY_READ, O_RDONLY);
- if (result) {
- mconsole_reply(req, "Failed to open file", 1, 0);
- path_put(&nd.path);
- goto out;
- }
-
- file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
- current_cred());
- err = PTR_ERR(file);
+ file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
if (IS_ERR(file)) {
mconsole_reply(req, "Failed to open file", 1, 0);
- path_put(&nd.path);
goto out;
}
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 646aa78..46a8238 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -62,7 +62,12 @@ int main(int argc, char *argv[])
if (fseek(f, -4L, SEEK_END)) {
perror(argv[1]);
}
- fread(&olen, sizeof olen, 1, f);
+
+ if (fread(&olen, sizeof(olen), 1, f) != 1) {
+ perror(argv[1]);
+ return 1;
+ }
+
ilen = ftell(f);
olen = getle32(&olen);
fclose(f);
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 518bb99..98d353e 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -851,4 +851,6 @@ ia32_sys_call_table:
.quad sys_fanotify_init
.quad sys32_fanotify_mark
.quad sys_prlimit64 /* 340 */
+ .quad sys_name_to_handle_at
+ .quad compat_sys_open_by_handle_at
ia32_syscall_end:
diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h
new file mode 100644
index 0000000..e656ad8
--- /dev/null
+++ b/arch/x86/include/asm/ce4100.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_CE4100_H_
+#define _ASM_CE4100_H_
+
+int ce4100_pci_init(void);
+
+#endif
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index b766a5e..f4c4973 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -346,10 +346,12 @@
#define __NR_fanotify_init 338
#define __NR_fanotify_mark 339
#define __NR_prlimit64 340
+#define __NR_name_to_handle_at 341
+#define __NR_open_by_handle_at 342
#ifdef __KERNEL__
-#define NR_syscalls 341
+#define NR_syscalls 343
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 363e9b8..81a3d5b 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -669,6 +669,10 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init)
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
#define __NR_prlimit64 302
__SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_name_to_handle_at 303
+__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
+#define __NR_open_by_handle_at 304
+__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index ce1d54c..3e094af 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -176,7 +176,7 @@ struct bau_msg_payload {
struct bau_msg_header {
unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
/* bits 5:0 */
- unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */
+ unsigned int base_dest_nodeid:15; /* nasid of the */
/* bits 20:6 */ /* first bit in uvhub map */
unsigned int command:8; /* message type */
/* bits 28:21 */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a3c28ae..8508bfe 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set)
static inline int
HYPERVISOR_sched_op(int cmd, void *arg)
{
- return _hypercall2(int, sched_op_new, cmd, arg);
+ return _hypercall2(int, sched_op, cmd, arg);
}
static inline long
@@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value)
#endif
static inline int
-HYPERVISOR_suspend(unsigned long srec)
+HYPERVISOR_suspend(unsigned long start_info_mfn)
{
- return _hypercall3(int, sched_op, SCHEDOP_shutdown,
- SHUTDOWN_suspend, srec);
+ struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
+
+ /*
+ * For a PV guest the tools require that the start_info mfn be
+ * present in rdx/edx when the hypercall is made. Per the
+ * hypercall calling convention this is the third hypercall
+ * argument, which is start_info_mfn here.
+ */
+ return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn);
}
static inline int
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f25bdf2..c61934f 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -29,8 +29,10 @@ typedef struct xpaddr {
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
#define INVALID_P2M_ENTRY (~0UL)
-#define FOREIGN_FRAME_BIT (1UL<<31)
+#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1))
+#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2))
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
+#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT)
/* Maximum amount of memory we can handle in a domain in pages */
#define MAX_DOMAIN_PAGES \
@@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order;
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern unsigned long set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e);
extern int m2p_add_override(unsigned long mfn, struct page *page);
extern int m2p_remove_override(struct page *page);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
+#ifdef CONFIG_XEN_DEBUG_FS
+extern int p2m_dump_show(struct seq_file *m, void *v);
+#endif
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
unsigned long mfn;
@@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
mfn = get_phys_to_machine(pfn);
if (mfn != INVALID_P2M_ENTRY)
- mfn &= ~FOREIGN_FRAME_BIT;
+ mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
return mfn;
}
@@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
static inline unsigned long mfn_to_pfn(unsigned long mfn)
{
unsigned long pfn;
+ int ret = 0;
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
+ if (unlikely((mfn >> machine_to_phys_order) != 0)) {
+ pfn = ~0;
+ goto try_override;
+ }
pfn = 0;
/*
* The array access can fail (e.g., device space beyond end of RAM).
* In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing!
*/
- __get_user(pfn, &machine_to_phys_mapping[mfn]);
-
- /*
- * If this appears to be a foreign mfn (because the pfn
- * doesn't map back to the mfn), then check the local override
- * table to see if there's a better pfn to use.
+ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+try_override:
+ /* ret might be < 0 if there are no entries in the m2p for mfn */
+ if (ret < 0)
+ pfn = ~0;
+ else if (get_phys_to_machine(pfn) != mfn)
+ /*
+ * If this appears to be a foreign mfn (because the pfn
+ * doesn't map back to the mfn), then check the local override
+ * table to see if there's a better pfn to use.
+ *
+ * m2p_find_override_pfn returns ~0 if it doesn't find anything.
+ */
+ pfn = m2p_find_override_pfn(mfn, ~0);
+
+ /*
+ * pfn is ~0 if there are no entries in the m2p for mfn or if the
+ * entry doesn't map back to the mfn and m2p_override doesn't have a
+ * valid entry for it.
*/
- if (get_phys_to_machine(pfn) != mfn)
- pfn = m2p_find_override_pfn(mfn, pfn);
+ if (pfn == ~0 &&
+ get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
+ pfn = mfn;
return pfn;
}
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 2329b3e..aa86209 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void)
* its own functions.
*/
struct xen_pci_frontend_ops {
- int (*enable_msi)(struct pci_dev *dev, int **vectors);
+ int (*enable_msi)(struct pci_dev *dev, int vectors[]);
void (*disable_msi)(struct pci_dev *dev);
- int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec);
+ int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec);
void (*disable_msix)(struct pci_dev *dev);
};
extern struct xen_pci_frontend_ops *xen_pci_frontend;
static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
- int **vectors)
+ int vectors[])
{
if (xen_pci_frontend && xen_pci_frontend->enable_msi)
return xen_pci_frontend->enable_msi(dev, vectors);
@@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
xen_pci_frontend->disable_msi(dev);
}
static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
- int **vectors, int nvec)
+ int vectors[], int nvec)
{
if (xen_pci_frontend && xen_pci_frontend->enable_msix)
return xen_pci_frontend->enable_msix(dev, vectors, nvec);
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 13a3891..452932d 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -106,8 +106,8 @@ void __init setup_bios_corruption_check(void)
addr += size;
}
- printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
- num_scan_areas);
+ if (num_scan_areas)
+ printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas);
}
@@ -143,12 +143,12 @@ static void check_corruption(struct work_struct *dummy)
{
check_for_bios_corruption();
schedule_delayed_work(&bios_check_work,
- round_jiffies_relative(corruption_check_period*HZ));
+ round_jiffies_relative(corruption_check_period*HZ));
}
static int start_periodic_check_for_corruption(void)
{
- if (!memory_corruption_check || corruption_check_period == 0)
+ if (!num_scan_areas || !memory_corruption_check || corruption_check_period == 0)
return 0;
printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 4f6f679..4a5a42b 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -195,7 +195,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
cmd_incomplete:
iowrite16(0, &pcch_hdr->status);
spin_unlock(&pcc_lock);
- return -EINVAL;
+ return 0;
}
static int pcc_cpufreq_target(struct cpufreq_policy *policy,
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index b35786d..c314b21 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -340,3 +340,5 @@ ENTRY(sys_call_table)
.long sys_fanotify_init
.long sys_fanotify_mark
.long sys_prlimit64 /* 340 */
+ .long sys_name_to_handle_at
+ .long sys_open_by_handle_at
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7d90ceb..20e3f87 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void)
for (address = VMALLOC_START & PMD_MASK;
address >= TASK_SIZE && address < FIXADDR_TOP;
address += PMD_SIZE) {
-
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
spinlock_t *pgt_lock;
pmd_t *ret;
+ /* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void)
if (!ret)
break;
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
}
@@ -828,6 +827,13 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
unsigned long address, unsigned int fault)
{
if (fault & VM_FAULT_OOM) {
+ /* Kernel mode? Handle exceptions or die: */
+ if (!(error_code & PF_USER)) {
+ up_read(&current->mm->mmap_sem);
+ no_context(regs, error_code, address);
+ return;
+ }
+
out_of_memory(regs, error_code, address);
} else {
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a5929..c14a542 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
for (address = start; address <= end; address += PGDIR_SIZE) {
const pgd_t *pgd_ref = pgd_offset_k(address);
- unsigned long flags;
struct page *page;
if (pgd_none(*pgd_ref))
continue;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
spinlock_t *pgt_lock;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ /* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_unlock(pgt_lock);
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea155..1337c51 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -780,11 +780,7 @@ void __cpuinit numa_add_cpu(int cpu)
int physnid;
int nid = NUMA_NO_NODE;
- apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
- if (apicid != BAD_APICID)
- nid = apicid_to_node[apicid];
- if (nid == NUMA_NO_NODE)
- nid = early_cpu_to_node(cpu);
+ nid = early_cpu_to_node(cpu);
BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
/*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d343b3c..90825f2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM];
void update_page_count(int level, unsigned long pages)
{
- unsigned long flags;
-
/* Protect against CPA */
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
direct_pages_count[level] += pages;
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
static void split_page_count(int level)
@@ -394,7 +392,7 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
- unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+ unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot, req_prot;
int i, do_split = 1;
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
if (cpa->force_split)
return 1;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
/*
* Check for races, another CPU might have split this page
* up already:
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
}
out_unlock:
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return do_split;
}
static int split_large_page(pte_t *kpte, unsigned long address)
{
- unsigned long flags, pfn, pfninc = 1;
+ unsigned long pfn, pfninc = 1;
unsigned int i, level;
pte_t *pbase, *tmp;
pgprot_t ref_prot;
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
if (!base)
return -ENOMEM;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -591,7 +589,7 @@ out_unlock:
*/
if (base)
__free_page(base);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return 0;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 500242d..0113d19 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
static void pgd_dtor(pgd_t *pgd)
{
- unsigned long flags; /* can be called from interrupt context */
-
if (SHARED_KERNEL_PMD)
return;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
/*
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *pmds[PREALLOCATED_PMDS];
- unsigned long flags;
pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
* respect to anything walking the pgd_list, so that they
* never see a partially populated pgd.
*/
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
pgd_ctor(mm, pgd);
pgd_prepopulate_pmd(mm, pgd, pmds);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return pgd;
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 85b68ef..9260b3e 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -34,6 +34,7 @@
#include <linux/pci.h>
#include <linux/init.h>
+#include <asm/ce4100.h>
#include <asm/pci_x86.h>
struct sim_reg {
@@ -306,10 +307,10 @@ struct pci_raw_ops ce4100_pci_conf = {
.write = ce4100_conf_write,
};
-static int __init ce4100_pci_init(void)
+int __init ce4100_pci_init(void)
{
init_sim_regs();
raw_pci_ops = &ce4100_pci_conf;
- return 0;
+ /* Indicate caller that it should invoke pci_legacy_init() */
+ return 1;
}
-subsys_initcall(ce4100_pci_init);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 25cd4a0..8c4085a 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -20,7 +20,8 @@
#include <asm/xen/pci.h>
#ifdef CONFIG_ACPI
-static int xen_hvm_register_pirq(u32 gsi, int triggering)
+static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
+ int trigger, int polarity)
{
int rc, irq;
struct physdev_map_pirq map_irq;
@@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
return -1;
}
- if (triggering == ACPI_EDGE_SENSITIVE) {
+ if (trigger == ACPI_EDGE_SENSITIVE) {
shareable = 0;
name = "ioapic-edge";
} else {
@@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
return irq;
}
-
-static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
- int trigger, int polarity)
-{
- return xen_hvm_register_pirq(gsi, trigger);
-}
#endif
#if defined(CONFIG_PCI_MSI)
@@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int irq, pirq, ret = 0;
+ int irq, pirq;
struct msi_desc *msidesc;
struct msi_msg msg;
@@ -99,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
__read_msi_msg(msidesc, &msg);
pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
- if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
- xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ);
- if (irq < 0)
+ if (msg.data != XEN_PIRQ_MSI_DATA ||
+ xen_irq_from_pirq(pirq) < 0) {
+ pirq = xen_allocate_pirq_msi(dev, msidesc);
+ if (pirq < 0)
goto error;
- ret = set_irq_msi(irq, msidesc);
- if (ret < 0)
- goto error_while;
- printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d"
- " pirq=%d\n", irq, pirq);
- return 0;
+ xen_msi_compose_msg(dev, pirq, &msg);
+ __write_msi_msg(msidesc, &msg);
+ dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
+ } else {
+ dev_dbg(&dev->dev,
+ "xen: msi already bound to pirq=%d\n", pirq);
}
- xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ));
- if (irq < 0 || pirq < 0)
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
+ (type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi");
+ if (irq < 0)
goto error;
- printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
- xen_msi_compose_msg(dev, pirq, &msg);
- ret = set_irq_msi(irq, msidesc);
- if (ret < 0)
- goto error_while;
- write_msi_msg(irq, &msg);
+ dev_dbg(&dev->dev,
+ "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
}
return 0;
-error_while:
- unbind_from_irqhandler(irq, NULL);
error:
- if (ret == -ENODEV)
- dev_err(&dev->dev, "Xen PCI frontend has not registered" \
- " MSI/MSI-X support!\n");
-
- return ret;
+ dev_err(&dev->dev,
+ "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+ return -ENODEV;
}
/*
@@ -150,35 +138,26 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
return -ENOMEM;
if (type == PCI_CAP_ID_MSIX)
- ret = xen_pci_frontend_enable_msix(dev, &v, nvec);
+ ret = xen_pci_frontend_enable_msix(dev, v, nvec);
else
- ret = xen_pci_frontend_enable_msi(dev, &v);
+ ret = xen_pci_frontend_enable_msi(dev, v);
if (ret)
goto error;
i = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = xen_allocate_pirq(v[i], 0, /* not sharable */
- (type == PCI_CAP_ID_MSIX) ?
- "pcifront-msi-x" : "pcifront-msi");
- if (irq < 0) {
- ret = -1;
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
+ (type == PCI_CAP_ID_MSIX) ?
+ "pcifront-msi-x" :
+ "pcifront-msi");
+ if (irq < 0)
goto free;
- }
-
- ret = set_irq_msi(irq, msidesc);
- if (ret)
- goto error_while;
i++;
}
kfree(v);
return 0;
-error_while:
- unbind_from_irqhandler(irq, NULL);
error:
- if (ret == -ENODEV)
- dev_err(&dev->dev, "Xen PCI frontend has not registered" \
- " MSI/MSI-X support!\n");
+ dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
free:
kfree(v);
return ret;
@@ -193,6 +172,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
xen_pci_frontend_disable_msix(dev);
else
xen_pci_frontend_disable_msi(dev);
+
+ /* Free the IRQ's and the msidesc using the generic code. */
+ default_teardown_msi_irqs(dev);
}
static void xen_teardown_msi_irq(unsigned int irq)
@@ -200,47 +182,82 @@ static void xen_teardown_msi_irq(unsigned int irq)
xen_destroy_irq(irq);
}
+#ifdef CONFIG_XEN_DOM0
static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int irq, ret;
+ int ret = 0;
struct msi_desc *msidesc;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = xen_create_msi_irq(dev, msidesc, type);
- if (irq < 0)
- return -1;
+ struct physdev_map_pirq map_irq;
- ret = set_irq_msi(irq, msidesc);
- if (ret)
- goto error;
- }
- return 0;
+ memset(&map_irq, 0, sizeof(map_irq));
+ map_irq.domid = DOMID_SELF;
+ map_irq.type = MAP_PIRQ_TYPE_MSI;
+ map_irq.index = -1;
+ map_irq.pirq = -1;
+ map_irq.bus = dev->bus->number;
+ map_irq.devfn = dev->devfn;
-error:
- xen_destroy_irq(irq);
+ if (type == PCI_CAP_ID_MSIX) {
+ int pos;
+ u32 table_offset, bir;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+
+ pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
+ &table_offset);
+ bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
+
+ map_irq.table_base = pci_resource_start(dev, bir);
+ map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
+ }
+
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+ if (ret) {
+ dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
+ goto out;
+ }
+
+ ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
+ map_irq.pirq, map_irq.index,
+ (type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi");
+ if (ret < 0)
+ goto out;
+ }
+ ret = 0;
+out:
return ret;
}
#endif
+#endif
static int xen_pcifront_enable_irq(struct pci_dev *dev)
{
int rc;
int share = 1;
+ u8 gsi;
- dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
-
- if (dev->irq < 0)
- return -EINVAL;
+ rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
+ if (rc < 0) {
+ dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
+ rc);
+ return rc;
+ }
- if (dev->irq < NR_IRQS_LEGACY)
+ if (gsi < NR_IRQS_LEGACY)
share = 0;
- rc = xen_allocate_pirq(dev->irq, share, "pcifront");
+ rc = xen_allocate_pirq(gsi, share, "pcifront");
if (rc < 0) {
- dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
- dev->irq, rc);
+ dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
+ gsi, rc);
return rc;
}
+
+ dev->irq = rc;
+ dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
return 0;
}
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index d2c0d51..cd6f184 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -15,6 +15,7 @@
#include <linux/serial_reg.h>
#include <linux/serial_8250.h>
+#include <asm/ce4100.h>
#include <asm/setup.h>
#include <asm/io.h>
@@ -129,4 +130,5 @@ void __init x86_ce4100_early_setup(void)
x86_init.resources.probe_roms = x86_init_noop;
x86_init.mpparse.get_smp_config = x86_init_uint_noop;
x86_init.mpparse.find_smp_config = sdv_find_smp_config;
+ x86_init.pci.init = ce4100_pci_init;
}
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index df58e9c..a7b38d3 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1364,11 +1364,11 @@ uv_activation_descriptor_init(int node, int pnode)
memset(bd2, 0, sizeof(struct bau_desc));
bd2->header.sw_ack_flag = 1;
/*
- * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub
+ * base_dest_nodeid is the nasid of the first uvhub
* in the partition. The bit map will indicate uvhub numbers,
* which are 0-N in a partition. Pnodes are unique system-wide.
*/
- bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
+ bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
bd2->header.dest_subnodeid = 0x10; /* the LB */
bd2->header.command = UV_NET_ENDPOINT_INTD;
bd2->header.int_both = 1;
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5b54892..e4343fe 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -48,3 +48,11 @@ config XEN_DEBUG_FS
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
+
+config XEN_DEBUG
+ bool "Enable Xen debug checks"
+ depends on XEN
+ default n
+ help
+ Enable various WARN_ON checks in the Xen MMU code.
+ Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542ef..49dbd78 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor)
xen_setup_features();
- pv_info = xen_info;
- pv_info.kernel_rpl = 0;
+ pv_info.name = "Xen HVM";
xen_domain_type = XEN_HVM_DOMAIN;
return 0;
}
-void xen_hvm_init_shared_info(void)
+void __ref xen_hvm_init_shared_info(void)
{
int cpu;
struct xen_add_to_physmap xatp;
@@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ if (xen_have_vector_callback)
+ xen_init_lock_cpu(cpu);
break;
default:
break;
@@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void)
if (xen_feature(XENFEAT_hvm_callback_vector))
xen_have_vector_callback = 1;
+ xen_hvm_smp_init();
register_cpu_notifier(&xen_hvm_cpu_notifier);
xen_unplug_emulated_devices();
have_vcpu_info_placement = 0;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61..832765c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
+#include <linux/seq_file.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (val & _PAGE_PRESENT) {
unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
pteval_t flags = val & PTE_FLAGS_MASK;
- unsigned long mfn = pfn_to_mfn(pfn);
+ unsigned long mfn;
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ mfn = get_phys_to_machine(pfn);
+ else
+ mfn = pfn;
/*
* If there's no mfn for the pfn, then just create an
* empty non-present pte. Unfortunately this loses
@@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (unlikely(mfn == INVALID_P2M_ENTRY)) {
mfn = 0;
flags = 0;
+ } else {
+ /*
+ * Paramount to do this test _after_ the
+ * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
+ * IDENTITY_FRAME_BIT resolves to true.
+ */
+ mfn &= ~FOREIGN_FRAME_BIT;
+ if (mfn & IDENTITY_FRAME_BIT) {
+ mfn &= ~IDENTITY_FRAME_BIT;
+ flags |= _PAGE_IOMAP;
+ }
}
-
val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
}
@@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
+#ifdef CONFIG_XEN_DEBUG
+pte_t xen_make_pte_debug(pteval_t pte)
+{
+ phys_addr_t addr = (pte & PTE_PFN_MASK);
+ phys_addr_t other_addr;
+ bool io_page = false;
+ pte_t _pte;
+
+ if (pte & _PAGE_IOMAP)
+ io_page = true;
+
+ _pte = xen_make_pte(pte);
+
+ if (!addr)
+ return _pte;
+
+ if (io_page &&
+ (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+ other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
+ WARN(addr != other_addr,
+ "0x%lx is using VM_IO, but it is 0x%lx!\n",
+ (unsigned long)addr, (unsigned long)other_addr);
+ } else {
+ pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
+ other_addr = (_pte.pte & PTE_PFN_MASK);
+ WARN((addr == other_addr) && (!io_page) && (!iomap_set),
+ "0x%lx is missing VM_IO (and wasn't fixed)!\n",
+ (unsigned long)addr);
+ }
+
+ return _pte;
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
+#endif
+
pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
@@ -986,10 +1036,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
*/
void xen_mm_pin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (!PagePinned(page)) {
@@ -998,7 +1047,7 @@ void xen_mm_pin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
/*
@@ -1099,10 +1148,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
*/
void xen_mm_unpin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (PageSavePinned(page)) {
@@ -1112,7 +1160,7 @@ void xen_mm_unpin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
@@ -1942,6 +1990,9 @@ __init void xen_ident_map_ISA(void)
static __init void xen_post_allocator_init(void)
{
+#ifdef CONFIG_XEN_DEBUG
+ pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
+#endif
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
@@ -2074,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
in_frames[i] = virt_to_mfn(vaddr);
MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
- set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+ __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
if (out_frames)
out_frames[i] = virt_to_pfn(vaddr);
@@ -2353,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
#ifdef CONFIG_XEN_DEBUG_FS
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+ .open = p2m_dump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static struct dentry *d_mmu_debug;
static int __init xen_mmu_debugfs(void)
@@ -2408,6 +2471,7 @@ static int __init xen_mmu_debugfs(void)
debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
&mmu_stats.prot_commit_batched);
+ debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
return 0;
}
fs_initcall(xen_mmu_debugfs);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index fd12d7c..215a3ce 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -23,6 +23,129 @@
* P2M_PER_PAGE depends on the architecture, as a mfn is always
* unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
* 512 and 1024 entries respectively.
+ *
+ * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
+ *
+ * However not all entries are filled with MFNs. Specifically for all other
+ * leaf entries, or for the top root, or middle one, for which there is a void
+ * entry, we assume it is "missing". So (for example)
+ * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
+ *
+ * We also have the possibility of setting 1-1 mappings on certain regions, so
+ * that:
+ * pfn_to_mfn(0xc0000)=0xc0000
+ *
+ * The benefit of this is, that we can assume for non-RAM regions (think
+ * PCI BARs, or ACPI spaces), we can create mappings easily b/c we
+ * get the PFN value to match the MFN.
+ *
+ * For this to work efficiently we have one new page p2m_identity and
+ * allocate (via reserved_brk) any other pages we need to cover the sides
+ * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
+ * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
+ * no other fancy value).
+ *
+ * On lookup we spot that the entry points to p2m_identity and return the
+ * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
+ * If the entry points to an allocated page, we just proceed as before and
+ * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
+ * appropriate functions (pfn_to_mfn).
+ *
+ * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
+ * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
+ * non-identity pfn. To protect ourselves against we elect to set (and get) the
+ * IDENTITY_FRAME_BIT on all identity mapped PFNs.
+ *
+ * This simplistic diagram is used to explain the more subtle piece of code.
+ * There is also a digram of the P2M at the end that can help.
+ * Imagine your E820 looking as so:
+ *
+ * 1GB 2GB
+ * /-------------------+---------\/----\ /----------\ /---+-----\
+ * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
+ * \-------------------+---------/\----/ \----------/ \---+-----/
+ * ^- 1029MB ^- 2001MB
+ *
+ * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
+ * 2048MB = 524288 (0x80000)]
+ *
+ * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
+ * is actually not present (would have to kick the balloon driver to put it in).
+ *
+ * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
+ * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
+ * of the PFN and the end PFN (263424 and 512256 respectively). The first step
+ * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
+ * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
+ * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn
+ * to end pfn. We reserve_brk top leaf pages if they are missing (means they
+ * point to p2m_mid_missing).
+ *
+ * With the E820 example above, 263424 is not 1GB aligned so we allocate a
+ * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
+ * Each entry in the allocate page is "missing" (points to p2m_missing).
+ *
+ * Next stage is to determine if we need to do a more granular boundary check
+ * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
+ * We check if the start pfn and end pfn violate that boundary check, and if
+ * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
+ * granularity of setting which PFNs are missing and which ones are identity.
+ * In our example 263424 and 512256 both fail the check so we reserve_brk two
+ * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
+ * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
+ *
+ * At this point we would at minimum reserve_brk one page, but could be up to
+ * three. Each call to set_phys_range_identity has at maximum a three page
+ * cost. If we were to query the P2M at this stage, all those entries from
+ * start PFN through end PFN (so 1029MB -> 2001MB) would return
+ * INVALID_P2M_ENTRY ("missing").
+ *
+ * The next step is to walk from the start pfn to the end pfn setting
+ * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
+ * If we find that the middle leaf is pointing to p2m_missing we can swap it
+ * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this
+ * point we do not need to worry about boundary aligment (so no need to
+ * reserve_brk a middle page, figure out which PFNs are "missing" and which
+ * ones are identity), as that has been done earlier. If we find that the
+ * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
+ * that page (which covers 512 PFNs) and set the appropriate PFN with
+ * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
+ * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
+ * IDENTITY_FRAME_BIT set.
+ *
+ * All other regions that are void (or not filled) either point to p2m_missing
+ * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
+ * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
+ * contain the INVALID_P2M_ENTRY value and are considered "missing."
+ *
+ * This is what the p2m ends up looking (for the E820 above) with this
+ * fabulous drawing:
+ *
+ * p2m /--------------\
+ * /-----\ | &mfn_list[0],| /-----------------\
+ * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
+ * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
+ * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
+ * |-----| \ | [p2m_identity]+\\ | .... |
+ * | 2 |--\ \-------------------->| ... | \\ \----------------/
+ * |-----| \ \---------------/ \\
+ * | 3 |\ \ \\ p2m_identity
+ * |-----| \ \-------------------->/---------------\ /-----------------\
+ * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
+ * \-----/ / | [p2m_identity]+-->| ..., ~0 |
+ * / /---------------\ | .... | \-----------------/
+ * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
+ * / | IDENTITY[@256]|<----/ \---------------/
+ * / | ~0, ~0, .... |
+ * | \---------------/
+ * |
+ * p2m_missing p2m_missing
+ * /------------------\ /------------\
+ * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
+ * | [p2m_mid_missing]+---->| ..., ~0 |
+ * \------------------/ \------------/
+ *
+ * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
*/
#include <linux/init.h>
@@ -30,6 +153,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
#include <asm/cache.h>
#include <asm/setup.h>
@@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
+
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+/* We might hit two boundary violations at the start and end, at max each
+ * boundary violation will require three middle nodes. */
+RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+
static inline unsigned p2m_top_index(unsigned long pfn)
{
BUG_ON(pfn >= MAX_P2M_PFN);
@@ -136,7 +266,7 @@ static void p2m_init(unsigned long *p2m)
* - After resume we're called from within stop_machine, but the mfn
* tree should alreay be completely allocated.
*/
-void xen_build_mfn_list_list(void)
+void __ref xen_build_mfn_list_list(void)
{
unsigned long pfn;
@@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_init(p2m_top);
+ p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_init(p2m_identity);
+
/*
* The domain builder gives us a pre-constructed p2m array in
* mfn_list for all the pages initially given to us, so we just
@@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn)
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
+ /*
+ * The INVALID_P2M_ENTRY is filled in both p2m_*identity
+ * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
+ * would be wrong.
+ */
+ if (p2m_top[topidx][mididx] == p2m_identity)
+ return IDENTITY_FRAME(pfn);
+
return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
@@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn)
p2m_top_mfn_p[topidx] = mid_mfn;
}
- if (p2m_top[topidx][mididx] == p2m_missing) {
+ if (p2m_top[topidx][mididx] == p2m_identity ||
+ p2m_top[topidx][mididx] == p2m_missing) {
/* p2m leaf page is missing */
unsigned long *p2m;
+ unsigned long *p2m_orig = p2m_top[topidx][mididx];
p2m = alloc_p2m_page();
if (!p2m)
@@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn)
p2m_init(p2m);
- if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
+ if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
free_p2m_page(p2m);
else
mid_mfn[mididx] = virt_to_mfn(p2m);
@@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn)
return true;
}
+bool __early_alloc_p2m(unsigned long pfn)
+{
+ unsigned topidx, mididx, idx;
+
+ topidx = p2m_top_index(pfn);
+ mididx = p2m_mid_index(pfn);
+ idx = p2m_index(pfn);
+
+ /* Pfff.. No boundary cross-over, lets get out. */
+ if (!idx)
+ return false;
+
+ WARN(p2m_top[topidx][mididx] == p2m_identity,
+ "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
+ topidx, mididx);
+
+ /*
+ * Could be done by xen_build_dynamic_phys_to_machine..
+ */
+ if (p2m_top[topidx][mididx] != p2m_missing)
+ return false;
+
+ /* Boundary cross-over for the edges: */
+ if (idx) {
+ unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_init(p2m);
+
+ p2m_top[topidx][mididx] = p2m;
+
+ }
+ return idx != 0;
+}
+unsigned long set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e)
+{
+ unsigned long pfn;
+
+ if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
+ return 0;
+
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
+ return pfn_e - pfn_s;
+
+ if (pfn_s > pfn_e)
+ return 0;
+
+ for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
+ pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
+ pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
+ {
+ unsigned topidx = p2m_top_index(pfn);
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_mid_init(mid);
+
+ p2m_top[topidx] = mid;
+ }
+ }
+
+ __early_alloc_p2m(pfn_s);
+ __early_alloc_p2m(pfn_e);
+
+ for (pfn = pfn_s; pfn < pfn_e; pfn++)
+ if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
+ break;
+
+ if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
+ "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
+ (pfn_e - pfn_s) - (pfn - pfn_s)))
+ printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
+
+ return pfn - pfn_s;
+}
+
/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, mididx, idx;
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+ return true;
+ }
if (unlikely(pfn >= MAX_P2M_PFN)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return true;
@@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
+ /* For sparse holes were the p2m leaf has real PFN along with
+ * PCI holes, stick in the PFN as the MFN value.
+ */
+ if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
+ if (p2m_top[topidx][mididx] == p2m_identity)
+ return true;
+
+ /* Swap over from MISSING to IDENTITY if needed. */
+ if (p2m_top[topidx][mididx] == p2m_missing) {
+ WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
+ p2m_identity) != p2m_missing);
+ return true;
+ }
+ }
+
if (p2m_top[topidx][mididx] == p2m_missing)
return mfn == INVALID_P2M_ENTRY;
@@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
- BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
- return true;
- }
-
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
if (!alloc_p2m(pfn))
return false;
@@ -421,7 +654,7 @@ int m2p_add_override(unsigned long mfn, struct page *page)
{
unsigned long flags;
unsigned long pfn;
- unsigned long address;
+ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
@@ -455,7 +688,7 @@ int m2p_remove_override(struct page *page)
unsigned long flags;
unsigned long mfn;
unsigned long pfn;
- unsigned long address;
+ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
@@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
return ret;
}
EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+int p2m_dump_show(struct seq_file *m, void *v)
+{
+ static const char * const level_name[] = { "top", "middle",
+ "entry", "abnormal" };
+ static const char * const type_name[] = { "identity", "missing",
+ "pfn", "abnormal"};
+#define TYPE_IDENTITY 0
+#define TYPE_MISSING 1
+#define TYPE_PFN 2
+#define TYPE_UNKNOWN 3
+ unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
+ unsigned int uninitialized_var(prev_level);
+ unsigned int uninitialized_var(prev_type);
+
+ if (!p2m_top)
+ return 0;
+
+ for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
+ unsigned topidx = p2m_top_index(pfn);
+ unsigned mididx = p2m_mid_index(pfn);
+ unsigned idx = p2m_index(pfn);
+ unsigned lvl, type;
+
+ lvl = 4;
+ type = TYPE_UNKNOWN;
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ lvl = 0; type = TYPE_MISSING;
+ } else if (p2m_top[topidx] == NULL) {
+ lvl = 0; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx] == NULL) {
+ lvl = 1; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx] == p2m_identity) {
+ lvl = 1; type = TYPE_IDENTITY;
+ } else if (p2m_top[topidx][mididx] == p2m_missing) {
+ lvl = 1; type = TYPE_MISSING;
+ } else if (p2m_top[topidx][mididx][idx] == 0) {
+ lvl = 2; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
+ lvl = 2; type = TYPE_IDENTITY;
+ } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
+ lvl = 2; type = TYPE_MISSING;
+ } else if (p2m_top[topidx][mididx][idx] == pfn) {
+ lvl = 2; type = TYPE_PFN;
+ } else if (p2m_top[topidx][mididx][idx] != pfn) {
+ lvl = 2; type = TYPE_PFN;
+ }
+ if (pfn == 0) {
+ prev_level = lvl;
+ prev_type = type;
+ }
+ if (pfn == MAX_DOMAIN_PAGES-1) {
+ lvl = 3;
+ type = TYPE_UNKNOWN;
+ }
+ if (prev_type != type) {
+ seq_printf(m, " [0x%lx->0x%lx] %s\n",
+ prev_pfn_type, pfn, type_name[prev_type]);
+ prev_pfn_type = pfn;
+ prev_type = type;
+ }
+ if (prev_level != lvl) {
+ seq_printf(m, " [0x%lx->0x%lx] level %s\n",
+ prev_pfn_level, pfn, level_name[prev_level]);
+ prev_pfn_level = pfn;
+ prev_level = lvl;
+ }
+ }
+ return 0;
+#undef TYPE_IDENTITY
+#undef TYPE_MISSING
+#undef TYPE_PFN
+#undef TYPE_UNKNOWN
+}
+#endif
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a8a66a5..fa0269a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
static __init void xen_add_extra_mem(unsigned long pages)
{
+ unsigned long pfn;
+
u64 size = (u64)pages * PAGE_SIZE;
u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages)
xen_extra_mem_size += size;
xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+
+ for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
start, end, ret);
if (ret == 1) {
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
len++;
}
}
@@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
return released;
}
+static unsigned long __init xen_set_identity(const struct e820entry *list,
+ ssize_t map_size)
+{
+ phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
+ phys_addr_t start_pci = last;
+ const struct e820entry *entry;
+ unsigned long identity = 0;
+ int i;
+
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+ phys_addr_t start = entry->addr;
+ phys_addr_t end = start + entry->size;
+
+ if (start < last)
+ start = last;
+
+ if (end <= start)
+ continue;
+
+ /* Skip over the 1MB region. */
+ if (last > end)
+ continue;
+
+ if (entry->type == E820_RAM) {
+ if (start > start_pci)
+ identity += set_phys_range_identity(
+ PFN_UP(start_pci), PFN_DOWN(start));
+
+ /* Without saving 'last' we would gooble RAM too
+ * at the end of the loop. */
+ last = end;
+ start_pci = end;
+ continue;
+ }
+ start_pci = min(start, start_pci);
+ last = end;
+ }
+ if (last > start_pci)
+ identity += set_phys_range_identity(
+ PFN_UP(start_pci), PFN_DOWN(last));
+ return identity;
+}
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
char * __init xen_memory_setup(void)
{
static struct e820entry map[E820MAX] __initdata;
+ static struct e820entry map_raw[E820MAX] __initdata;
unsigned long max_pfn = xen_start_info->nr_pages;
unsigned long long mem_end;
@@ -151,6 +199,7 @@ char * __init xen_memory_setup(void)
struct xen_memory_map memmap;
unsigned long extra_pages = 0;
unsigned long extra_limit;
+ unsigned long identity_pages = 0;
int i;
int op;
@@ -176,6 +225,7 @@ char * __init xen_memory_setup(void)
}
BUG_ON(rc);
+ memcpy(map_raw, map, sizeof(map));
e820.nr_map = 0;
xen_extra_mem_start = mem_end;
for (i = 0; i < memmap.nr_entries; i++) {
@@ -194,6 +244,15 @@ char * __init xen_memory_setup(void)
end -= delta;
extra_pages += PFN_DOWN(delta);
+ /*
+ * Set RAM below 4GB that is not for us to be unusable.
+ * This prevents "System RAM" address space from being
+ * used as potential resource for I/O address (happens
+ * when 'allocate_resource' is called).
+ */
+ if (delta &&
+ (xen_initial_domain() && end < 0x100000000ULL))
+ e820_add_region(end, delta, E820_UNUSABLE);
}
if (map[i].size > 0 && end > xen_extra_mem_start)
@@ -251,6 +310,13 @@ char * __init xen_memory_setup(void)
xen_add_extra_mem(extra_pages);
+ /*
+ * Set P2M for all non-RAM pages and E820 gaps to be identity
+ * type PFNs. We supply it with the non-sanitized version
+ * of the E820.
+ */
+ identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
+ printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
return "Xen";
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 72a4c79..3061244 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -509,3 +509,41 @@ void __init xen_smp_init(void)
xen_fill_possible_map();
xen_init_spinlocks();
}
+
+static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
+{
+ native_smp_prepare_cpus(max_cpus);
+ WARN_ON(xen_smp_intr_init(0));
+
+ if (!xen_have_vector_callback)
+ return;
+ xen_init_lock_cpu(0);
+ xen_init_spinlocks();
+}
+
+static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
+{
+ int rc;
+ rc = native_cpu_up(cpu);
+ WARN_ON (xen_smp_intr_init(cpu));
+ return rc;
+}
+
+static void xen_hvm_cpu_die(unsigned int cpu)
+{
+ unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
+ native_cpu_die(cpu);
+}
+
+void __init xen_hvm_smp_init(void)
+{
+ smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
+ smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
+ smp_ops.cpu_up = xen_hvm_cpu_up;
+ smp_ops.cpu_die = xen_hvm_cpu_die;
+ smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
+ smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
+}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 9bbd63a..45329c8 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -12,7 +12,7 @@
#include "xen-ops.h"
#include "mmu.h"
-void xen_pre_suspend(void)
+void xen_arch_pre_suspend(void)
{
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn =
@@ -26,8 +26,9 @@ void xen_pre_suspend(void)
BUG();
}
-void xen_hvm_post_suspend(int suspend_cancelled)
+void xen_arch_hvm_post_suspend(int suspend_cancelled)
{
+#ifdef CONFIG_XEN_PVHVM
int cpu;
xen_hvm_init_shared_info();
xen_callback_vector();
@@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled)
xen_setup_runstate_info(cpu);
}
}
+#endif
}
-void xen_post_suspend(int suspend_cancelled)
+void xen_arch_post_suspend(int suspend_cancelled)
{
xen_build_mfn_list_list();
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 067759e..2e2d370 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -397,7 +397,9 @@ void xen_setup_timer(int cpu)
name = "<timer kasprintf failed>";
irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
+ IRQF_DISABLED|IRQF_PERCPU|
+ IRQF_NOBALANCING|IRQF_TIMER|
+ IRQF_FORCE_RESUME,
name, NULL);
evt = &per_cpu(xen_clock_events, cpu);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9d41bf9..3112f55 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);
+void __init xen_hvm_smp_init(void);
extern cpumask_var_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
+static inline void xen_hvm_smp_init(void) {}
#endif
#ifdef CONFIG_PARAVIRT_SPINLOCKS