-rw-r--r--  arch/sparc/Kconfig | 7
-rw-r--r--  arch/sparc/include/asm/cpudata_32.h | 5
-rw-r--r--  arch/sparc/include/asm/floppy_32.h | 40
-rw-r--r--  arch/sparc/include/asm/io.h | 13
-rw-r--r--  arch/sparc/include/asm/irq_32.h | 6
-rw-r--r--  arch/sparc/include/asm/leon.h | 41
-rw-r--r--  arch/sparc/include/asm/pcic.h | 12
-rw-r--r--  arch/sparc/include/asm/pgtable_32.h | 6
-rw-r--r--  arch/sparc/include/asm/pgtable_64.h | 3
-rw-r--r--  arch/sparc/include/asm/setup.h | 12
-rw-r--r--  arch/sparc/include/asm/smp_32.h | 37
-rw-r--r--  arch/sparc/include/asm/smp_64.h | 4
-rw-r--r--  arch/sparc/include/asm/spinlock_32.h | 1
-rw-r--r--  arch/sparc/include/asm/system_32.h | 5
-rw-r--r--  arch/sparc/include/asm/system_64.h | 4
-rw-r--r--  arch/sparc/include/asm/winmacro.h | 9
-rw-r--r--  arch/sparc/kernel/Makefile | 4
-rw-r--r--  arch/sparc/kernel/cpu.c | 139
-rw-r--r--  arch/sparc/kernel/cpumap.c | 4
-rw-r--r--  arch/sparc/kernel/devices.c | 4
-rw-r--r--  arch/sparc/kernel/ds.c | 14
-rw-r--r--  arch/sparc/kernel/entry.S | 41
-rw-r--r--  arch/sparc/kernel/head_32.S | 51
-rw-r--r--  arch/sparc/kernel/ioport.c | 42
-rw-r--r--  arch/sparc/kernel/irq.h | 51
-rw-r--r--  arch/sparc/kernel/irq_32.c | 513
-rw-r--r--  arch/sparc/kernel/irq_64.c | 6
-rw-r--r--  arch/sparc/kernel/kernel.h | 5
-rw-r--r--  arch/sparc/kernel/leon_kernel.c | 365
-rw-r--r--  arch/sparc/kernel/leon_smp.c | 148
-rw-r--r--  arch/sparc/kernel/mdesc.c | 2
-rw-r--r--  arch/sparc/kernel/of_device_64.c | 3
-rw-r--r--  arch/sparc/kernel/pci_msi.c | 3
-rw-r--r--  arch/sparc/kernel/pcic.c | 83
-rw-r--r--  arch/sparc/kernel/perf_event.c | 1
-rw-r--r--  arch/sparc/kernel/process_32.c | 12
-rw-r--r--  arch/sparc/kernel/prom_32.c | 1
-rw-r--r--  arch/sparc/kernel/setup_32.c | 87
-rw-r--r--  arch/sparc/kernel/setup_64.c | 78
-rw-r--r--  arch/sparc/kernel/smp_32.c | 103
-rw-r--r--  arch/sparc/kernel/smp_64.c | 58
-rw-r--r--  arch/sparc/kernel/sun4c_irq.c | 150
-rw-r--r--  arch/sparc/kernel/sun4d_irq.c | 494
-rw-r--r--  arch/sparc/kernel/sun4d_smp.c | 93
-rw-r--r--  arch/sparc/kernel/sun4m_irq.c | 179
-rw-r--r--  arch/sparc/kernel/sun4m_smp.c | 51
-rw-r--r--  arch/sparc/kernel/sysfs.c | 3
-rw-r--r--  arch/sparc/kernel/time_32.c | 10
-rw-r--r--  arch/sparc/kernel/us2e_cpufreq.c | 4
-rw-r--r--  arch/sparc/kernel/us3_cpufreq.c | 4
-rw-r--r--  arch/sparc/lib/Makefile | 1
-rw-r--r--  arch/sparc/lib/rwsem_32.S | 204
-rw-r--r--  arch/sparc/mm/init_64.c | 14
-rw-r--r--  arch/x86/kernel/cpu/common.c | 4
-rw-r--r--  drivers/block/drbd/drbd_int.h | 1
-rw-r--r--  drivers/dma/ioat/dma.c | 1
-rw-r--r--  drivers/dma/ioat/dma_v2.c | 1
-rw-r--r--  drivers/dma/ioat/dma_v3.c | 1
-rw-r--r--  drivers/ide/ide-acpi.c | 4
-rw-r--r--  drivers/ide/ide-floppy.c | 2
-rw-r--r--  drivers/ide/ide-scan-pci.c | 2
-rw-r--r--  drivers/ide/pmac.c | 4
-rw-r--r--  drivers/infiniband/hw/amso1100/c2.c | 1
-rw-r--r--  drivers/md/bitmap.c | 10
-rw-r--r--  drivers/md/md.c | 23
-rw-r--r--  drivers/md/multipath.c | 60
-rw-r--r--  drivers/md/multipath.h | 1
-rw-r--r--  drivers/md/raid1.c | 506
-rw-r--r--  drivers/md/raid1.h | 4
-rw-r--r--  drivers/md/raid10.c | 424
-rw-r--r--  drivers/md/raid5.c | 41
-rw-r--r--  drivers/net/igb/igb_main.c | 2
-rw-r--r--  fs/compat.c | 235
-rw-r--r--  fs/exec.c | 125
-rw-r--r--  fs/gfs2/bmap.c | 2
-rw-r--r--  fs/gfs2/log.c | 29
-rw-r--r--  fs/gfs2/rgrp.c | 4
-rw-r--r--  fs/nilfs2/alloc.c | 12
-rw-r--r--  fs/nilfs2/bmap.c | 4
-rw-r--r--  fs/nilfs2/btnode.c | 19
-rw-r--r--  fs/nilfs2/btnode.h | 4
-rw-r--r--  fs/nilfs2/btree.c | 38
-rw-r--r--  fs/nilfs2/cpfile.c | 24
-rw-r--r--  fs/nilfs2/dat.c | 4
-rw-r--r--  fs/nilfs2/file.c | 1
-rw-r--r--  fs/nilfs2/gcinode.c | 25
-rw-r--r--  fs/nilfs2/ifile.c | 4
-rw-r--r--  fs/nilfs2/inode.c | 23
-rw-r--r--  fs/nilfs2/ioctl.c | 61
-rw-r--r--  fs/nilfs2/mdt.c | 8
-rw-r--r--  fs/nilfs2/mdt.h | 9
-rw-r--r--  fs/nilfs2/nilfs.h | 7
-rw-r--r--  fs/nilfs2/page.c | 79
-rw-r--r--  fs/nilfs2/page.h | 7
-rw-r--r--  fs/nilfs2/recovery.c | 12
-rw-r--r--  fs/nilfs2/segbuf.c | 17
-rw-r--r--  fs/nilfs2/segment.c | 190
-rw-r--r--  fs/nilfs2/segment.h | 2
-rw-r--r--  fs/nilfs2/sufile.c | 274
-rw-r--r--  fs/nilfs2/sufile.h | 4
-rw-r--r--  fs/nilfs2/super.c | 131
-rw-r--r--  fs/nilfs2/the_nilfs.c | 24
-rw-r--r--  fs/nilfs2/the_nilfs.h | 2
-rw-r--r--  include/linux/binfmts.h | 4
-rw-r--r--  include/linux/ide.h | 2
-rw-r--r--  include/linux/nilfs2_fs.h | 4
-rw-r--r--  include/linux/skbuff.h | 6
-rw-r--r--  include/net/mac80211.h | 3
-rw-r--r--  init/Kconfig | 6
-rw-r--r--  kernel/sched.c | 2
-rw-r--r--  net/ipv4/fib_trie.c | 2
-rwxr-xr-x  tools/testing/ktest/ktest.pl | 156
-rw-r--r--  tools/testing/ktest/sample.conf | 93
113 files changed, 3086 insertions(+), 2894 deletions(-)
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index e560d10..63a027c 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -25,6 +25,10 @@ config SPARC
select HAVE_DMA_ATTRS
select HAVE_DMA_API_DEBUG
select HAVE_ARCH_JUMP_LABEL
+ select HAVE_GENERIC_HARDIRQS
+ select GENERIC_HARDIRQS_NO_DEPRECATED
+ select GENERIC_IRQ_SHOW
+ select USE_GENERIC_SMP_HELPERS if SMP
config SPARC32
def_bool !64BIT
@@ -43,15 +47,12 @@ config SPARC64
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_SYSCALL_TRACEPOINTS
- select USE_GENERIC_SMP_HELPERS if SMP
select RTC_DRV_CMOS
select RTC_DRV_BQ4802
select RTC_DRV_SUN4V
select RTC_DRV_STARFIRE
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
- select HAVE_GENERIC_HARDIRQS
- select GENERIC_IRQ_SHOW
select IRQ_PREFLOW_FASTEOI
config ARCH_DEFCONFIG
diff --git a/arch/sparc/include/asm/cpudata_32.h b/arch/sparc/include/asm/cpudata_32.h
index 31d48a0..a4c5a93 100644
--- a/arch/sparc/include/asm/cpudata_32.h
+++ b/arch/sparc/include/asm/cpudata_32.h
@@ -16,6 +16,10 @@ typedef struct {
unsigned long clock_tick;
unsigned int multiplier;
unsigned int counter;
+#ifdef CONFIG_SMP
+ unsigned int irq_resched_count;
+ unsigned int irq_call_count;
+#endif
int prom_node;
int mid;
int next;
@@ -23,5 +27,6 @@ typedef struct {
DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
+#define local_cpu_data() __get_cpu_var(__cpu_data)
#endif /* _SPARC_CPUDATA_H */
diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h
index 86666f7..482c79e 100644
--- a/arch/sparc/include/asm/floppy_32.h
+++ b/arch/sparc/include/asm/floppy_32.h
@@ -281,28 +281,27 @@ static inline void sun_fd_enable_dma(void)
pdma_areasize = pdma_size;
}
-/* Our low-level entry point in arch/sparc/kernel/entry.S */
-extern int sparc_floppy_request_irq(int irq, unsigned long flags,
- irq_handler_t irq_handler);
+extern int sparc_floppy_request_irq(unsigned int irq,
+ irq_handler_t irq_handler);
static int sun_fd_request_irq(void)
{
static int once = 0;
- int error;
- if(!once) {
+ if (!once) {
once = 1;
- error = sparc_floppy_request_irq(FLOPPY_IRQ,
- IRQF_DISABLED,
- floppy_interrupt);
- return ((error == 0) ? 0 : -1);
- } else return 0;
+ return sparc_floppy_request_irq(FLOPPY_IRQ, floppy_interrupt);
+ } else {
+ return 0;
+ }
}
static struct linux_prom_registers fd_regs[2];
static int sun_floppy_init(void)
{
+ struct platform_device *op;
+ struct device_node *dp;
char state[128];
phandle tnode, fd_node;
int num_regs;
@@ -310,7 +309,6 @@ static int sun_floppy_init(void)
use_virtual_dma = 1;
- FLOPPY_IRQ = 11;
/* Forget it if we aren't on a machine that could possibly
* ever have a floppy drive.
*/
@@ -349,6 +347,26 @@ static int sun_floppy_init(void)
sun_fdc = (struct sun_flpy_controller *)
of_ioremap(&r, 0, fd_regs[0].reg_size, "floppy");
+ /* Look up irq in platform_device.
+ * We try "SUNW,fdtwo" and "fd"
+ */
+ for_each_node_by_name(dp, "SUNW,fdtwo") {
+ op = of_find_device_by_node(dp);
+ if (op)
+ break;
+ }
+ if (!op) {
+ for_each_node_by_name(dp, "fd") {
+ op = of_find_device_by_node(dp);
+ if (op)
+ break;
+ }
+ }
+ if (!op)
+ goto no_sun_fdc;
+
+ FLOPPY_IRQ = op->archdata.irqs[0];
+
/* Last minute sanity check... */
if(sun_fdc->status_82072 == 0xff) {
sun_fdc = NULL;
diff --git a/arch/sparc/include/asm/io.h b/arch/sparc/include/asm/io.h
index a34b299..f6902cf 100644
--- a/arch/sparc/include/asm/io.h
+++ b/arch/sparc/include/asm/io.h
@@ -5,4 +5,17 @@
#else
#include <asm/io_32.h>
#endif
+
+/*
+ * Defines used for both SPARC32 and SPARC64
+ */
+
+/* Big endian versions of memory read/write routines */
+#define readb_be(__addr) __raw_readb(__addr)
+#define readw_be(__addr) __raw_readw(__addr)
+#define readl_be(__addr) __raw_readl(__addr)
+#define writeb_be(__b, __addr) __raw_writeb(__b, __addr)
+#define writel_be(__w, __addr) __raw_writel(__w, __addr)
+#define writew_be(__l, __addr) __raw_writew(__l, __addr)
+
#endif
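
The readb_be()/writel_be() macros added here forward straight to the __raw_* accessors because SPARC is natively big-endian, so no byte swapping is needed. A user-space sketch of the same forwarding-macro idiom (mmio_read32() and fake_reg are hypothetical stand-ins, not kernel API):

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t fake_reg = 0x12345678;	/* pretend device register */

	static uint32_t mmio_read32(volatile uint32_t *addr)
	{
		return *addr;			/* raw, native-endian load */
	}

	/* On a big-endian CPU the "BE" read is just the raw read */
	#define readl_be(addr)	mmio_read32(addr)

	int main(void)
	{
		printf("reg = 0x%08x\n", (unsigned)readl_be(&fake_reg));
		return 0;
	}
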
diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h
index eced3e3..2ae3aca 100644
--- a/arch/sparc/include/asm/irq_32.h
+++ b/arch/sparc/include/asm/irq_32.h
@@ -6,7 +6,11 @@
#ifndef _SPARC_IRQ_H
#define _SPARC_IRQ_H
-#define NR_IRQS 16
+/* Allocated number of logical irq numbers.
+ * sun4d boxes (ss2000e) should be OK with ~32.
+ * Be on the safe side and make room for 64
+ */
+#define NR_IRQS 64
#include <linux/interrupt.h>
diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h
index c04f96f..6bdaf1e 100644
--- a/arch/sparc/include/asm/leon.h
+++ b/arch/sparc/include/asm/leon.h
@@ -52,29 +52,6 @@
#define LEON_DIAGF_VALID 0x2000
#define LEON_DIAGF_VALID_SHIFT 13
-/*
- * Interrupt Sources
- *
- * The interrupt source numbers directly map to the trap type and to
- * the bits used in the Interrupt Clear, Interrupt Force, Interrupt Mask,
- * and the Interrupt Pending Registers.
- */
-#define LEON_INTERRUPT_CORRECTABLE_MEMORY_ERROR 1
-#define LEON_INTERRUPT_UART_1_RX_TX 2
-#define LEON_INTERRUPT_UART_0_RX_TX 3
-#define LEON_INTERRUPT_EXTERNAL_0 4
-#define LEON_INTERRUPT_EXTERNAL_1 5
-#define LEON_INTERRUPT_EXTERNAL_2 6
-#define LEON_INTERRUPT_EXTERNAL_3 7
-#define LEON_INTERRUPT_TIMER1 8
-#define LEON_INTERRUPT_TIMER2 9
-#define LEON_INTERRUPT_EMPTY1 10
-#define LEON_INTERRUPT_EMPTY2 11
-#define LEON_INTERRUPT_OPEN_ETH 12
-#define LEON_INTERRUPT_EMPTY4 13
-#define LEON_INTERRUPT_EMPTY5 14
-#define LEON_INTERRUPT_EMPTY6 15
-
/* irq masks */
#define LEON_HARD_INT(x) (1 << (x)) /* irq 0-15 */
#define LEON_IRQMASK_R 0x0000fffe /* bit 15- 1 of lregs.irqmask */
@@ -183,7 +160,6 @@ static inline void leon_srmmu_enabletlb(void)
/* macro access for leon_readnobuffer_reg() */
#define LEON_BYPASSCACHE_LOAD_VA(x) leon_readnobuffer_reg((unsigned long)(x))
-extern void sparc_leon_eirq_register(int eirq);
extern void leon_init(void);
extern void leon_switch_mm(void);
extern void leon_init_IRQ(void);
@@ -239,8 +215,8 @@ static inline int sparc_leon3_cpuid(void)
#endif /*!__ASSEMBLY__*/
#ifdef CONFIG_SMP
-# define LEON3_IRQ_RESCHEDULE 13
-# define LEON3_IRQ_TICKER (leon_percpu_timer_dev[0].irq)
+# define LEON3_IRQ_IPI_DEFAULT 13
+# define LEON3_IRQ_TICKER (leon3_ticker_irq)
# define LEON3_IRQ_CROSS_CALL 15
#endif
@@ -339,9 +315,9 @@ struct leon2_cacheregs {
#include <linux/interrupt.h>
struct device_node;
-extern int sparc_leon_eirq_get(int eirq, int cpu);
-extern irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id);
-extern void sparc_leon_eirq_register(int eirq);
+extern unsigned int leon_build_device_irq(unsigned int real_irq,
+ irq_flow_handler_t flow_handler,
+ const char *name, int do_ack);
extern void leon_clear_clock_irq(void);
extern void leon_load_profile_irq(int cpu, unsigned int limit);
extern void leon_init_timers(irq_handler_t counter_fn);
@@ -358,6 +334,7 @@ extern void leon3_getCacheRegs(struct leon3_cacheregs *regs);
extern int leon_flush_needed(void);
extern void leon_switch_mm(void);
extern int srmmu_swprobe_trace;
+extern int leon3_ticker_irq;
#ifdef CONFIG_SMP
extern int leon_smp_nrcpus(void);
@@ -366,17 +343,19 @@ extern void leon_smp_done(void);
extern void leon_boot_cpus(void);
extern int leon_boot_one_cpu(int i);
void leon_init_smp(void);
-extern void cpu_probe(void);
extern void cpu_idle(void);
extern void init_IRQ(void);
extern void cpu_panic(void);
extern int __leon_processor_id(void);
void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu);
+extern irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused);
-extern unsigned int real_irq_entry[], smpleon_ticker[];
+extern unsigned int real_irq_entry[];
+extern unsigned int smpleon_ipi[];
extern unsigned int patchme_maybe_smp_msg[];
extern unsigned int t_nmi[], linux_trap_ipi15_leon[];
extern unsigned int linux_trap_ipi15_sun4m[];
+extern int leon_ipi_irq;
#endif /* CONFIG_SMP */
diff --git a/arch/sparc/include/asm/pcic.h b/arch/sparc/include/asm/pcic.h
index f20ef56..7eb5d78 100644
--- a/arch/sparc/include/asm/pcic.h
+++ b/arch/sparc/include/asm/pcic.h
@@ -29,11 +29,17 @@ struct linux_pcic {
int pcic_imdim;
};
-extern int pcic_probe(void);
-/* Erm... MJ redefined pcibios_present() so that it does not work early. */
+#ifdef CONFIG_PCI
extern int pcic_present(void);
+extern int pcic_probe(void);
+extern void pci_time_init(void);
extern void sun4m_pci_init_IRQ(void);
-
+#else
+static inline int pcic_present(void) { return 0; }
+static inline int pcic_probe(void) { return 0; }
+static inline void pci_time_init(void) {}
+static inline void sun4m_pci_init_IRQ(void) {}
+#endif
#endif
/* Size of PCI I/O space which we relocate. */
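
The #ifdef CONFIG_PCI block above is the usual "static inline no-op stub" idiom: when PCI support is compiled out, callers such as init_IRQ() (changed later in this patch) can call pcic_probe()/pcic_present() unconditionally and the compiler eliminates the dead branch. A tiny user-space model of the idiom, assuming a hypothetical FEATURE_PCI switch rather than the kernel's config system:

	#include <stdio.h>

	/* #define FEATURE_PCI */	/* flip to model CONFIG_PCI=y */

	#ifdef FEATURE_PCI
	static int pcic_present(void) { return 1; }	/* a real probe would go here */
	#else
	static inline int pcic_present(void) { return 0; }	/* stub, compiles away */
	#endif

	int main(void)
	{
		if (pcic_present())
			printf("PCI controller found\n");
		else
			printf("no PCI\n");
		return 0;
	}
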
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 303bd4d..5b31a8e 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -8,6 +8,8 @@
* Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
*/
+#include <linux/const.h>
+
#ifndef __ASSEMBLY__
#include <asm-generic/4level-fixup.h>
@@ -456,9 +458,9 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma,
#endif /* !(__ASSEMBLY__) */
-#define VMALLOC_START 0xfe600000
+#define VMALLOC_START _AC(0xfe600000,UL)
/* XXX Alter this when I get around to fixing sun4c - Anton */
-#define VMALLOC_END 0xffc00000
+#define VMALLOC_END _AC(0xffc00000,UL)
/* We provide our own get_unmapped_area to cope with VA holes for userland */
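
The switch to _AC() lets the same constant be shared between C and assembly: the UL suffix is appended only when compiling C, because "0xfe600000UL" is a syntax error to the assembler. A standalone sketch mirroring the definition in <linux/const.h>:

	#include <stdio.h>

	#ifdef __ASSEMBLY__
	#define _AC(X, Y)	X		/* assembler sees the bare constant */
	#else
	#define __AC(X, Y)	(X##Y)		/* C sees the constant with suffix */
	#define _AC(X, Y)	__AC(X, Y)
	#endif

	#define VMALLOC_START	_AC(0xfe600000, UL)	/* 0xfe600000UL in C code */

	int main(void)
	{
		printf("VMALLOC_START = %#lx\n", VMALLOC_START);
		return 0;
	}
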
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index f8dddb7..b77128c 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -699,6 +699,9 @@ extern pmd_t swapper_low_pmd_dir[2048];
extern void paging_init(void);
extern unsigned long find_ecache_flush_span(unsigned long size);
+struct seq_file;
+extern void mmu_info(struct seq_file *);
+
/* These do nothing with the way I have things setup. */
#define mmu_lockarea(vaddr, len) (vaddr)
#define mmu_unlockarea(vaddr, len) do { } while(0)
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 2643c62..64718ba 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -11,4 +11,16 @@
# define COMMAND_LINE_SIZE 256
#endif
+#ifdef __KERNEL__
+
+#ifdef CONFIG_SPARC32
+/* The CPU that was used for booting
+ * Only sun4d + leon may have boot_cpu_id != 0
+ */
+extern unsigned char boot_cpu_id;
+extern unsigned char boot_cpu_id4;
+#endif
+
+#endif /* __KERNEL__ */
+
#endif /* _SPARC_SETUP_H */
diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h
index d82d7f4..093f108 100644
--- a/arch/sparc/include/asm/smp_32.h
+++ b/arch/sparc/include/asm/smp_32.h
@@ -50,42 +50,38 @@ void smp_callin(void);
void smp_boot_cpus(void);
void smp_store_cpu_info(int);
+void smp_resched_interrupt(void);
+void smp_call_function_single_interrupt(void);
+void smp_call_function_interrupt(void);
+
struct seq_file;
void smp_bogo(struct seq_file *);
void smp_info(struct seq_file *);
BTFIXUPDEF_CALL(void, smp_cross_call, smpfunc_t, cpumask_t, unsigned long, unsigned long, unsigned long, unsigned long)
BTFIXUPDEF_CALL(int, __hard_smp_processor_id, void)
+BTFIXUPDEF_CALL(void, smp_ipi_resched, int);
+BTFIXUPDEF_CALL(void, smp_ipi_single, int);
+BTFIXUPDEF_CALL(void, smp_ipi_mask_one, int);
BTFIXUPDEF_BLACKBOX(hard_smp_processor_id)
BTFIXUPDEF_BLACKBOX(load_current)
#define smp_cross_call(func,mask,arg1,arg2,arg3,arg4) BTFIXUP_CALL(smp_cross_call)(func,mask,arg1,arg2,arg3,arg4)
-static inline void xc0(smpfunc_t func) { smp_cross_call(func, cpu_online_map, 0, 0, 0, 0); }
+static inline void xc0(smpfunc_t func) { smp_cross_call(func, *cpu_online_mask, 0, 0, 0, 0); }
static inline void xc1(smpfunc_t func, unsigned long arg1)
-{ smp_cross_call(func, cpu_online_map, arg1, 0, 0, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); }
static inline void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, 0, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); }
static inline void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2,
unsigned long arg3)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, 0); }
static inline void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2,
unsigned long arg3, unsigned long arg4)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, arg4); }
-
-static inline int smp_call_function(void (*func)(void *info), void *info, int wait)
-{
- xc1((smpfunc_t)func, (unsigned long)info);
- return 0;
-}
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, arg4); }
-static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
- void *info, int wait)
-{
- smp_cross_call((smpfunc_t)func, cpumask_of_cpu(cpuid),
- (unsigned long) info, 0, 0, 0);
- return 0;
-}
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
static inline int cpu_logical_map(int cpu)
{
@@ -135,6 +131,11 @@ static inline int hard_smp_processor_id(void)
__asm__ __volatile__("lda [%g0] ASI_M_VIKING_TMP1, %0\n\t"
"nop; nop" :
"=&r" (cpuid));
+ - leon
+ __asm__ __volatile__( "rd %asr17, %0\n\t"
+ "srl %0, 0x1c, %0\n\t"
+ "nop\n\t" :
+ "=&r" (cpuid));
See btfixup.h and btfixupprep.c to understand how a blackbox works.
*/
__asm__ __volatile__("sethi %%hi(___b_hard_smp_processor_id), %0\n\t"
diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
index f49e11c..20bca89 100644
--- a/arch/sparc/include/asm/smp_64.h
+++ b/arch/sparc/include/asm/smp_64.h
@@ -49,6 +49,10 @@ extern void cpu_play_dead(void);
extern void smp_fetch_global_regs(void);
+struct seq_file;
+void smp_bogo(struct seq_file *);
+void smp_info(struct seq_file *);
+
#ifdef CONFIG_HOTPLUG_CPU
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 7f9b9db..5f5b8bf 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -9,6 +9,7 @@
#ifndef __ASSEMBLY__
#include <asm/psr.h>
+#include <asm/processor.h> /* for cpu_relax */
#define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
diff --git a/arch/sparc/include/asm/system_32.h b/arch/sparc/include/asm/system_32.h
index 890036b..47a7e86 100644
--- a/arch/sparc/include/asm/system_32.h
+++ b/arch/sparc/include/asm/system_32.h
@@ -15,11 +15,6 @@
#include <linux/irqflags.h>
-static inline unsigned int probe_irq_mask(unsigned long val)
-{
- return 0;
-}
-
/*
* Sparc (general) CPU types
*/
diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h
index e3b65d8..3c96d3b 100644
--- a/arch/sparc/include/asm/system_64.h
+++ b/arch/sparc/include/asm/system_64.h
@@ -29,10 +29,6 @@ enum sparc_cpu {
/* This cannot ever be a sun4c :) That's just history. */
#define ARCH_SUN4C 0
-extern const char *sparc_cpu_type;
-extern const char *sparc_fpu_type;
-extern const char *sparc_pmu_type;
-
extern char reboot_command[];
/* These are here in an effort to more fully work around Spitfire Errata
diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h
index 5b0a06d..a9be04b 100644
--- a/arch/sparc/include/asm/winmacro.h
+++ b/arch/sparc/include/asm/winmacro.h
@@ -103,6 +103,7 @@
st %scratch, [%cur_reg + TI_W_SAVED];
#ifdef CONFIG_SMP
+/* Results of LOAD_CURRENT() after BTFIXUP for SUN4M, SUN4D & LEON (comments) */
#define LOAD_CURRENT4M(dest_reg, idreg) \
rd %tbr, %idreg; \
sethi %hi(current_set), %dest_reg; \
@@ -118,6 +119,14 @@
or %dest_reg, %lo(C_LABEL(current_set)), %dest_reg; \
ld [%idreg + %dest_reg], %dest_reg;
+#define LOAD_CURRENT_LEON(dest_reg, idreg) \
+ rd %asr17, %idreg; \
+ sethi %hi(current_set), %dest_reg; \
+ srl %idreg, 0x1c, %idreg; \
+ or %dest_reg, %lo(current_set), %dest_reg; \
+ sll %idreg, 0x2, %idreg; \
+ ld [%idreg + %dest_reg], %dest_reg;
+
/* Blackbox - take care with this... - check smp4m and smp4d before changing this. */
#define LOAD_CURRENT(dest_reg, idreg) \
sethi %hi(___b_load_current), %idreg; \
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 99aa4db..9cff270 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -71,10 +71,6 @@ obj-$(CONFIG_SPARC64) += pcr.o
obj-$(CONFIG_SPARC64) += nmi.o
obj-$(CONFIG_SPARC64_SMP) += cpumap.o
-# sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation
-obj-$(CONFIG_SPARC32) += devres.o
-devres-y := ../../../kernel/irq/devres.o
-
obj-y += dma.o
obj-$(CONFIG_SPARC32_PCI) += pcic.o
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 7925c54..138dbbc 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -4,6 +4,7 @@
* Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
*/
+#include <linux/seq_file.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -11,7 +12,9 @@
#include <linux/threads.h>
#include <asm/spitfire.h>
+#include <asm/pgtable.h>
#include <asm/oplib.h>
+#include <asm/setup.h>
#include <asm/page.h>
#include <asm/head.h>
#include <asm/psr.h>
@@ -23,6 +26,9 @@
DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
EXPORT_PER_CPU_SYMBOL(__cpu_data);
+int ncpus_probed;
+unsigned int fsr_storage;
+
struct cpu_info {
int psr_vers;
const char *name;
@@ -247,13 +253,12 @@ static const struct manufacturer_info __initconst manufacturer_info[] = {
* machine type value into consideration too. I will fix this.
*/
-const char *sparc_cpu_type;
-const char *sparc_fpu_type;
+static const char *sparc_cpu_type;
+static const char *sparc_fpu_type;
const char *sparc_pmu_type;
-unsigned int fsr_storage;
-static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
+static void __init set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
{
const struct manufacturer_info *manuf;
int i;
@@ -313,7 +318,123 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
}
#ifdef CONFIG_SPARC32
-void __cpuinit cpu_probe(void)
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+ seq_printf(m,
+ "cpu\t\t: %s\n"
+ "fpu\t\t: %s\n"
+ "promlib\t\t: Version %d Revision %d\n"
+ "prom\t\t: %d.%d\n"
+ "type\t\t: %s\n"
+ "ncpus probed\t: %d\n"
+ "ncpus active\t: %d\n"
+#ifndef CONFIG_SMP
+ "CPU0Bogo\t: %lu.%02lu\n"
+ "CPU0ClkTck\t: %ld\n"
+#endif
+ ,
+ sparc_cpu_type,
+ sparc_fpu_type,
+ romvec->pv_romvers,
+ prom_rev,
+ romvec->pv_printrev >> 16,
+ romvec->pv_printrev & 0xffff,
+ &cputypval[0],
+ ncpus_probed,
+ num_online_cpus()
+#ifndef CONFIG_SMP
+ , cpu_data(0).udelay_val/(500000/HZ),
+ (cpu_data(0).udelay_val/(5000/HZ)) % 100,
+ cpu_data(0).clock_tick
+#endif
+ );
+
+#ifdef CONFIG_SMP
+ smp_bogo(m);
+#endif
+ mmu_info(m);
+#ifdef CONFIG_SMP
+ smp_info(m);
+#endif
+ return 0;
+}
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
+unsigned int dcache_parity_tl1_occurred;
+unsigned int icache_parity_tl1_occurred;
+
+
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+ seq_printf(m,
+ "cpu\t\t: %s\n"
+ "fpu\t\t: %s\n"
+ "pmu\t\t: %s\n"
+ "prom\t\t: %s\n"
+ "type\t\t: %s\n"
+ "ncpus probed\t: %d\n"
+ "ncpus active\t: %d\n"
+ "D$ parity tl1\t: %u\n"
+ "I$ parity tl1\t: %u\n"
+#ifndef CONFIG_SMP
+ "Cpu0ClkTck\t: %016lx\n"
+#endif
+ ,
+ sparc_cpu_type,
+ sparc_fpu_type,
+ sparc_pmu_type,
+ prom_version,
+ ((tlb_type == hypervisor) ?
+ "sun4v" :
+ "sun4u"),
+ ncpus_probed,
+ num_online_cpus(),
+ dcache_parity_tl1_occurred,
+ icache_parity_tl1_occurred
+#ifndef CONFIG_SMP
+ , cpu_data(0).clock_tick
+#endif
+ );
+#ifdef CONFIG_SMP
+ smp_bogo(m);
+#endif
+ mmu_info(m);
+#ifdef CONFIG_SMP
+ smp_info(m);
+#endif
+ return 0;
+}
+#endif /* CONFIG_SPARC64 */
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+ /* The pointer we are returning is arbitrary,
+ * it just has to be non-NULL and not IS_ERR
+ * in the success case.
+ */
+ return *pos == 0 ? &c_start : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
+};
+
+#ifdef CONFIG_SPARC32
+static int __init cpu_type_probe(void)
{
int psr_impl, psr_vers, fpu_vers;
int psr;
@@ -332,8 +453,12 @@ void __cpuinit cpu_probe(void)
put_psr(psr);
set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers);
+
+ return 0;
}
-#else
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
static void __init sun4v_cpu_probe(void)
{
switch (sun4v_chip_type) {
@@ -374,6 +499,6 @@ static int __init cpu_type_probe(void)
}
return 0;
}
+#endif /* CONFIG_SPARC64 */
early_initcall(cpu_type_probe);
-#endif
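
The c_start()/c_next()/c_stop() trio above implements a one-record seq_file iterator: c_start() returns an arbitrary non-NULL cookie only for position 0, so show_cpuinfo() runs exactly once per read of /proc/cpuinfo. The contract, modeled in plain user-space C (no real seq_file involved):

	#include <stdio.h>

	static int cookie;	/* any non-NULL pointer will do */

	static void *c_start(long *pos) { return *pos == 0 ? (void *)&cookie : NULL; }
	static void *c_next(long *pos)  { ++*pos; return c_start(pos); }

	int main(void)
	{
		long pos = 0;
		void *v;

		for (v = c_start(&pos); v != NULL; v = c_next(&pos))
			printf("show_cpuinfo() would run here (pos=%ld)\n", pos);
		return 0;	/* body executes exactly once */
	}
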
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index 8de64c8..d91fd78 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -202,7 +202,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void)
new_tree->total_nodes = n;
memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));
- prev_cpu = cpu = first_cpu(cpu_online_map);
+ prev_cpu = cpu = cpumask_first(cpu_online_mask);
/* Initialize all levels in the tree with the first CPU */
for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
@@ -381,7 +381,7 @@ static int simple_map_to_cpu(unsigned int index)
}
/* Impossible, since num_online_cpus() <= num_possible_cpus() */
- return first_cpu(cpu_online_map);
+ return cpumask_first(cpu_online_mask);
}
static int _map_to_cpu(unsigned int index)
diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c
index d2eddd6..113c052 100644
--- a/arch/sparc/kernel/devices.c
+++ b/arch/sparc/kernel/devices.c
@@ -20,7 +20,6 @@
#include <asm/system.h>
#include <asm/cpudata.h>
-extern void cpu_probe(void);
extern void clock_stop_probe(void); /* tadpole.c */
extern void sun4c_probe_memerr_reg(void);
@@ -115,7 +114,7 @@ int cpu_get_hwmid(phandle prom_node)
void __init device_scan(void)
{
- prom_printf("Booting Linux...\n");
+ printk(KERN_NOTICE "Booting Linux...\n");
#ifndef CONFIG_SMP
{
@@ -133,7 +132,6 @@ void __init device_scan(void)
}
#endif /* !CONFIG_SMP */
- cpu_probe();
{
extern void auxio_probe(void);
extern void auxio_power_probe(void);
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index 3add4de..dd1342c 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -497,7 +497,7 @@ static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
tag->num_records = ncpus;
i = 0;
- for_each_cpu_mask(cpu, *mask) {
+ for_each_cpu(cpu, mask) {
ent[i].cpu = cpu;
ent[i].result = DR_CPU_RES_OK;
ent[i].stat = default_stat;
@@ -534,7 +534,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
int resp_len, ncpus, cpu;
unsigned long flags;
- ncpus = cpus_weight(*mask);
+ ncpus = cpumask_weight(mask);
resp_len = dr_cpu_size_response(ncpus);
resp = kzalloc(resp_len, GFP_KERNEL);
if (!resp)
@@ -547,7 +547,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
mdesc_populate_present_mask(mask);
mdesc_fill_in_cpu_data(mask);
- for_each_cpu_mask(cpu, *mask) {
+ for_each_cpu(cpu, mask) {
int err;
printk(KERN_INFO "ds-%llu: Starting cpu %d...\n",
@@ -593,7 +593,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp,
int resp_len, ncpus, cpu;
unsigned long flags;
- ncpus = cpus_weight(*mask);
+ ncpus = cpumask_weight(mask);
resp_len = dr_cpu_size_response(ncpus);
resp = kzalloc(resp_len, GFP_KERNEL);
if (!resp)
@@ -603,7 +603,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp,
resp_len, ncpus, mask,
DR_CPU_STAT_UNCONFIGURED);
- for_each_cpu_mask(cpu, *mask) {
+ for_each_cpu(cpu, mask) {
int err;
printk(KERN_INFO "ds-%llu: Shutting down cpu %d...\n",
@@ -649,13 +649,13 @@ static void __cpuinit dr_cpu_data(struct ds_info *dp,
purge_dups(cpu_list, tag->num_records);
- cpus_clear(mask);
+ cpumask_clear(&mask);
for (i = 0; i < tag->num_records; i++) {
if (cpu_list[i] == CPU_SENTINEL)
continue;
if (cpu_list[i] < nr_cpu_ids)
- cpu_set(cpu_list[i], mask);
+ cpumask_set_cpu(cpu_list[i], &mask);
}
if (tag->type == DR_CPU_CONFIGURE)
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 6da784a..8341963 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -269,19 +269,22 @@ smp4m_ticker:
/* Here is where we check for possible SMP IPI passed to us
* on some level other than 15 which is the NMI and only used
* for cross calls. That has a separate entry point below.
+ *
+ * IPIs are sent on Level 12, 13 and 14. See IRQ_IPI_*.
*/
maybe_smp4m_msg:
GET_PROCESSOR4M_ID(o3)
sethi %hi(sun4m_irq_percpu), %l5
sll %o3, 2, %o3
or %l5, %lo(sun4m_irq_percpu), %o5
- sethi %hi(0x40000000), %o2
+ sethi %hi(0x70000000), %o2 ! Check all soft-IRQs
ld [%o5 + %o3], %o1
ld [%o1 + 0x00], %o3 ! sun4m_irq_percpu[cpu]->pending
andcc %o3, %o2, %g0
be,a smp4m_ticker
cmp %l7, 14
- st %o2, [%o1 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x40000000
+ /* Soft-IRQ IPI */
+ st %o2, [%o1 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x70000000
WRITE_PAUSE
ld [%o1 + 0x00], %g0 ! sun4m_irq_percpu[cpu]->pending
WRITE_PAUSE
@@ -290,9 +293,27 @@ maybe_smp4m_msg:
WRITE_PAUSE
wr %l4, PSR_ET, %psr
WRITE_PAUSE
- call smp_reschedule_irq
+ srl %o3, 28, %o2 ! shift for simpler checks below
+maybe_smp4m_msg_check_single:
+ andcc %o2, 0x1, %g0
+ beq,a maybe_smp4m_msg_check_mask
+ andcc %o2, 0x2, %g0
+ call smp_call_function_single_interrupt
nop
-
+ andcc %o2, 0x2, %g0
+maybe_smp4m_msg_check_mask:
+ beq,a maybe_smp4m_msg_check_resched
+ andcc %o2, 0x4, %g0
+ call smp_call_function_interrupt
+ nop
+ andcc %o2, 0x4, %g0
+maybe_smp4m_msg_check_resched:
+ /* rescheduling is done in RESTORE_ALL regardless, but incr stats */
+ beq,a maybe_smp4m_msg_out
+ nop
+ call smp_resched_interrupt
+ nop
+maybe_smp4m_msg_out:
RESTORE_ALL
.align 4
@@ -401,18 +422,18 @@ linux_trap_ipi15_sun4d:
1: b,a 1b
#ifdef CONFIG_SPARC_LEON
-
- .globl smpleon_ticker
- /* SMP per-cpu ticker interrupts are handled specially. */
-smpleon_ticker:
+ .globl smpleon_ipi
+ .extern leon_ipi_interrupt
+ /* SMP per-cpu IPI interrupts are handled specially. */
+smpleon_ipi:
SAVE_ALL
or %l0, PSR_PIL, %g2
wr %g2, 0x0, %psr
WRITE_PAUSE
wr %g2, PSR_ET, %psr
WRITE_PAUSE
- call leon_percpu_timer_interrupt
- add %sp, STACKFRAME_SZ, %o0
+ call leonsmp_ipi_interrupt
+ add %sp, STACKFRAME_SZ, %o1 ! pt_regs
wr %l0, PSR_ET, %psr
WRITE_PAUSE
RESTORE_ALL
diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
index 5942349..5877857 100644
--- a/arch/sparc/kernel/head_32.S
+++ b/arch/sparc/kernel/head_32.S
@@ -810,31 +810,25 @@ found_version:
got_prop:
#ifdef CONFIG_SPARC_LEON
/* no cpu-type check is needed, it is a SPARC-LEON */
-#ifdef CONFIG_SMP
- ba leon_smp_init
- nop
- .global leon_smp_init
-leon_smp_init:
- sethi %hi(boot_cpu_id), %g1 ! master always 0
- stb %g0, [%g1 + %lo(boot_cpu_id)]
- sethi %hi(boot_cpu_id4), %g1 ! master always 0
- stb %g0, [%g1 + %lo(boot_cpu_id4)]
+ sethi %hi(boot_cpu_id), %g2 ! boot-cpu index
- rd %asr17,%g1
- srl %g1,28,%g1
+#ifdef CONFIG_SMP
+ ldub [%g2 + %lo(boot_cpu_id)], %g1
+ cmp %g1, 0xff ! unset means first CPU
+ bne leon_smp_cpu_startup ! continue only with master
+ nop
+#endif
+ /* Get CPU-ID from most significant 4-bit of ASR17 */
+ rd %asr17, %g1
+ srl %g1, 28, %g1
- cmp %g0,%g1
- beq sun4c_continue_boot !continue with master
- nop
+ /* Update boot_cpu_id only on boot cpu */
+ stub %g1, [%g2 + %lo(boot_cpu_id)]
- ba leon_smp_cpu_startup
- nop
-#else
ba sun4c_continue_boot
nop
#endif
-#endif
set cputypval, %o2
ldub [%o2 + 0x4], %l1
@@ -893,9 +887,6 @@ sun4d_init:
sta %g4, [%g0] ASI_M_VIKING_TMP1
sethi %hi(boot_cpu_id), %g5
stb %g4, [%g5 + %lo(boot_cpu_id)]
- sll %g4, 2, %g4
- sethi %hi(boot_cpu_id4), %g5
- stb %g4, [%g5 + %lo(boot_cpu_id4)]
#endif
/* Fall through to sun4m_init */
@@ -1024,14 +1015,28 @@ sun4c_continue_boot:
bl 1b
add %o0, 0x1, %o0
+ /* If boot_cpu_id has not been set up by machine-specific
+ * init-code above, we default it to zero.
+ */
+ sethi %hi(boot_cpu_id), %g2
+ ldub [%g2 + %lo(boot_cpu_id)], %g3
+ cmp %g3, 0xff
+ bne 1f
+ nop
+ mov %g0, %g3
+ stub %g3, [%g2 + %lo(boot_cpu_id)]
+
+1: /* boot_cpu_id set. calculate boot_cpu_id4 = boot_cpu_id*4 */
+ sll %g3, 2, %g3
+ sethi %hi(boot_cpu_id4), %g2
+ stub %g3, [%g2 + %lo(boot_cpu_id4)]
+
/* Initialize the uwinmask value for init task just in case.
* But first make current_set[boot_cpu_id] point to something useful.
*/
set init_thread_union, %g6
set current_set, %g2
#ifdef CONFIG_SMP
- sethi %hi(boot_cpu_id4), %g3
- ldub [%g3 + %lo(boot_cpu_id4)], %g3
st %g6, [%g2]
add %g2, %g3, %g2
#endif
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index c6ce9a6..1c9c80a 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -50,10 +50,15 @@
#include <asm/io-unit.h>
#include <asm/leon.h>
+/* This function must make sure that caches and memory are coherent after DMA.
+ * On LEON systems without cache snooping it flushes the entire D-CACHE.
+ */
#ifndef CONFIG_SPARC_LEON
-#define mmu_inval_dma_area(p, l) /* Anton pulled it out for 2.4.0-xx */
+static inline void dma_make_coherent(unsigned long pa, unsigned long len)
+{
+}
#else
-static inline void mmu_inval_dma_area(void *va, unsigned long len)
+static inline void dma_make_coherent(unsigned long pa, unsigned long len)
{
if (!sparc_leon3_snooping_enabled())
leon_flush_dcache_all();
@@ -284,7 +289,6 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len,
printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total);
goto err_nova;
}
- mmu_inval_dma_area((void *)va, len_total);
// XXX The mmu_map_dma_area does this for us below, see comments.
// sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
@@ -336,7 +340,6 @@ static void sbus_free_coherent(struct device *dev, size_t n, void *p,
release_resource(res);
kfree(res);
- /* mmu_inval_dma_area(va, n); */ /* it's consistent, isn't it */
pgv = virt_to_page(p);
mmu_unmap_dma_area(dev, ba, n);
@@ -463,7 +466,6 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len,
printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total);
goto err_nova;
}
- mmu_inval_dma_area(va, len_total);
sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
*pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */
@@ -489,7 +491,6 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p,
dma_addr_t ba)
{
struct resource *res;
- void *pgp;
if ((res = _sparc_find_resource(&_sparc_dvma,
(unsigned long)p)) == NULL) {
@@ -509,14 +510,12 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p,
return;
}
- pgp = phys_to_virt(ba); /* bus_to_virt actually */
- mmu_inval_dma_area(pgp, n);
+ dma_make_coherent(ba, n);
sparc_unmapiorange((unsigned long)p, n);
release_resource(res);
kfree(res);
-
- free_pages((unsigned long)pgp, get_order(n));
+ free_pages((unsigned long)phys_to_virt(ba), get_order(n));
}
/*
@@ -535,7 +534,7 @@ static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size,
enum dma_data_direction dir, struct dma_attrs *attrs)
{
if (dir != PCI_DMA_TODEVICE)
- mmu_inval_dma_area(phys_to_virt(ba), PAGE_ALIGN(size));
+ dma_make_coherent(ba, PAGE_ALIGN(size));
}
/* Map a set of buffers described by scatterlist in streaming
@@ -562,8 +561,7 @@ static int pci32_map_sg(struct device *device, struct scatterlist *sgl,
/* IIep is write-through, not flushing. */
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg_page(sg)) == NULL);
- sg->dma_address = virt_to_phys(sg_virt(sg));
+ sg->dma_address = sg_phys(sg);
sg->dma_length = sg->length;
}
return nents;
@@ -582,9 +580,7 @@ static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl,
if (dir != PCI_DMA_TODEVICE) {
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg_page(sg)) == NULL);
- mmu_inval_dma_area(page_address(sg_page(sg)),
- PAGE_ALIGN(sg->length));
+ dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
}
}
}
@@ -603,8 +599,7 @@ static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba,
size_t size, enum dma_data_direction dir)
{
if (dir != PCI_DMA_TODEVICE) {
- mmu_inval_dma_area(phys_to_virt(ba),
- PAGE_ALIGN(size));
+ dma_make_coherent(ba, PAGE_ALIGN(size));
}
}
@@ -612,8 +607,7 @@ static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba,
size_t size, enum dma_data_direction dir)
{
if (dir != PCI_DMA_TODEVICE) {
- mmu_inval_dma_area(phys_to_virt(ba),
- PAGE_ALIGN(size));
+ dma_make_coherent(ba, PAGE_ALIGN(size));
}
}
@@ -631,9 +625,7 @@ static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
if (dir != PCI_DMA_TODEVICE) {
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg_page(sg)) == NULL);
- mmu_inval_dma_area(page_address(sg_page(sg)),
- PAGE_ALIGN(sg->length));
+ dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
}
}
}
@@ -646,9 +638,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *
if (dir != PCI_DMA_TODEVICE) {
for_each_sg(sgl, sg, nents, n) {
- BUG_ON(page_address(sg_page(sg)) == NULL);
- mmu_inval_dma_area(page_address(sg_page(sg)),
- PAGE_ALIGN(sg->length));
+ dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
}
}
}
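
dma_make_coherent() replaces mmu_inval_dma_area() and takes a physical address rather than a virtual one, which lets every caller above drop its phys_to_virt()/page_address() conversion and lets the scatterlist paths use sg_phys() directly. A sketch of the helper's contract on a LEON-like system (snooping_enabled() and flush_dcache_all() are hypothetical stand-ins):

	#include <stdio.h>

	static int snooping_enabled(void)  { return 0; }	/* pretend: no DMA snooping */
	static void flush_dcache_all(void) { puts("D-cache flushed"); }

	/* After device-to-memory DMA, make the CPU see the new data.
	 * Without snooping the whole D-cache is flushed, so pa/len go unused.
	 */
	static void dma_make_coherent(unsigned long pa, unsigned long len)
	{
		(void)pa;
		(void)len;
		if (!snooping_enabled())
			flush_dcache_all();
	}

	int main(void)
	{
		dma_make_coherent(0x40000000UL, 4096);
		return 0;
	}
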
diff --git a/arch/sparc/kernel/irq.h b/arch/sparc/kernel/irq.h
index 008453b..100b9c2 100644
--- a/arch/sparc/kernel/irq.h
+++ b/arch/sparc/kernel/irq.h
@@ -2,6 +2,23 @@
#include <asm/btfixup.h>
+struct irq_bucket {
+ struct irq_bucket *next;
+ unsigned int real_irq;
+ unsigned int irq;
+ unsigned int pil;
+};
+
+#define SUN4D_MAX_BOARD 10
+#define SUN4D_MAX_IRQ ((SUN4D_MAX_BOARD + 2) << 5)
+
+/* Map from the irq identifier used in hw to the
+ * irq_bucket. The map is sufficiently large to hold
+ * the sun4d hw identifiers.
+ */
+extern struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
+
+
/* sun4m specific type definitions */
/* This maps direct to CPU specific interrupt registers */
@@ -35,6 +52,10 @@ struct sparc_irq_config {
};
extern struct sparc_irq_config sparc_irq_config;
+unsigned int irq_alloc(unsigned int real_irq, unsigned int pil);
+void irq_link(unsigned int irq);
+void irq_unlink(unsigned int irq);
+void handler_irq(unsigned int pil, struct pt_regs *regs);
/* Dave Redman (djhr@tadpole.co.uk)
* changed these to function pointers.. it saves cycles and will allow
@@ -44,33 +65,9 @@ extern struct sparc_irq_config sparc_irq_config;
* Changed these to btfixup entities... It saves cycles :)
*/
-BTFIXUPDEF_CALL(void, disable_irq, unsigned int)
-BTFIXUPDEF_CALL(void, enable_irq, unsigned int)
-BTFIXUPDEF_CALL(void, disable_pil_irq, unsigned int)
-BTFIXUPDEF_CALL(void, enable_pil_irq, unsigned int)
BTFIXUPDEF_CALL(void, clear_clock_irq, void)
BTFIXUPDEF_CALL(void, load_profile_irq, int, unsigned int)
-static inline void __disable_irq(unsigned int irq)
-{
- BTFIXUP_CALL(disable_irq)(irq);
-}
-
-static inline void __enable_irq(unsigned int irq)
-{
- BTFIXUP_CALL(enable_irq)(irq);
-}
-
-static inline void disable_pil_irq(unsigned int irq)
-{
- BTFIXUP_CALL(disable_pil_irq)(irq);
-}
-
-static inline void enable_pil_irq(unsigned int irq)
-{
- BTFIXUP_CALL(enable_pil_irq)(irq);
-}
-
static inline void clear_clock_irq(void)
{
BTFIXUP_CALL(clear_clock_irq)();
@@ -89,4 +86,10 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int)
#define set_cpu_int(cpu,level) BTFIXUP_CALL(set_cpu_int)(cpu,level)
#define clear_cpu_int(cpu,level) BTFIXUP_CALL(clear_cpu_int)(cpu,level)
#define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu)
+
+/* All SUN4D IPIs are sent on this IRQ, may be shared with hard IRQs */
+#define SUN4D_IPI_IRQ 14
+
+extern void sun4d_ipi_interrupt(void);
+
#endif
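
The irq_bucket/irq_map pair introduced above gives sparc32 a sparc64-style virtual IRQ layer: irq_map[] is indexed by the hardware identifier (the PIL, or the larger sun4d numbers) and heads a singly linked list of buckets, one per virtual IRQ sharing that source. handler_irq() walks the list, which the sketch below models in user space (handle_virq() stands in for generic_handle_irq()):

	#include <stdio.h>
	#include <stddef.h>

	struct bucket {
		struct bucket *next;
		unsigned int irq;	/* virtual irq number */
	};

	#define MAX_PIL 16
	static struct bucket *irq_map[MAX_PIL];

	static void handle_virq(unsigned int irq) { printf("virq %u handled\n", irq); }

	static void handler_irq(unsigned int pil)
	{
		struct bucket *p = irq_map[pil];

		while (p) {
			struct bucket *next = p->next;	/* handler may unlink p */

			handle_virq(p->irq);
			p = next;
		}
	}

	int main(void)
	{
		static struct bucket a = { NULL, 7 }, b = { NULL, 9 };

		b.next = irq_map[11]; irq_map[11] = &b;	/* like irq_link(9) */
		a.next = irq_map[11]; irq_map[11] = &a;	/* like irq_link(7) */
		handler_irq(11);			/* handles virq 7, then 9 */
		return 0;
	}
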
diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c
index 7c93df4..9b89d84 100644
--- a/arch/sparc/kernel/irq_32.c
+++ b/arch/sparc/kernel/irq_32.c
@@ -15,6 +15,7 @@
#include <linux/seq_file.h>
#include <asm/cacheflush.h>
+#include <asm/cpudata.h>
#include <asm/pcic.h>
#include <asm/leon.h>
@@ -101,284 +102,173 @@ EXPORT_SYMBOL(arch_local_irq_restore);
* directed CPU interrupts using the existing enable/disable irq code
* with tweaks.
*
+ * Sun4d complicates things even further. IRQ numbers are arbitrary
+ * 32-bit values in that case. Since this is similar to sparc64,
+ * we adopt a virtual IRQ numbering scheme as is done there.
+ * Virtual interrupt numbers are allocated by build_irq(). So NR_IRQS
+ * just becomes a limit of how many interrupt sources we can handle in
+ * a single system. Even fully loaded SS2000 machines top off at
+ * about 32 interrupt sources or so, therefore a NR_IRQS value of 64
+ * is more than enough.
+ *
+ * We keep a map of per-PIL enable interrupts. These get wired
+ * up via the irq_chip->startup() method which gets invoked by
+ * the generic IRQ layer during request_irq().
*/
+/* Table of allocated irqs. Unused entries have irq == 0 */
+static struct irq_bucket irq_table[NR_IRQS];
+/* Protect access to irq_table */
+static DEFINE_SPINLOCK(irq_table_lock);
-/*
- * Dave Redman (djhr@tadpole.co.uk)
- *
- * There used to be extern calls and hard coded values here.. very sucky!
- * instead, because some of the devices attach very early, I do something
- * equally sucky but at least we'll never try to free statically allocated
- * space or call kmalloc before kmalloc_init :(.
- *
- * In fact it's the timer10 that attaches first.. then timer14
- * then kmalloc_init is called.. then the tty interrupts attach.
- * hmmm....
- *
- */
-#define MAX_STATIC_ALLOC 4
-struct irqaction static_irqaction[MAX_STATIC_ALLOC];
-int static_irq_count;
-
-static struct {
- struct irqaction *action;
- int flags;
-} sparc_irq[NR_IRQS];
-#define SPARC_IRQ_INPROGRESS 1
-
-/* Used to protect the IRQ action lists */
-DEFINE_SPINLOCK(irq_action_lock);
+/* Map from the irq identifier used in hw to the irq_bucket. */
+struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
+/* Protect access to irq_map */
+static DEFINE_SPINLOCK(irq_map_lock);
-int show_interrupts(struct seq_file *p, void *v)
+/* Allocate a new irq from the irq_table */
+unsigned int irq_alloc(unsigned int real_irq, unsigned int pil)
{
- int i = *(loff_t *)v;
- struct irqaction *action;
unsigned long flags;
-#ifdef CONFIG_SMP
- int j;
-#endif
+ unsigned int i;
+
+ spin_lock_irqsave(&irq_table_lock, flags);
+ for (i = 1; i < NR_IRQS; i++) {
+ if (irq_table[i].real_irq == real_irq && irq_table[i].pil == pil)
+ goto found;
+ }
- if (sparc_cpu_model == sun4d)
- return show_sun4d_interrupts(p, v);
+ for (i = 1; i < NR_IRQS; i++) {
+ if (!irq_table[i].irq)
+ break;
+ }
- spin_lock_irqsave(&irq_action_lock, flags);
if (i < NR_IRQS) {
- action = sparc_irq[i].action;
- if (!action)
- goto out_unlock;
- seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(j) {
- seq_printf(p, "%10u ",
- kstat_cpu(j).irqs[i]);
- }
-#endif
- seq_printf(p, " %c %s",
- (action->flags & IRQF_DISABLED) ? '+' : ' ',
- action->name);
- for (action = action->next; action; action = action->next) {
- seq_printf(p, ",%s %s",
- (action->flags & IRQF_DISABLED) ? " +" : "",
- action->name);
- }
- seq_putc(p, '\n');
+ irq_table[i].real_irq = real_irq;
+ irq_table[i].irq = i;
+ irq_table[i].pil = pil;
+ } else {
+ printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
+ i = 0;
}
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
- return 0;
+found:
+ spin_unlock_irqrestore(&irq_table_lock, flags);
+
+ return i;
}
-void free_irq(unsigned int irq, void *dev_id)
+/* Based on a single pil handler_irq may need to call several
+ * interrupt handlers. Use irq_map as entry to irq_table,
+ * and let each entry in irq_table point to the next entry.
+ */
+void irq_link(unsigned int irq)
{
- struct irqaction *action;
- struct irqaction **actionp;
+ struct irq_bucket *p;
unsigned long flags;
- unsigned int cpu_irq;
-
- if (sparc_cpu_model == sun4d) {
- sun4d_free_irq(irq, dev_id);
- return;
- }
- cpu_irq = irq & (NR_IRQS - 1);
- if (cpu_irq > 14) { /* 14 irq levels on the sparc */
- printk(KERN_ERR "Trying to free bogus IRQ %d\n", irq);
- return;
- }
+ unsigned int pil;
- spin_lock_irqsave(&irq_action_lock, flags);
+ BUG_ON(irq >= NR_IRQS);
- actionp = &sparc_irq[cpu_irq].action;
- action = *actionp;
+ spin_lock_irqsave(&irq_map_lock, flags);
- if (!action->handler) {
- printk(KERN_ERR "Trying to free free IRQ%d\n", irq);
- goto out_unlock;
- }
- if (dev_id) {
- for (; action; action = action->next) {
- if (action->dev_id == dev_id)
- break;
- actionp = &action->next;
- }
- if (!action) {
- printk(KERN_ERR "Trying to free free shared IRQ%d\n",
- irq);
- goto out_unlock;
- }
- } else if (action->flags & IRQF_SHARED) {
- printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n",
- irq);
- goto out_unlock;
- }
- if (action->flags & SA_STATIC_ALLOC) {
- /*
- * This interrupt is marked as specially allocated
- * so it is a bad idea to free it.
- */
- printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n",
- irq, action->name);
- goto out_unlock;
- }
-
- *actionp = action->next;
+ p = &irq_table[irq];
+ pil = p->pil;
+ BUG_ON(pil > SUN4D_MAX_IRQ);
+ p->next = irq_map[pil];
+ irq_map[pil] = p;
- spin_unlock_irqrestore(&irq_action_lock, flags);
+ spin_unlock_irqrestore(&irq_map_lock, flags);
+}
- synchronize_irq(irq);
+void irq_unlink(unsigned int irq)
+{
+ struct irq_bucket *p, **pnext;
+ unsigned long flags;
- spin_lock_irqsave(&irq_action_lock, flags);
+ BUG_ON(irq >= NR_IRQS);
- kfree(action);
+ spin_lock_irqsave(&irq_map_lock, flags);
- if (!sparc_irq[cpu_irq].action)
- __disable_irq(irq);
+ p = &irq_table[irq];
+ BUG_ON(p->pil > SUN4D_MAX_IRQ);
+ pnext = &irq_map[p->pil];
+ while (*pnext != p)
+ pnext = &(*pnext)->next;
+ *pnext = p->next;
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
+ spin_unlock_irqrestore(&irq_map_lock, flags);
}
-EXPORT_SYMBOL(free_irq);
-
-/*
- * This is called when we want to synchronize with
- * interrupts. We may for example tell a device to
- * stop sending interrupts: but to make sure there
- * are no interrupts that are executing on another
- * CPU we need to call this function.
- */
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
- unsigned int cpu_irq;
- cpu_irq = irq & (NR_IRQS - 1);
- while (sparc_irq[cpu_irq].flags & SPARC_IRQ_INPROGRESS)
- cpu_relax();
-}
-EXPORT_SYMBOL(synchronize_irq);
-#endif /* SMP */
-void unexpected_irq(int irq, void *dev_id, struct pt_regs *regs)
+/* /proc/interrupts printing */
+int arch_show_interrupts(struct seq_file *p, int prec)
{
- int i;
- struct irqaction *action;
- unsigned int cpu_irq;
+ int j;
- cpu_irq = irq & (NR_IRQS - 1);
- action = sparc_irq[cpu_irq].action;
-
- printk(KERN_ERR "IO device interrupt, irq = %d\n", irq);
- printk(KERN_ERR "PC = %08lx NPC = %08lx FP=%08lx\n", regs->pc,
- regs->npc, regs->u_regs[14]);
- if (action) {
- printk(KERN_ERR "Expecting: ");
- for (i = 0; i < 16; i++)
- if (action->handler)
- printk(KERN_CONT "[%s:%d:0x%x] ", action->name,
- i, (unsigned int)action->handler);
- }
- printk(KERN_ERR "AIEEE\n");
- panic("bogus interrupt received");
+#ifdef CONFIG_SMP
+ seq_printf(p, "RES: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_data(j).irq_resched_count);
+ seq_printf(p, " IPI rescheduling interrupts\n");
+ seq_printf(p, "CAL: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_data(j).irq_call_count);
+ seq_printf(p, " IPI function call interrupts\n");
+#endif
+ seq_printf(p, "NMI: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_data(j).counter);
+ seq_printf(p, " Non-maskable interrupts\n");
+ return 0;
}
-void handler_irq(int pil, struct pt_regs *regs)
+void handler_irq(unsigned int pil, struct pt_regs *regs)
{
struct pt_regs *old_regs;
- struct irqaction *action;
- int cpu = smp_processor_id();
+ struct irq_bucket *p;
+ BUG_ON(pil > 15);
old_regs = set_irq_regs(regs);
irq_enter();
- disable_pil_irq(pil);
-#ifdef CONFIG_SMP
- /* Only rotate on lower priority IRQs (scsi, ethernet, etc.). */
- if ((sparc_cpu_model==sun4m) && (pil < 10))
- smp4m_irq_rotate(cpu);
-#endif
- action = sparc_irq[pil].action;
- sparc_irq[pil].flags |= SPARC_IRQ_INPROGRESS;
- kstat_cpu(cpu).irqs[pil]++;
- do {
- if (!action || !action->handler)
- unexpected_irq(pil, NULL, regs);
- action->handler(pil, action->dev_id);
- action = action->next;
- } while (action);
- sparc_irq[pil].flags &= ~SPARC_IRQ_INPROGRESS;
- enable_pil_irq(pil);
+
+ p = irq_map[pil];
+ while (p) {
+ struct irq_bucket *next = p->next;
+
+ generic_handle_irq(p->irq);
+ p = next;
+ }
irq_exit();
set_irq_regs(old_regs);
}
#if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE)
+static unsigned int floppy_irq;
-/*
- * Fast IRQs on the Sparc can only have one routine attached to them,
- * thus no sharing possible.
- */
-static int request_fast_irq(unsigned int irq,
- void (*handler)(void),
- unsigned long irqflags, const char *devname)
+int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler)
{
- struct irqaction *action;
- unsigned long flags;
unsigned int cpu_irq;
- int ret;
+ int err;
+
#if defined CONFIG_SMP && !defined CONFIG_SPARC_LEON
struct tt_entry *trap_table;
#endif
- cpu_irq = irq & (NR_IRQS - 1);
- if (cpu_irq > 14) {
- ret = -EINVAL;
- goto out;
- }
- if (!handler) {
- ret = -EINVAL;
- goto out;
- }
- spin_lock_irqsave(&irq_action_lock, flags);
+ err = request_irq(irq, irq_handler, 0, "floppy", NULL);
+ if (err)
+ return -1;
- action = sparc_irq[cpu_irq].action;
- if (action) {
- if (action->flags & IRQF_SHARED)
- panic("Trying to register fast irq when already shared.\n");
- if (irqflags & IRQF_SHARED)
- panic("Trying to register fast irq as shared.\n");
+ /* Save for later use in floppy interrupt handler */
+ floppy_irq = irq;
- /* Anyway, someone already owns it so cannot be made fast. */
- printk(KERN_ERR "request_fast_irq: Trying to register yet already owned.\n");
- ret = -EBUSY;
- goto out_unlock;
- }
-
- /*
- * If this is flagged as statically allocated then we use our
- * private struct which is never freed.
- */
- if (irqflags & SA_STATIC_ALLOC) {
- if (static_irq_count < MAX_STATIC_ALLOC)
- action = &static_irqaction[static_irq_count++];
- else
- printk(KERN_ERR "Fast IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
- irq, devname);
- }
-
- if (action == NULL)
- action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
- if (!action) {
- ret = -ENOMEM;
- goto out_unlock;
- }
+ cpu_irq = (irq & (NR_IRQS - 1));
/* Dork with trap table if we get this far. */
#define INSTANTIATE(table) \
table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_one = SPARC_RD_PSR_L0; \
table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two = \
- SPARC_BRANCH((unsigned long) handler, \
+ SPARC_BRANCH((unsigned long) floppy_hardint, \
(unsigned long) &table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two);\
table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_three = SPARC_RD_WIM_L3; \
table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_four = SPARC_NOP;
@@ -399,22 +289,9 @@ static int request_fast_irq(unsigned int irq,
* writing we have no CPU-neutral interface to fine-grained flushes.
*/
flush_cache_all();
-
- action->flags = irqflags;
- action->name = devname;
- action->dev_id = NULL;
- action->next = NULL;
-
- sparc_irq[cpu_irq].action = action;
-
- __enable_irq(irq);
-
- ret = 0;
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
- return ret;
+ return 0;
}
+EXPORT_SYMBOL(sparc_floppy_request_irq);
/*
* These variables are used to access state from the assembler
@@ -440,154 +317,23 @@ EXPORT_SYMBOL(pdma_base);
unsigned long pdma_areasize;
EXPORT_SYMBOL(pdma_areasize);
-static irq_handler_t floppy_irq_handler;
-
+/* Use the generic irq support to call floppy_interrupt
+ * which was set up using request_irq() in sparc_floppy_request_irq().
+ * We only have one floppy interrupt, so we do not need to check
+ * for additional handlers being wired up by irq_link().
+ */
void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs)
{
struct pt_regs *old_regs;
- int cpu = smp_processor_id();
old_regs = set_irq_regs(regs);
- disable_pil_irq(irq);
irq_enter();
- kstat_cpu(cpu).irqs[irq]++;
- floppy_irq_handler(irq, dev_id);
+ generic_handle_irq(floppy_irq);
irq_exit();
- enable_pil_irq(irq);
set_irq_regs(old_regs);
- /*
- * XXX Eek, it's totally changed with preempt_count() and such
- * if (softirq_pending(cpu))
- * do_softirq();
- */
-}
-
-int sparc_floppy_request_irq(int irq, unsigned long flags,
- irq_handler_t irq_handler)
-{
- floppy_irq_handler = irq_handler;
- return request_fast_irq(irq, floppy_hardint, flags, "floppy");
}
-EXPORT_SYMBOL(sparc_floppy_request_irq);
-
#endif
-int request_irq(unsigned int irq,
- irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
-{
- struct irqaction *action, **actionp;
- unsigned long flags;
- unsigned int cpu_irq;
- int ret;
-
- if (sparc_cpu_model == sun4d)
- return sun4d_request_irq(irq, handler, irqflags, devname, dev_id);
-
- cpu_irq = irq & (NR_IRQS - 1);
- if (cpu_irq > 14) {
- ret = -EINVAL;
- goto out;
- }
- if (!handler) {
- ret = -EINVAL;
- goto out;
- }
-
- spin_lock_irqsave(&irq_action_lock, flags);
-
- actionp = &sparc_irq[cpu_irq].action;
- action = *actionp;
- if (action) {
- if (!(action->flags & IRQF_SHARED) || !(irqflags & IRQF_SHARED)) {
- ret = -EBUSY;
- goto out_unlock;
- }
- if ((action->flags & IRQF_DISABLED) != (irqflags & IRQF_DISABLED)) {
- printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n",
- irq);
- ret = -EBUSY;
- goto out_unlock;
- }
- for ( ; action; action = *actionp)
- actionp = &action->next;
- }
-
- /* If this is flagged as statically allocated then we use our
- * private struct which is never freed.
- */
- if (irqflags & SA_STATIC_ALLOC) {
- if (static_irq_count < MAX_STATIC_ALLOC)
- action = &static_irqaction[static_irq_count++];
- else
- printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
- irq, devname);
- }
- if (action == NULL)
- action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
- if (!action) {
- ret = -ENOMEM;
- goto out_unlock;
- }
-
- action->handler = handler;
- action->flags = irqflags;
- action->name = devname;
- action->next = NULL;
- action->dev_id = dev_id;
-
- *actionp = action;
-
- __enable_irq(irq);
-
- ret = 0;
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
- return ret;
-}
-EXPORT_SYMBOL(request_irq);
-
-void disable_irq_nosync(unsigned int irq)
-{
- __disable_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq_nosync);
-
-void disable_irq(unsigned int irq)
-{
- __disable_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq);
-
-void enable_irq(unsigned int irq)
-{
- __enable_irq(irq);
-}
-EXPORT_SYMBOL(enable_irq);
-
-/*
- * We really don't need these at all on the Sparc. We only have
- * stubs here because they are exported to modules.
- */
-unsigned long probe_irq_on(void)
-{
- return 0;
-}
-EXPORT_SYMBOL(probe_irq_on);
-
-int probe_irq_off(unsigned long mask)
-{
- return 0;
-}
-EXPORT_SYMBOL(probe_irq_off);
-
-static unsigned int build_device_irq(struct platform_device *op,
- unsigned int real_irq)
-{
- return real_irq;
-}
-
/* djhr
* This could probably be made indirect too and assigned in the CPU
* bits of the code. That would be much nicer I think and would also
@@ -598,8 +344,6 @@ static unsigned int build_device_irq(struct platform_device *op,
void __init init_IRQ(void)
{
- sparc_irq_config.build_device_irq = build_device_irq;
-
switch (sparc_cpu_model) {
case sun4c:
case sun4:
@@ -607,14 +351,11 @@ void __init init_IRQ(void)
break;
case sun4m:
-#ifdef CONFIG_PCI
pcic_probe();
- if (pcic_present()) {
+ if (pcic_present())
sun4m_pci_init_IRQ();
- break;
- }
-#endif
- sun4m_init_IRQ();
+ else
+ sun4m_init_IRQ();
break;
case sun4d:
@@ -632,9 +373,3 @@ void __init init_IRQ(void)
btfixup();
}
-#ifdef CONFIG_PROC_FS
-void init_irq_proc(void)
-{
- /* For now, nothing... */
-}
-#endif /* CONFIG_PROC_FS */
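
irq_alloc() above performs a two-pass search over irq_table: first it returns an existing entry with the same (real_irq, pil), then it falls back to the first free slot, reserving entry 0 as the failure value. A minimal user-space model of that search (locking omitted; the extra guard on tab[i].irq in pass one is an addition for this toy table):

	#include <stdio.h>

	#define NR 8
	static struct { unsigned int irq, real_irq, pil; } tab[NR];

	static unsigned int irq_alloc(unsigned int real_irq, unsigned int pil)
	{
		unsigned int i;

		for (i = 1; i < NR; i++)	/* pass 1: already allocated? */
			if (tab[i].irq && tab[i].real_irq == real_irq &&
			    tab[i].pil == pil)
				return i;
		for (i = 1; i < NR; i++)	/* pass 2: first free slot */
			if (!tab[i].irq) {
				tab[i].irq = i;
				tab[i].real_irq = real_irq;
				tab[i].pil = pil;
				return i;
			}
		return 0;			/* out of virtual irqs */
	}

	int main(void)
	{
		unsigned int a = irq_alloc(0x2c, 3);	/* new entry -> 1 */
		unsigned int b = irq_alloc(0x30, 5);	/* new entry -> 2 */
		unsigned int c = irq_alloc(0x2c, 3);	/* reused    -> 1 */

		printf("%u %u %u\n", a, b, c);		/* prints "1 2 1" */
		return 0;
	}
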
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index b1d275c..4e78862 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -224,13 +224,13 @@ static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
int cpuid;
cpumask_copy(&mask, affinity);
- if (cpus_equal(mask, cpu_online_map)) {
+ if (cpumask_equal(&mask, cpu_online_mask)) {
cpuid = map_to_cpu(irq);
} else {
cpumask_t tmp;
- cpus_and(tmp, cpu_online_map, mask);
- cpuid = cpus_empty(tmp) ? map_to_cpu(irq) : first_cpu(tmp);
+ cpumask_and(&tmp, cpu_online_mask, &mask);
+ cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp);
}
return cpuid;
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index 24ad449..6f6544c 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -6,11 +6,9 @@
#include <asm/traps.h>
/* cpu.c */
-extern const char *sparc_cpu_type;
extern const char *sparc_pmu_type;
-extern const char *sparc_fpu_type;
-
extern unsigned int fsr_storage;
+extern int ncpus_probed;
#ifdef CONFIG_SPARC32
/* cpu.c */
@@ -37,6 +35,7 @@ extern void sun4c_init_IRQ(void);
extern unsigned int lvl14_resolution;
extern void sun4m_init_IRQ(void);
+extern void sun4m_unmask_profile_irq(void);
extern void sun4m_clear_profile_irq(int cpu);
/* sun4d_irq.c */
diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
index 2969f77..2f538ac 100644
--- a/arch/sparc/kernel/leon_kernel.c
+++ b/arch/sparc/kernel/leon_kernel.c
@@ -19,53 +19,70 @@
#include <asm/leon_amba.h>
#include <asm/traps.h>
#include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <asm/setup.h>
#include "prom.h"
#include "irq.h"
struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; /* interrupt controller base address */
struct leon3_gptimer_regs_map *leon3_gptimer_regs; /* timer controller base address */
-struct amba_apb_device leon_percpu_timer_dev[16];
int leondebug_irq_disable;
int leon_debug_irqout;
static int dummy_master_l10_counter;
unsigned long amba_system_id;
+static DEFINE_SPINLOCK(leon_irq_lock);
unsigned long leon3_gptimer_irq; /* interrupt controller irq number */
unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */
+int leon3_ticker_irq; /* Timer ticker IRQ */
unsigned int sparc_leon_eirq;
-#define LEON_IMASK ((&leon3_irqctrl_regs->mask[0]))
+#define LEON_IMASK(cpu) (&leon3_irqctrl_regs->mask[cpu])
+#define LEON_IACK (&leon3_irqctrl_regs->iclear)
+#define LEON_DO_ACK_HW 1
-/* Return the IRQ of the pending IRQ on the extended IRQ controller */
-int sparc_leon_eirq_get(int eirq, int cpu)
+/* Return the IRQ last ACKed by the Extended IRQ controller. It has already
+ * been (automatically) ACKed when the CPU takes the trap.
+ */
+static inline unsigned int leon_eirq_get(int cpu)
{
return LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->intid[cpu]) & 0x1f;
}
-irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id)
+/* Handle one or multiple IRQs from the extended interrupt controller */
+static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc)
{
- printk(KERN_ERR "sparc_leon_eirq_isr: ERROR EXTENDED IRQ\n");
- return IRQ_HANDLED;
+ unsigned int eirq;
+ int cpu = sparc_leon3_cpuid();
+
+ eirq = leon_eirq_get(cpu);
+ if ((eirq & 0x10) && irq_map[eirq]->irq) /* bit4 tells if IRQ happened */
+ generic_handle_irq(irq_map[eirq]->irq);
}
/* The extended IRQ controller has been found, this function registers it */
-void sparc_leon_eirq_register(int eirq)
+void leon_eirq_setup(unsigned int eirq)
{
- int irq;
+ unsigned long mask, oldmask;
+ unsigned int veirq;
- /* Register a "BAD" handler for this interrupt, it should never happen */
- irq = request_irq(eirq, sparc_leon_eirq_isr,
- (IRQF_DISABLED | SA_STATIC_ALLOC), "extirq", NULL);
-
- if (irq) {
- printk(KERN_ERR
- "sparc_leon_eirq_register: unable to attach IRQ%d\n",
- eirq);
- } else {
- sparc_leon_eirq = eirq;
+ if (eirq < 1 || eirq > 0xf) {
+ printk(KERN_ERR "LEON EXT IRQ NUMBER BAD: %d\n", eirq);
+ return;
}
+ veirq = leon_build_device_irq(eirq, leon_handle_ext_irq, "extirq", 0);
+
+ /*
+ * Unmask the Extended IRQ; the IRQs routed through the Ext-IRQ
+ * controller have a mask bit of their own, so this is safe.
+ */
+ irq_link(veirq);
+ mask = 1 << eirq;
+ oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(boot_cpu_id));
+ LEON3_BYPASS_STORE_PA(LEON_IMASK(boot_cpu_id), (oldmask | mask));
+ sparc_leon_eirq = eirq;
}
static inline unsigned long get_irqmask(unsigned int irq)
@@ -83,35 +100,151 @@ static inline unsigned long get_irqmask(unsigned int irq)
return mask;
}
-static void leon_enable_irq(unsigned int irq_nr)
+#ifdef CONFIG_SMP
+static int irq_choose_cpu(const struct cpumask *affinity)
{
- unsigned long mask, flags;
- mask = get_irqmask(irq_nr);
- local_irq_save(flags);
- LEON3_BYPASS_STORE_PA(LEON_IMASK,
- (LEON3_BYPASS_LOAD_PA(LEON_IMASK) | (mask)));
- local_irq_restore(flags);
+ cpumask_t mask;
+
+ cpus_and(mask, cpu_online_map, *affinity);
+ if (cpus_equal(mask, cpu_online_map) || cpus_empty(mask))
+ return boot_cpu_id;
+ else
+ return first_cpu(mask);
}
+#else
+#define irq_choose_cpu(affinity) boot_cpu_id
+#endif
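A note on the selection rule above: irq_choose_cpu() falls back to the boot CPU both when the affinity mask covers every online CPU and when it covers none of them; only a strict, non-empty subset pins the IRQ to its first member. A minimal user-space sketch of that rule, with plain bitmasks standing in for cpumask_t (choose_cpu, online, affinity and boot_cpu are illustrative names, not kernel API):

	#include <stdio.h>

	/* Toy model of the LEON irq_choose_cpu() rule: a strict, non-empty
	 * subset of the online CPUs selects its first member, anything else
	 * falls back to the boot CPU.
	 */
	static int choose_cpu(unsigned long online, unsigned long affinity,
			      int boot_cpu)
	{
		unsigned long m = online & affinity;

		if (m == online || m == 0)
			return boot_cpu;
		return __builtin_ctzl(m);	/* index of first set bit */
	}

	int main(void)
	{
		printf("%d\n", choose_cpu(0xf, 0x4, 0));	/* subset -> CPU 2 */
		printf("%d\n", choose_cpu(0xf, 0xf, 0));	/* all online -> boot CPU 0 */
		return 0;
	}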
-static void leon_disable_irq(unsigned int irq_nr)
+static int leon_set_affinity(struct irq_data *data, const struct cpumask *dest,
+ bool force)
{
- unsigned long mask, flags;
- mask = get_irqmask(irq_nr);
- local_irq_save(flags);
- LEON3_BYPASS_STORE_PA(LEON_IMASK,
- (LEON3_BYPASS_LOAD_PA(LEON_IMASK) & ~(mask)));
- local_irq_restore(flags);
+ unsigned long mask, oldmask, flags;
+ int oldcpu, newcpu;
+
+ mask = (unsigned long)data->chip_data;
+ oldcpu = irq_choose_cpu(data->affinity);
+ newcpu = irq_choose_cpu(dest);
+
+ if (oldcpu == newcpu)
+ goto out;
+
+ /* mask the IRQ on the old CPU first, then unmask it on the selected CPU */
+ spin_lock_irqsave(&leon_irq_lock, flags);
+ oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(oldcpu));
+ LEON3_BYPASS_STORE_PA(LEON_IMASK(oldcpu), (oldmask & ~mask));
+ oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(newcpu));
+ LEON3_BYPASS_STORE_PA(LEON_IMASK(newcpu), (oldmask | mask));
+ spin_unlock_irqrestore(&leon_irq_lock, flags);
+out:
+ return IRQ_SET_MASK_OK;
+}
+
+static void leon_unmask_irq(struct irq_data *data)
+{
+ unsigned long mask, oldmask, flags;
+ int cpu;
+
+ mask = (unsigned long)data->chip_data;
+ cpu = irq_choose_cpu(data->affinity);
+ spin_lock_irqsave(&leon_irq_lock, flags);
+ oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
+ LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask | mask));
+ spin_unlock_irqrestore(&leon_irq_lock, flags);
+}
+
+static void leon_mask_irq(struct irq_data *data)
+{
+ unsigned long mask, oldmask, flags;
+ int cpu;
+
+ mask = (unsigned long)data->chip_data;
+ cpu = irq_choose_cpu(data->affinity);
+ spin_lock_irqsave(&leon_irq_lock, flags);
+ oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
+ LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask & ~mask));
+ spin_unlock_irqrestore(&leon_irq_lock, flags);
+}
+
+static unsigned int leon_startup_irq(struct irq_data *data)
+{
+ irq_link(data->irq);
+ leon_unmask_irq(data);
+ return 0;
+}
+static void leon_shutdown_irq(struct irq_data *data)
+{
+ leon_mask_irq(data);
+ irq_unlink(data->irq);
+}
+
+/* Used by external level-sensitive IRQ handlers on the LEON: ACK IRQ ctrl */
+static void leon_eoi_irq(struct irq_data *data)
+{
+ unsigned long mask = (unsigned long)data->chip_data;
+
+ if (mask & LEON_DO_ACK_HW)
+ LEON3_BYPASS_STORE_PA(LEON_IACK, mask & ~LEON_DO_ACK_HW);
+}
+
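The value stuffed into chip_data is the hardware mask bit for the line, with LEON_DO_ACK_HW overloaded onto bit 0: get_irqmask() never produces bit 0 for a valid LEON IRQ (IRQ numbers start at 1), so that bit is free to carry the "ACK after ISR" flag that leon_eoi_irq() tests. A compilable sketch of the encoding (the IRQ number is illustrative):

	#include <stdio.h>

	#define LEON_DO_ACK_HW 1UL

	int main(void)
	{
		/* chip_data for a level-triggered line on LEON IRQ 5 */
		unsigned long chip_data = (1UL << 5) | LEON_DO_ACK_HW;

		/* what leon_eoi_irq() would write to the ICLEAR register */
		if (chip_data & LEON_DO_ACK_HW)
			printf("ack mask: 0x%lx\n", chip_data & ~LEON_DO_ACK_HW);
		return 0;
	}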
+static struct irq_chip leon_irq = {
+ .name = "leon",
+ .irq_startup = leon_startup_irq,
+ .irq_shutdown = leon_shutdown_irq,
+ .irq_mask = leon_mask_irq,
+ .irq_unmask = leon_unmask_irq,
+ .irq_eoi = leon_eoi_irq,
+ .irq_set_affinity = leon_set_affinity,
+};
+
+/*
+ * Build a LEON IRQ for the edge-triggered LEON IRQ controller:
+ * Edge (normal) IRQ - handle_simple_irq, ack=DONT-CARE, never ack
+ * Level IRQ (PCI|Level-GPIO) - handle_fasteoi_irq, ack=1, ack after ISR
+ * Per-CPU Edge - handle_percpu_irq, ack=0
+ */
+unsigned int leon_build_device_irq(unsigned int real_irq,
+ irq_flow_handler_t flow_handler,
+ const char *name, int do_ack)
+{
+ unsigned int irq;
+ unsigned long mask;
+
+ irq = 0;
+ mask = get_irqmask(real_irq);
+ if (mask == 0)
+ goto out;
+
+ irq = irq_alloc(real_irq, real_irq);
+ if (irq == 0)
+ goto out;
+
+ if (do_ack)
+ mask |= LEON_DO_ACK_HW;
+
+ irq_set_chip_and_handler_name(irq, &leon_irq,
+ flow_handler, name);
+ irq_set_chip_data(irq, (void *)mask);
+
+out:
+ return irq;
+}
+
+static unsigned int _leon_build_device_irq(struct platform_device *op,
+ unsigned int real_irq)
+{
+ return leon_build_device_irq(real_irq, handle_simple_irq, "edge", 0);
}
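Drivers on the level-triggered paths (PCI, level GPIO) are expected to map their real line through leon_build_device_irq() with handle_fasteoi_irq and do_ack set before calling request_irq(). A hedged sketch of that pattern; my_isr, my_probe and the line number are placeholders, not code from this patch:

	static irqreturn_t my_isr(int irq, void *dev_id)
	{
		/* service the device; the irq_chip ACKs the controller afterwards */
		return IRQ_HANDLED;
	}

	static int __init my_probe(void)
	{
		unsigned int irq;

		/* Level IRQ: fasteoi flow, ACK the controller after the ISR */
		irq = leon_build_device_irq(5, handle_fasteoi_irq, "level", 1);
		if (!irq)
			return -ENODEV;
		return request_irq(irq, my_isr, 0, "mydev", NULL);
	}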
void __init leon_init_timers(irq_handler_t counter_fn)
{
- int irq;
+ int irq, eirq;
struct device_node *rootnp, *np, *nnp;
struct property *pp;
int len;
- int cpu, icsel;
+ int icsel;
int ampopts;
+ int err;
leondebug_irq_disable = 0;
leon_debug_irqout = 0;
@@ -173,98 +306,85 @@ void __init leon_init_timers(irq_handler_t counter_fn)
leon3_gptimer_irq = *(unsigned int *)pp->value;
} while (0);
- if (leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq) {
- LEON3_BYPASS_STORE_PA(
- &leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0);
- LEON3_BYPASS_STORE_PA(
- &leon3_gptimer_regs->e[leon3_gptimer_idx].rld,
- (((1000000 / HZ) - 1)));
- LEON3_BYPASS_STORE_PA(
+ if (!(leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq))
+ goto bad;
+
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0);
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld,
+ (((1000000 / HZ) - 1)));
+ LEON3_BYPASS_STORE_PA(
&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, 0);
#ifdef CONFIG_SMP
- leon_percpu_timer_dev[0].start = (int)leon3_gptimer_regs;
- leon_percpu_timer_dev[0].irq = leon3_gptimer_irq + 1 +
- leon3_gptimer_idx;
-
- if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) &
- (1<<LEON3_GPTIMER_SEPIRQ))) {
- prom_printf("irq timer not configured with separate irqs\n");
- BUG();
- }
+ leon3_ticker_irq = leon3_gptimer_irq + 1 + leon3_gptimer_idx;
- LEON3_BYPASS_STORE_PA(
- &leon3_gptimer_regs->e[leon3_gptimer_idx+1].val, 0);
- LEON3_BYPASS_STORE_PA(
- &leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld,
- (((1000000/HZ) - 1)));
- LEON3_BYPASS_STORE_PA(
- &leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, 0);
-# endif
-
- /*
- * The IRQ controller may (if implemented) consist of multiple
- * IRQ controllers, each mapped on a 4Kb boundary.
- * Each CPU may be routed to different IRQCTRLs, however
- * we assume that all CPUs (in SMP system) is routed to the
- * same IRQ Controller, and for non-SMP only one IRQCTRL is
- * accessed anyway.
- * In AMP systems, Linux must run on CPU0 for the time being.
- */
- cpu = sparc_leon3_cpuid();
- icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[cpu/8]);
- icsel = (icsel >> ((7 - (cpu&0x7)) * 4)) & 0xf;
- leon3_irqctrl_regs += icsel;
- } else {
- goto bad;
+ if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) &
+ (1<<LEON3_GPTIMER_SEPIRQ))) {
+ printk(KERN_ERR "timer not configured with separate irqs\n");
+ BUG();
}
- irq = request_irq(leon3_gptimer_irq+leon3_gptimer_idx,
- counter_fn,
- (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].val,
+ 0);
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld,
+ (((1000000/HZ) - 1)));
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
+ 0);
+#endif
- if (irq) {
- printk(KERN_ERR "leon_time_init: unable to attach IRQ%d\n",
- LEON_INTERRUPT_TIMER1);
+ /*
+ * The IRQ controller may (if implemented) consist of multiple
+ * IRQ controllers, each mapped on a 4Kb boundary.
+ * Each CPU may be routed to different IRQCTRLs; however,
+ * we assume that all CPUs (in an SMP system) are routed to
+ * the same IRQ controller, and for non-SMP only one IRQCTRL
+ * is accessed anyway.
+ * In AMP systems, Linux must run on CPU0 for the time being.
+ */
+ icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[boot_cpu_id/8]);
+ icsel = (icsel >> ((7 - (boot_cpu_id&0x7)) * 4)) & 0xf;
+ leon3_irqctrl_regs += icsel;
+
+ /* Mask all IRQs on boot-cpu IRQ controller */
+ LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[boot_cpu_id], 0);
+
+ /* Probe extended IRQ controller */
+ eirq = (LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->mpstatus)
+ >> 16) & 0xf;
+ if (eirq != 0)
+ leon_eirq_setup(eirq);
+
+ irq = _leon_build_device_irq(NULL, leon3_gptimer_irq+leon3_gptimer_idx);
+ err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
+ if (err) {
+ printk(KERN_ERR "unable to attach timer IRQ%d\n", irq);
prom_halt();
}
-# ifdef CONFIG_SMP
- {
- unsigned long flags;
- struct tt_entry *trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_percpu_timer_dev[0].irq - 1)];
-
- /* For SMP we use the level 14 ticker, however the bootup code
- * has copied the firmwares level 14 vector into boot cpu's
- * trap table, we must fix this now or we get squashed.
- */
- local_irq_save(flags);
-
- patchme_maybe_smp_msg[0] = 0x01000000; /* NOP out the branch */
-
- /* Adjust so that we jump directly to smpleon_ticker */
- trap_table->inst_three += smpleon_ticker - real_irq_entry;
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
+ LEON3_GPTIMER_EN |
+ LEON3_GPTIMER_RL |
+ LEON3_GPTIMER_LD |
+ LEON3_GPTIMER_IRQEN);
- local_flush_cache_all();
- local_irq_restore(flags);
+#ifdef CONFIG_SMP
+ /* Install per-cpu IRQ handler for the broadcast ticker */
+ irq = leon_build_device_irq(leon3_ticker_irq, handle_percpu_irq,
+ "per-cpu", 0);
+ err = request_irq(irq, leon_percpu_timer_interrupt,
+ IRQF_PERCPU | IRQF_TIMER, "ticker",
+ NULL);
+ if (err) {
+ printk(KERN_ERR "unable to attach ticker IRQ%d\n", irq);
+ prom_halt();
}
-# endif
-
- if (leon3_gptimer_regs) {
- LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
- LEON3_GPTIMER_EN |
- LEON3_GPTIMER_RL |
- LEON3_GPTIMER_LD | LEON3_GPTIMER_IRQEN);
-#ifdef CONFIG_SMP
- LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
- LEON3_GPTIMER_EN |
- LEON3_GPTIMER_RL |
- LEON3_GPTIMER_LD |
- LEON3_GPTIMER_IRQEN);
+ LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
+ LEON3_GPTIMER_EN |
+ LEON3_GPTIMER_RL |
+ LEON3_GPTIMER_LD |
+ LEON3_GPTIMER_IRQEN);
#endif
-
- }
return;
bad:
printk(KERN_ERR "No Timer/irqctrl found\n");
@@ -281,9 +401,6 @@ void leon_load_profile_irq(int cpu, unsigned int limit)
BUG();
}
-
-
-
void __init leon_trans_init(struct device_node *dp)
{
if (strcmp(dp->type, "cpu") == 0 && strcmp(dp->name, "<NULL>") == 0) {
@@ -337,22 +454,18 @@ void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu)
{
unsigned long mask, flags, *addr;
mask = get_irqmask(irq_nr);
- local_irq_save(flags);
- addr = (unsigned long *)&(leon3_irqctrl_regs->mask[cpu]);
- LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | (mask)));
- local_irq_restore(flags);
+ spin_lock_irqsave(&leon_irq_lock, flags);
+ addr = (unsigned long *)LEON_IMASK(cpu);
+ LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | mask));
+ spin_unlock_irqrestore(&leon_irq_lock, flags);
}
#endif
void __init leon_init_IRQ(void)
{
- sparc_irq_config.init_timers = leon_init_timers;
-
- BTFIXUPSET_CALL(enable_irq, leon_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_irq, leon_disable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(enable_pil_irq, leon_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_pil_irq, leon_disable_irq, BTFIXUPCALL_NORM);
+ sparc_irq_config.init_timers = leon_init_timers;
+ sparc_irq_config.build_device_irq = _leon_build_device_irq;
BTFIXUPSET_CALL(clear_clock_irq, leon_clear_clock_irq,
BTFIXUPCALL_NORM);
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
index 8f5de4a..fe8fb44 100644
--- a/arch/sparc/kernel/leon_smp.c
+++ b/arch/sparc/kernel/leon_smp.c
@@ -14,6 +14,7 @@
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/of.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
@@ -29,6 +30,7 @@
#include <asm/ptrace.h>
#include <asm/atomic.h>
#include <asm/irq_regs.h>
+#include <asm/traps.h>
#include <asm/delay.h>
#include <asm/irq.h>
@@ -50,9 +52,12 @@
extern ctxd_t *srmmu_ctx_table_phys;
static int smp_processors_ready;
extern volatile unsigned long cpu_callin_map[NR_CPUS];
-extern unsigned char boot_cpu_id;
extern cpumask_t smp_commenced_mask;
void __init leon_configure_cache_smp(void);
+static void leon_ipi_init(void);
+
+/* IRQ number of LEON IPIs */
+int leon_ipi_irq = LEON3_IRQ_IPI_DEFAULT;
static inline unsigned long do_swap(volatile unsigned long *ptr,
unsigned long val)
@@ -94,8 +99,6 @@ void __cpuinit leon_callin(void)
local_flush_cache_all();
local_flush_tlb_all();
- cpu_probe();
-
/* Fix idle thread fields. */
__asm__ __volatile__("ld [%0], %%g6\n\t" : : "r"(&current_set[cpuid])
: "memory" /* paranoid */);
@@ -104,11 +107,11 @@ void __cpuinit leon_callin(void)
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
- while (!cpu_isset(cpuid, smp_commenced_mask))
+ while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
mb();
local_irq_enable();
- cpu_set(cpuid, cpu_online_map);
+ set_cpu_online(cpuid, true);
}
/*
@@ -179,13 +182,16 @@ void __init leon_boot_cpus(void)
int nrcpu = leon_smp_nrcpus();
int me = smp_processor_id();
+ /* Setup IPI */
+ leon_ipi_init();
+
printk(KERN_INFO "%d:(%d:%d) cpus mpirq at 0x%x\n", (unsigned int)me,
(unsigned int)nrcpu, (unsigned int)NR_CPUS,
(unsigned int)&(leon3_irqctrl_regs->mpstatus));
leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, me);
leon_enable_irq_cpu(LEON3_IRQ_TICKER, me);
- leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, me);
+ leon_enable_irq_cpu(leon_ipi_irq, me);
leon_smp_setbroadcast(1 << LEON3_IRQ_TICKER);
@@ -220,6 +226,10 @@ int __cpuinit leon_boot_one_cpu(int i)
(unsigned int)&leon3_irqctrl_regs->mpstatus);
local_flush_cache_all();
+ /* Make sure all IRQs are off from the start for this new CPU */
+ LEON_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[i], 0);
+
+ /* Wake one CPU */
LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpstatus), 1 << i);
/* wheee... it's going... */
@@ -236,7 +246,7 @@ int __cpuinit leon_boot_one_cpu(int i)
} else {
leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, i);
leon_enable_irq_cpu(LEON3_IRQ_TICKER, i);
- leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, i);
+ leon_enable_irq_cpu(leon_ipi_irq, i);
}
local_flush_cache_all();
@@ -262,21 +272,21 @@ void __init leon_smp_done(void)
local_flush_cache_all();
/* Free unneeded trap tables */
- if (!cpu_isset(1, cpu_present_map)) {
+ if (!cpu_present(1)) {
ClearPageReserved(virt_to_page(&trapbase_cpu1));
init_page_count(virt_to_page(&trapbase_cpu1));
free_page((unsigned long)&trapbase_cpu1);
totalram_pages++;
num_physpages++;
}
- if (!cpu_isset(2, cpu_present_map)) {
+ if (!cpu_present(2)) {
ClearPageReserved(virt_to_page(&trapbase_cpu2));
init_page_count(virt_to_page(&trapbase_cpu2));
free_page((unsigned long)&trapbase_cpu2);
totalram_pages++;
num_physpages++;
}
- if (!cpu_isset(3, cpu_present_map)) {
+ if (!cpu_present(3)) {
ClearPageReserved(virt_to_page(&trapbase_cpu3));
init_page_count(virt_to_page(&trapbase_cpu3));
free_page((unsigned long)&trapbase_cpu3);
@@ -292,6 +302,99 @@ void leon_irq_rotate(int cpu)
{
}
+struct leon_ipi_work {
+ int single;
+ int msk;
+ int resched;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct leon_ipi_work, leon_ipi_work);
+
+/* Initialize IPIs on the LEON. To save IRQ resources, only one IRQ
+ * is used for all three types of IPIs.
+ */
+static void __init leon_ipi_init(void)
+{
+ int cpu, len;
+ struct leon_ipi_work *work;
+ struct property *pp;
+ struct device_node *rootnp;
+ struct tt_entry *trap_table;
+ unsigned long flags;
+
+ /* Find IPI IRQ or stick with default value */
+ rootnp = of_find_node_by_path("/ambapp0");
+ if (rootnp) {
+ pp = of_find_property(rootnp, "ipi_num", &len);
+ if (pp && (*(int *)pp->value))
+ leon_ipi_irq = *(int *)pp->value;
+ }
+ printk(KERN_INFO "leon: SMP IPIs at IRQ %d\n", leon_ipi_irq);
+
+ /* Adjust so that we jump directly to smpleon_ipi */
+ local_irq_save(flags);
+ trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_ipi_irq - 1)];
+ trap_table->inst_three += smpleon_ipi - real_irq_entry;
+ local_flush_cache_all();
+ local_irq_restore(flags);
+
+ for_each_possible_cpu(cpu) {
+ work = &per_cpu(leon_ipi_work, cpu);
+ work->single = work->msk = work->resched = 0;
+ }
+}
+
+static void leon_ipi_single(int cpu)
+{
+ struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+ /* Mark work */
+ work->single = 1;
+
+ /* Generate IRQ on the CPU */
+ set_cpu_int(cpu, leon_ipi_irq);
+}
+
+static void leon_ipi_mask_one(int cpu)
+{
+ struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+ /* Mark work */
+ work->msk = 1;
+
+ /* Generate IRQ on the CPU */
+ set_cpu_int(cpu, leon_ipi_irq);
+}
+
+static void leon_ipi_resched(int cpu)
+{
+ struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+ /* Mark work */
+ work->resched = 1;
+
+ /* Generate IRQ on the CPU (any IRQ will cause resched) */
+ set_cpu_int(cpu, leon_ipi_irq);
+}
+
+void leonsmp_ipi_interrupt(void)
+{
+ struct leon_ipi_work *work = &__get_cpu_var(leon_ipi_work);
+
+ if (work->single) {
+ work->single = 0;
+ smp_call_function_single_interrupt();
+ }
+ if (work->msk) {
+ work->msk = 0;
+ smp_call_function_interrupt();
+ }
+ if (work->resched) {
+ work->resched = 0;
+ smp_resched_interrupt();
+ }
+}
+
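All three IPI types share one hardware IRQ: the sender sets a per-cpu flag and raises the interrupt, and leonsmp_ipi_interrupt() drains whichever flags are pending. A user-space analogy of that dispatch (a plain struct instead of per-cpu data, printf instead of the SMP handlers):

	#include <stdio.h>

	struct ipi_work { int single, msk, resched; };

	/* Mirror of the leonsmp_ipi_interrupt() drain loop */
	static void ipi_dispatch(struct ipi_work *w)
	{
		if (w->single)  { w->single = 0;  puts("call-function-single"); }
		if (w->msk)     { w->msk = 0;     puts("call-function-mask"); }
		if (w->resched) { w->resched = 0; puts("resched"); }
	}

	int main(void)
	{
		struct ipi_work w = { .single = 1, .resched = 1 };

		ipi_dispatch(&w);	/* both pending types served by one IRQ */
		return 0;
	}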
static struct smp_funcall {
smpfunc_t func;
unsigned long arg1;
@@ -337,10 +440,10 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
{
register int i;
- cpu_clear(smp_processor_id(), mask);
- cpus_and(mask, cpu_online_map, mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+ cpumask_and(&mask, cpu_online_mask, &mask);
for (i = 0; i <= high; i++) {
- if (cpu_isset(i, mask)) {
+ if (cpumask_test_cpu(i, &mask)) {
ccall_info.processors_in[i] = 0;
ccall_info.processors_out[i] = 0;
set_cpu_int(i, LEON3_IRQ_CROSS_CALL);
@@ -354,7 +457,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_in[i])
@@ -363,7 +466,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_out[i])
@@ -386,27 +489,23 @@ void leon_cross_call_irq(void)
ccall_info.processors_out[i] = 1;
}
-void leon_percpu_timer_interrupt(struct pt_regs *regs)
+irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused)
{
- struct pt_regs *old_regs;
int cpu = smp_processor_id();
- old_regs = set_irq_regs(regs);
-
leon_clear_profile_irq(cpu);
profile_tick(CPU_PROFILING);
if (!--prof_counter(cpu)) {
- int user = user_mode(regs);
+ int user = user_mode(get_irq_regs());
- irq_enter();
update_process_times(user);
- irq_exit();
prof_counter(cpu) = prof_multiplier(cpu);
}
- set_irq_regs(old_regs);
+
+ return IRQ_HANDLED;
}
static void __init smp_setup_percpu_timer(void)
@@ -449,6 +548,9 @@ void __init leon_init_smp(void)
BTFIXUPSET_CALL(smp_cross_call, leon_cross_call, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(__hard_smp_processor_id, __leon_processor_id,
BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_resched, leon_ipi_resched, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_single, leon_ipi_single, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_mask_one, leon_ipi_mask_one, BTFIXUPCALL_NORM);
}
#endif /* CONFIG_SPARC_LEON */
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 56db064..42f28c7 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -768,7 +768,7 @@ static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handl
cpuid, NR_CPUS);
continue;
}
- if (!cpu_isset(cpuid, *mask))
+ if (!cpumask_test_cpu(cpuid, mask))
continue;
#endif
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index 5c14968..3bb2eac 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -622,8 +622,9 @@ static unsigned int __init build_one_device_irq(struct platform_device *op,
out:
nid = of_node_to_nid(dp);
if (nid != -1) {
- cpumask_t numa_mask = *cpumask_of_node(nid);
+ cpumask_t numa_mask;
+ cpumask_copy(&numa_mask, cpumask_of_node(nid));
irq_set_affinity(irq, &numa_mask);
}
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index 30982e9..580651a 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -284,8 +284,9 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
nid = pbm->numa_node;
if (nid != -1) {
- cpumask_t numa_mask = *cpumask_of_node(nid);
+ cpumask_t numa_mask;
+ cpumask_copy(&numa_mask, cpumask_of_node(nid));
irq_set_affinity(irq, &numa_mask);
}
err = request_irq(irq, sparc64_msiq_interrupt, 0,
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 2cdc131..948601a 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -164,6 +164,9 @@ void __iomem *pcic_regs;
volatile int pcic_speculative;
volatile int pcic_trapped;
+/* forward */
+unsigned int pcic_build_device_irq(struct platform_device *op,
+ unsigned int real_irq);
#define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (((unsigned int)bus) << 16) | (((unsigned int)device_fn) << 8) | (where & ~3))
@@ -523,6 +526,7 @@ static void
pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
{
struct pcic_ca2irq *p;
+ unsigned int real_irq;
int i, ivec;
char namebuf[64];
@@ -551,26 +555,25 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
i = p->pin;
if (i >= 0 && i < 4) {
ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_LO);
- dev->irq = ivec >> (i << 2) & 0xF;
+ real_irq = ivec >> (i << 2) & 0xF;
} else if (i >= 4 && i < 8) {
ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI);
- dev->irq = ivec >> ((i-4) << 2) & 0xF;
+ real_irq = ivec >> ((i-4) << 2) & 0xF;
} else { /* Corrupted map */
printk("PCIC: BAD PIN %d\n", i); for (;;) {}
}
/* P3 */ /* printk("PCIC: device %s pin %d ivec 0x%x irq %x\n", namebuf, i, ivec, dev->irq); */
- /*
- * dev->irq=0 means PROM did not bother to program the upper
+ /* real_irq == 0 means PROM did not bother to program the upper
* half of PCIC. This happens on JS-E with PROM 3.11, for instance.
*/
- if (dev->irq == 0 || p->force) {
+ if (real_irq == 0 || p->force) {
if (p->irq == 0 || p->irq >= 15) { /* Corrupted map */
printk("PCIC: BAD IRQ %d\n", p->irq); for (;;) {}
}
printk("PCIC: setting irq %d at pin %d for device %02x:%02x\n",
p->irq, p->pin, dev->bus->number, dev->devfn);
- dev->irq = p->irq;
+ real_irq = p->irq;
i = p->pin;
if (i >= 4) {
@@ -584,7 +587,8 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
ivec |= p->irq << (i << 2);
writew(ivec, pcic->pcic_regs+PCI_INT_SELECT_LO);
}
- }
+ }
+ dev->irq = pcic_build_device_irq(NULL, real_irq);
}
/*
@@ -729,6 +733,7 @@ void __init pci_time_init(void)
struct linux_pcic *pcic = &pcic0;
unsigned long v;
int timer_irq, irq;
+ int err;
do_arch_gettimeoffset = pci_gettimeoffset;
@@ -740,9 +745,10 @@ void __init pci_time_init(void)
timer_irq = PCI_COUNTER_IRQ_SYS(v);
writel (PCI_COUNTER_IRQ_SET(timer_irq, 0),
pcic->pcic_regs+PCI_COUNTER_IRQ);
- irq = request_irq(timer_irq, pcic_timer_handler,
- (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
- if (irq) {
+ irq = pcic_build_device_irq(NULL, timer_irq);
+ err = request_irq(irq, pcic_timer_handler,
+ IRQF_TIMER, "timer", NULL);
+ if (err) {
prom_printf("time_init: unable to attach IRQ%d\n", timer_irq);
prom_halt();
}
@@ -803,50 +809,73 @@ static inline unsigned long get_irqmask(int irq_nr)
return 1 << irq_nr;
}
-static void pcic_disable_irq(unsigned int irq_nr)
+static void pcic_mask_irq(struct irq_data *data)
{
unsigned long mask, flags;
- mask = get_irqmask(irq_nr);
+ mask = (unsigned long)data->chip_data;
local_irq_save(flags);
writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET);
local_irq_restore(flags);
}
-static void pcic_enable_irq(unsigned int irq_nr)
+static void pcic_unmask_irq(struct irq_data *data)
{
unsigned long mask, flags;
- mask = get_irqmask(irq_nr);
+ mask = (unsigned long)data->chip_data;
local_irq_save(flags);
writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR);
local_irq_restore(flags);
}
-static void pcic_load_profile_irq(int cpu, unsigned int limit)
+static unsigned int pcic_startup_irq(struct irq_data *data)
{
- printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__);
+ irq_link(data->irq);
+ pcic_unmask_irq(data);
+ return 0;
}
-/* We assume the caller has disabled local interrupts when these are called,
- * or else very bizarre behavior will result.
- */
-static void pcic_disable_pil_irq(unsigned int pil)
+static struct irq_chip pcic_irq = {
+ .name = "pcic",
+ .irq_startup = pcic_startup_irq,
+ .irq_mask = pcic_mask_irq,
+ .irq_unmask = pcic_unmask_irq,
+};
+
+unsigned int pcic_build_device_irq(struct platform_device *op,
+ unsigned int real_irq)
{
- writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET);
+ unsigned int irq;
+ unsigned long mask;
+
+ irq = 0;
+ mask = get_irqmask(real_irq);
+ if (mask == 0)
+ goto out;
+
+ irq = irq_alloc(real_irq, real_irq);
+ if (irq == 0)
+ goto out;
+
+ irq_set_chip_and_handler_name(irq, &pcic_irq,
+ handle_level_irq, "PCIC");
+ irq_set_chip_data(irq, (void *)mask);
+
+out:
+ return irq;
}
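After this change every consumer sees a virtual IRQ: pcic_fill_irq() above stores pcic_build_device_irq(NULL, real_irq) in dev->irq, so a driver's existing request_irq(dev->irq, ...) keeps working with no knowledge of PCIC interrupt numbers. A hedged sketch of that consumer side; my_isr and my_pci_probe are placeholders:

	static irqreturn_t my_isr(int irq, void *dev_id)
	{
		return IRQ_HANDLED;
	}

	static int my_pci_probe(struct pci_dev *pdev)
	{
		/* pdev->irq is already a virtual IRQ mapped by pcic_fill_irq() */
		return request_irq(pdev->irq, my_isr, IRQF_SHARED, "mydev", pdev);
	}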
-static void pcic_enable_pil_irq(unsigned int pil)
+
+static void pcic_load_profile_irq(int cpu, unsigned int limit)
{
- writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR);
+ printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__);
}
void __init sun4m_pci_init_IRQ(void)
{
- BTFIXUPSET_CALL(enable_irq, pcic_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_irq, pcic_disable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(enable_pil_irq, pcic_enable_pil_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_pil_irq, pcic_disable_pil_irq, BTFIXUPCALL_NORM);
+ sparc_irq_config.build_device_irq = pcic_build_device_irq;
+
BTFIXUPSET_CALL(clear_clock_irq, pcic_clear_clock_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(load_profile_irq, pcic_load_profile_irq, BTFIXUPCALL_NORM);
}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index ee8426e..2cb0e1c 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -26,6 +26,7 @@
#include <asm/nmi.h>
#include <asm/pcr.h>
+#include "kernel.h"
#include "kstack.h"
/* Sparc64 chips have two performance counters, 32-bits each, with
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 1752929..c8cc461 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -128,8 +128,16 @@ void cpu_idle(void)
set_thread_flag(TIF_POLLING_NRFLAG);
/* endless idle loop with no priority at all */
while(1) {
- while (!need_resched())
- cpu_relax();
+#ifdef CONFIG_SPARC_LEON
+ if (pm_idle) {
+ while (!need_resched())
+ (*pm_idle)();
+ } else
+#endif
+ {
+ while (!need_resched())
+ cpu_relax();
+ }
preempt_enable_no_resched();
schedule();
preempt_disable();
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index 05fb253..5ce3d15 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -326,7 +326,6 @@ void __init of_console_init(void)
of_console_options = NULL;
}
- prom_printf(msg, of_console_path);
printk(msg, of_console_path);
}
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index 7b8b76c..3609bde 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -103,16 +103,20 @@ static unsigned int boot_flags __initdata = 0;
/* Exported for mm/init.c:paging_init. */
unsigned long cmdline_memory_size __initdata = 0;
+/* which CPU booted us (0xff = not set) */
+unsigned char boot_cpu_id = 0xff; /* 0xff will make it into DATA section... */
+unsigned char boot_cpu_id4; /* boot_cpu_id << 2 */
+
static void
prom_console_write(struct console *con, const char *s, unsigned n)
{
prom_write(s, n);
}
-static struct console prom_debug_console = {
- .name = "debug",
+static struct console prom_early_console = {
+ .name = "earlyprom",
.write = prom_console_write,
- .flags = CON_PRINTBUFFER,
+ .flags = CON_PRINTBUFFER | CON_BOOT,
.index = -1,
};
@@ -133,8 +137,7 @@ static void __init process_switch(char c)
prom_halt();
break;
case 'p':
- /* Use PROM debug console. */
- register_console(&prom_debug_console);
+ /* Just ignore, this behavior is now the default. */
break;
default:
printk("Unknown boot switch (-%c)\n", c);
@@ -215,6 +218,10 @@ void __init setup_arch(char **cmdline_p)
strcpy(boot_command_line, *cmdline_p);
parse_early_param();
+ boot_flags_init(*cmdline_p);
+
+ register_console(&prom_early_console);
+
/* Set sparc_cpu_model */
sparc_cpu_model = sun_unknown;
if (!strcmp(&cputypval[0], "sun4 "))
@@ -265,7 +272,6 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
#endif
- boot_flags_init(*cmdline_p);
idprom_init();
if (ARCH_SUN4C)
@@ -311,75 +317,6 @@ void __init setup_arch(char **cmdline_p)
smp_setup_cpu_possible_map();
}
-static int ncpus_probed;
-
-static int show_cpuinfo(struct seq_file *m, void *__unused)
-{
- seq_printf(m,
- "cpu\t\t: %s\n"
- "fpu\t\t: %s\n"
- "promlib\t\t: Version %d Revision %d\n"
- "prom\t\t: %d.%d\n"
- "type\t\t: %s\n"
- "ncpus probed\t: %d\n"
- "ncpus active\t: %d\n"
-#ifndef CONFIG_SMP
- "CPU0Bogo\t: %lu.%02lu\n"
- "CPU0ClkTck\t: %ld\n"
-#endif
- ,
- sparc_cpu_type,
- sparc_fpu_type ,
- romvec->pv_romvers,
- prom_rev,
- romvec->pv_printrev >> 16,
- romvec->pv_printrev & 0xffff,
- &cputypval[0],
- ncpus_probed,
- num_online_cpus()
-#ifndef CONFIG_SMP
- , cpu_data(0).udelay_val/(500000/HZ),
- (cpu_data(0).udelay_val/(5000/HZ)) % 100,
- cpu_data(0).clock_tick
-#endif
- );
-
-#ifdef CONFIG_SMP
- smp_bogo(m);
-#endif
- mmu_info(m);
-#ifdef CONFIG_SMP
- smp_info(m);
-#endif
- return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
- /* The pointer we are returning is arbitrary,
- * it just has to be non-NULL and not IS_ERR
- * in the success case.
- */
- return *pos == 0 ? &c_start : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
- .start =c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
-};
-
extern int stop_a_enabled;
void sun_do_break(void)
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 29bafe0..f3b6850 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -339,84 +339,6 @@ void __init setup_arch(char **cmdline_p)
paging_init();
}
-/* BUFFER is PAGE_SIZE bytes long. */
-
-extern void smp_info(struct seq_file *);
-extern void smp_bogo(struct seq_file *);
-extern void mmu_info(struct seq_file *);
-
-unsigned int dcache_parity_tl1_occurred;
-unsigned int icache_parity_tl1_occurred;
-
-int ncpus_probed;
-
-static int show_cpuinfo(struct seq_file *m, void *__unused)
-{
- seq_printf(m,
- "cpu\t\t: %s\n"
- "fpu\t\t: %s\n"
- "pmu\t\t: %s\n"
- "prom\t\t: %s\n"
- "type\t\t: %s\n"
- "ncpus probed\t: %d\n"
- "ncpus active\t: %d\n"
- "D$ parity tl1\t: %u\n"
- "I$ parity tl1\t: %u\n"
-#ifndef CONFIG_SMP
- "Cpu0ClkTck\t: %016lx\n"
-#endif
- ,
- sparc_cpu_type,
- sparc_fpu_type,
- sparc_pmu_type,
- prom_version,
- ((tlb_type == hypervisor) ?
- "sun4v" :
- "sun4u"),
- ncpus_probed,
- num_online_cpus(),
- dcache_parity_tl1_occurred,
- icache_parity_tl1_occurred
-#ifndef CONFIG_SMP
- , cpu_data(0).clock_tick
-#endif
- );
-#ifdef CONFIG_SMP
- smp_bogo(m);
-#endif
- mmu_info(m);
-#ifdef CONFIG_SMP
- smp_info(m);
-#endif
- return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
- /* The pointer we are returning is arbitrary,
- * it just has to be non-NULL and not IS_ERR
- * in the success case.
- */
- return *pos == 0 ? &c_start : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
- .start =c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
-};
-
extern int stop_a_enabled;
void sun_do_break(void)
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 442286d..d5b3958 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -37,8 +37,6 @@
#include "irq.h"
volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,};
-unsigned char boot_cpu_id = 0;
-unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
cpumask_t smp_commenced_mask = CPU_MASK_NONE;
@@ -130,14 +128,57 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 };
void smp_send_reschedule(int cpu)
{
/*
- * XXX missing reschedule IPI, see scheduler_ipi()
+ * CPU-model-dependent way of generating an IPI that targets
+ * a single CPU. The trap handler needs only to do trap entry/return
+ * to call schedule.
*/
+ BTFIXUP_CALL(smp_ipi_resched)(cpu);
}
void smp_send_stop(void)
{
}
+void arch_send_call_function_single_ipi(int cpu)
+{
+ /* trigger a call-function-single IPI on one CPU */
+ BTFIXUP_CALL(smp_ipi_single)(cpu);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ int cpu;
+
+ /* trigger a call-function IPI on each CPU in the mask */
+ for_each_cpu(cpu, mask)
+ BTFIXUP_CALL(smp_ipi_mask_one)(cpu);
+}
+
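These senders only go through BTFIXUP slots; each CPU model installs its own primitive at boot, e.g. the BTFIXUPSET_CALL(smp_ipi_single, leon_ipi_single, ...) seen in leon_init_smp() above. BTFIXUP patches branch instructions rather than dereferencing pointers, but a plain function pointer gives the same shape (user-space analogy, not kernel code):

	#include <stdio.h>

	static void leon_ipi_single(int cpu)
	{
		printf("LEON IPI -> cpu %d\n", cpu);
	}

	/* stand-in for the BTFIXUP slot, bound once at boot */
	static void (*smp_ipi_single)(int cpu);

	int main(void)
	{
		smp_ipi_single = leon_ipi_single;	/* BTFIXUPSET_CALL analogue */
		smp_ipi_single(1);
		return 0;
	}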
+void smp_resched_interrupt(void)
+{
+ irq_enter();
+ scheduler_ipi();
+ local_cpu_data().irq_resched_count++;
+ irq_exit();
+ /* re-schedule routine called by interrupt return code. */
+}
+
+void smp_call_function_single_interrupt(void)
+{
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+ local_cpu_data().irq_call_count++;
+ irq_exit();
+}
+
+void smp_call_function_interrupt(void)
+{
+ irq_enter();
+ generic_smp_call_function_interrupt();
+ local_cpu_data().irq_call_count++;
+ irq_exit();
+}
+
void smp_flush_cache_all(void)
{
xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all));
@@ -153,9 +194,10 @@ void smp_flush_tlb_all(void)
void smp_flush_cache_mm(struct mm_struct *mm)
{
if(mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm);
local_flush_cache_mm(mm);
}
@@ -164,9 +206,10 @@ void smp_flush_cache_mm(struct mm_struct *mm)
void smp_flush_tlb_mm(struct mm_struct *mm)
{
if(mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask)) {
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask)) {
xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm);
if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
cpumask_copy(mm_cpumask(mm),
@@ -182,9 +225,10 @@ void smp_flush_cache_range(struct vm_area_struct *vma, unsigned long start,
struct mm_struct *mm = vma->vm_mm;
if (mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end);
local_flush_cache_range(vma, start, end);
}
@@ -196,9 +240,10 @@ void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
struct mm_struct *mm = vma->vm_mm;
if (mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end);
local_flush_tlb_range(vma, start, end);
}
@@ -209,9 +254,10 @@ void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
struct mm_struct *mm = vma->vm_mm;
if(mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page);
local_flush_cache_page(vma, page);
}
@@ -222,19 +268,15 @@ void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
struct mm_struct *mm = vma->vm_mm;
if(mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page);
local_flush_tlb_page(vma, page);
}
}
-void smp_reschedule_irq(void)
-{
- set_need_resched();
-}
-
void smp_flush_page_to_ram(unsigned long page)
{
/* Current theory is that those who call this are the one's
@@ -251,9 +293,10 @@ void smp_flush_page_to_ram(unsigned long page)
void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
{
- cpumask_t cpu_mask = *mm_cpumask(mm);
- cpu_clear(smp_processor_id(), cpu_mask);
- if (!cpus_empty(cpu_mask))
+ cpumask_t cpu_mask;
+ cpumask_copy(&cpu_mask, mm_cpumask(mm));
+ cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+ if (!cpumask_empty(&cpu_mask))
xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr);
local_flush_sig_insns(mm, insn_addr);
}
@@ -407,7 +450,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
};
if (!ret) {
- cpu_set(cpu, smp_commenced_mask);
+ cpumask_set_cpu(cpu, &smp_commenced_mask);
while (!cpu_online(cpu))
mb();
}
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 9478da7..99cb172 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -121,11 +121,11 @@ void __cpuinit smp_callin(void)
/* inform the notifiers about the new cpu */
notify_cpu_starting(cpuid);
- while (!cpu_isset(cpuid, smp_commenced_mask))
+ while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
rmb();
ipi_call_lock_irq();
- cpu_set(cpuid, cpu_online_map);
+ set_cpu_online(cpuid, true);
ipi_call_unlock_irq();
/* idle thread is expected to have preempt disabled */
@@ -785,7 +785,7 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask
/* Send cross call to all processors mentioned in MASK_P
* except self. Really, there are only two cases currently,
- * "&cpu_online_map" and "&mm->cpu_vm_mask".
+ * "cpu_online_mask" and "mm_cpumask(mm)".
*/
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
{
@@ -797,7 +797,7 @@ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 d
/* Send cross call to all processors except self. */
static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
{
- smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
+ smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);
}
extern unsigned long xcall_sync_tick;
@@ -805,7 +805,7 @@ extern unsigned long xcall_sync_tick;
static void smp_start_sync_tick_client(int cpu)
{
xcall_deliver((u64) &xcall_sync_tick, 0, 0,
- &cpumask_of_cpu(cpu));
+ cpumask_of(cpu));
}
extern unsigned long xcall_call_function;
@@ -820,7 +820,7 @@ extern unsigned long xcall_call_function_single;
void arch_send_call_function_single_ipi(int cpu)
{
xcall_deliver((u64) &xcall_call_function_single, 0, 0,
- &cpumask_of_cpu(cpu));
+ cpumask_of(cpu));
}
void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
@@ -918,7 +918,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
}
if (data0) {
xcall_deliver(data0, __pa(pg_addr),
- (u64) pg_addr, &cpumask_of_cpu(cpu));
+ (u64) pg_addr, cpumask_of(cpu));
#ifdef CONFIG_DEBUG_DCFLUSH
atomic_inc(&dcpage_flushes_xcall);
#endif
@@ -954,7 +954,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
}
if (data0) {
xcall_deliver(data0, __pa(pg_addr),
- (u64) pg_addr, &cpu_online_map);
+ (u64) pg_addr, cpu_online_mask);
#ifdef CONFIG_DEBUG_DCFLUSH
atomic_inc(&dcpage_flushes_xcall);
#endif
@@ -1197,32 +1197,32 @@ void __devinit smp_fill_in_sib_core_maps(void)
for_each_present_cpu(i) {
unsigned int j;
- cpus_clear(cpu_core_map[i]);
+ cpumask_clear(&cpu_core_map[i]);
if (cpu_data(i).core_id == 0) {
- cpu_set(i, cpu_core_map[i]);
+ cpumask_set_cpu(i, &cpu_core_map[i]);
continue;
}
for_each_present_cpu(j) {
if (cpu_data(i).core_id ==
cpu_data(j).core_id)
- cpu_set(j, cpu_core_map[i]);
+ cpumask_set_cpu(j, &cpu_core_map[i]);
}
}
for_each_present_cpu(i) {
unsigned int j;
- cpus_clear(per_cpu(cpu_sibling_map, i));
+ cpumask_clear(&per_cpu(cpu_sibling_map, i));
if (cpu_data(i).proc_id == -1) {
- cpu_set(i, per_cpu(cpu_sibling_map, i));
+ cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
continue;
}
for_each_present_cpu(j) {
if (cpu_data(i).proc_id ==
cpu_data(j).proc_id)
- cpu_set(j, per_cpu(cpu_sibling_map, i));
+ cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i));
}
}
}
@@ -1232,10 +1232,10 @@ int __cpuinit __cpu_up(unsigned int cpu)
int ret = smp_boot_one_cpu(cpu);
if (!ret) {
- cpu_set(cpu, smp_commenced_mask);
- while (!cpu_isset(cpu, cpu_online_map))
+ cpumask_set_cpu(cpu, &smp_commenced_mask);
+ while (!cpu_online(cpu))
mb();
- if (!cpu_isset(cpu, cpu_online_map)) {
+ if (!cpu_online(cpu)) {
ret = -ENODEV;
} else {
/* On SUN4V, writes to %tick and %stick are
@@ -1269,7 +1269,7 @@ void cpu_play_dead(void)
tb->nonresum_mondo_pa, 0);
}
- cpu_clear(cpu, smp_commenced_mask);
+ cpumask_clear_cpu(cpu, &smp_commenced_mask);
membar_safe("#Sync");
local_irq_disable();
@@ -1290,13 +1290,13 @@ int __cpu_disable(void)
cpuinfo_sparc *c;
int i;
- for_each_cpu_mask(i, cpu_core_map[cpu])
- cpu_clear(cpu, cpu_core_map[i]);
- cpus_clear(cpu_core_map[cpu]);
+ for_each_cpu(i, &cpu_core_map[cpu])
+ cpumask_clear_cpu(cpu, &cpu_core_map[i]);
+ cpumask_clear(&cpu_core_map[cpu]);
- for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
- cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
+ for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
+ cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
+ cpumask_clear(&per_cpu(cpu_sibling_map, cpu));
c = &cpu_data(cpu);
@@ -1313,7 +1313,7 @@ int __cpu_disable(void)
local_irq_disable();
ipi_call_lock();
- cpu_clear(cpu, cpu_online_map);
+ set_cpu_online(cpu, false);
ipi_call_unlock();
cpu_map_rebuild();
@@ -1327,11 +1327,11 @@ void __cpu_die(unsigned int cpu)
for (i = 0; i < 100; i++) {
smp_rmb();
- if (!cpu_isset(cpu, smp_commenced_mask))
+ if (!cpumask_test_cpu(cpu, &smp_commenced_mask))
break;
msleep(100);
}
- if (cpu_isset(cpu, smp_commenced_mask)) {
+ if (cpumask_test_cpu(cpu, &smp_commenced_mask)) {
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
} else {
#if defined(CONFIG_SUN_LDOMS)
@@ -1341,7 +1341,7 @@ void __cpu_die(unsigned int cpu)
do {
hv_err = sun4v_cpu_stop(cpu);
if (hv_err == HV_EOK) {
- cpu_clear(cpu, cpu_present_map);
+ set_cpu_present(cpu, false);
break;
}
} while (--limit > 0);
@@ -1362,7 +1362,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
void smp_send_reschedule(int cpu)
{
xcall_deliver((u64) &xcall_receive_signal, 0, 0,
- &cpumask_of_cpu(cpu));
+ cpumask_of(cpu));
}
void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
diff --git a/arch/sparc/kernel/sun4c_irq.c b/arch/sparc/kernel/sun4c_irq.c
index 90eea38..f6bf25a 100644
--- a/arch/sparc/kernel/sun4c_irq.c
+++ b/arch/sparc/kernel/sun4c_irq.c
@@ -65,62 +65,94 @@
*/
unsigned char __iomem *interrupt_enable;
-static void sun4c_disable_irq(unsigned int irq_nr)
+static void sun4c_mask_irq(struct irq_data *data)
{
- unsigned long flags;
- unsigned char current_mask, new_mask;
-
- local_irq_save(flags);
- irq_nr &= (NR_IRQS - 1);
- current_mask = sbus_readb(interrupt_enable);
- switch (irq_nr) {
- case 1:
- new_mask = ((current_mask) & (~(SUN4C_INT_E1)));
- break;
- case 8:
- new_mask = ((current_mask) & (~(SUN4C_INT_E8)));
- break;
- case 10:
- new_mask = ((current_mask) & (~(SUN4C_INT_E10)));
- break;
- case 14:
- new_mask = ((current_mask) & (~(SUN4C_INT_E14)));
- break;
- default:
+ unsigned long mask = (unsigned long)data->chip_data;
+
+ if (mask) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mask = sbus_readb(interrupt_enable) & ~mask;
+ sbus_writeb(mask, interrupt_enable);
local_irq_restore(flags);
- return;
}
- sbus_writeb(new_mask, interrupt_enable);
- local_irq_restore(flags);
}
-static void sun4c_enable_irq(unsigned int irq_nr)
+static void sun4c_unmask_irq(struct irq_data *data)
{
- unsigned long flags;
- unsigned char current_mask, new_mask;
-
- local_irq_save(flags);
- irq_nr &= (NR_IRQS - 1);
- current_mask = sbus_readb(interrupt_enable);
- switch (irq_nr) {
- case 1:
- new_mask = ((current_mask) | SUN4C_INT_E1);
- break;
- case 8:
- new_mask = ((current_mask) | SUN4C_INT_E8);
- break;
- case 10:
- new_mask = ((current_mask) | SUN4C_INT_E10);
- break;
- case 14:
- new_mask = ((current_mask) | SUN4C_INT_E14);
- break;
- default:
+ unsigned long mask = (unsigned long)data->chip_data;
+
+ if (mask) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mask = sbus_readb(interrupt_enable) | mask;
+ sbus_writeb(mask, interrupt_enable);
local_irq_restore(flags);
- return;
}
- sbus_writeb(new_mask, interrupt_enable);
- local_irq_restore(flags);
+}
+
+static unsigned int sun4c_startup_irq(struct irq_data *data)
+{
+ irq_link(data->irq);
+ sun4c_unmask_irq(data);
+
+ return 0;
+}
+
+static void sun4c_shutdown_irq(struct irq_data *data)
+{
+ sun4c_mask_irq(data);
+ irq_unlink(data->irq);
+}
+
+static struct irq_chip sun4c_irq = {
+ .name = "sun4c",
+ .irq_startup = sun4c_startup_irq,
+ .irq_shutdown = sun4c_shutdown_irq,
+ .irq_mask = sun4c_mask_irq,
+ .irq_unmask = sun4c_unmask_irq,
+};
+
+static unsigned int sun4c_build_device_irq(struct platform_device *op,
+ unsigned int real_irq)
+{
+ unsigned int irq;
+
+ if (real_irq >= 16) {
+ prom_printf("Bogus sun4c IRQ %u\n", real_irq);
+ prom_halt();
+ }
+
+ irq = irq_alloc(real_irq, real_irq);
+ if (irq) {
+ unsigned long mask = 0UL;
+
+ switch (real_irq) {
+ case 1:
+ mask = SUN4C_INT_E1;
+ break;
+ case 8:
+ mask = SUN4C_INT_E8;
+ break;
+ case 10:
+ mask = SUN4C_INT_E10;
+ break;
+ case 14:
+ mask = SUN4C_INT_E14;
+ break;
+ default:
+ /* All the rest are either always enabled,
+ * or are for signalling software interrupts.
+ */
+ break;
+ }
+ irq_set_chip_and_handler_name(irq, &sun4c_irq,
+ handle_level_irq, "level");
+ irq_set_chip_data(irq, (void *)mask);
+ }
+ return irq;
}
struct sun4c_timer_info {
@@ -144,8 +176,9 @@ static void sun4c_load_profile_irq(int cpu, unsigned int limit)
static void __init sun4c_init_timers(irq_handler_t counter_fn)
{
- const struct linux_prom_irqs *irq;
+ const struct linux_prom_irqs *prom_irqs;
struct device_node *dp;
+ unsigned int irq;
const u32 *addr;
int err;
@@ -163,9 +196,9 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn)
sun4c_timers = (void __iomem *) (unsigned long) addr[0];
- irq = of_get_property(dp, "intr", NULL);
+ prom_irqs = of_get_property(dp, "intr", NULL);
of_node_put(dp);
- if (!irq) {
+ if (!prom_irqs) {
prom_printf("sun4c_init_timers: No intr property\n");
prom_halt();
}
@@ -178,15 +211,15 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn)
master_l10_counter = &sun4c_timers->l10_count;
- err = request_irq(irq[0].pri, counter_fn,
- (IRQF_DISABLED | SA_STATIC_ALLOC),
- "timer", NULL);
+ irq = sun4c_build_device_irq(NULL, prom_irqs[0].pri);
+ err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
if (err) {
prom_printf("sun4c_init_timers: request_irq() fails with %d\n", err);
prom_halt();
}
- sun4c_disable_irq(irq[1].pri);
+ /* disable timer interrupt */
+ sun4c_mask_irq(irq_get_irq_data(irq));
}
#ifdef CONFIG_SMP
@@ -215,14 +248,11 @@ void __init sun4c_init_IRQ(void)
interrupt_enable = (void __iomem *) (unsigned long) addr[0];
- BTFIXUPSET_CALL(enable_irq, sun4c_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_irq, sun4c_disable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(enable_pil_irq, sun4c_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_pil_irq, sun4c_disable_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(clear_clock_irq, sun4c_clear_clock_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(load_profile_irq, sun4c_load_profile_irq, BTFIXUPCALL_NOP);
- sparc_irq_config.init_timers = sun4c_init_timers;
+ sparc_irq_config.init_timers = sun4c_init_timers;
+ sparc_irq_config.build_device_irq = sun4c_build_device_irq;
#ifdef CONFIG_SMP
BTFIXUPSET_CALL(set_cpu_int, sun4c_nop, BTFIXUPCALL_NOP);
diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c
index 77b4a89..a9ea60e 100644
--- a/arch/sparc/kernel/sun4d_irq.c
+++ b/arch/sparc/kernel/sun4d_irq.c
@@ -14,6 +14,7 @@
#include <asm/io.h>
#include <asm/sbi.h>
#include <asm/cacheflush.h>
+#include <asm/setup.h>
#include "kernel.h"
#include "irq.h"
@@ -22,22 +23,20 @@
* cpu local. CPU local interrupts cover the timer interrupts
* and whatnot, and we encode those as normal PILs between
* 0 and 15.
- *
- * SBUS interrupts are encoded integers including the board number
- * (plus one), the SBUS level, and the SBUS slot number. Sun4D
- * IRQ dispatch is done by:
- *
- * 1) Reading the BW local interrupt table in order to get the bus
- * interrupt mask.
- *
- * This table is indexed by SBUS interrupt level which can be
- * derived from the PIL we got interrupted on.
- *
- * 2) For each bus showing interrupt pending from #1, read the
- * SBI interrupt state register. This will indicate which slots
- * have interrupts pending for that SBUS interrupt level.
+ * SBUS interrupts are encoded as a combination of board, level and slot.
*/
+struct sun4d_handler_data {
+ unsigned int cpuid; /* target cpu */
+ unsigned int real_irq; /* interrupt level */
+};
+
+
+static unsigned int sun4d_encode_irq(int board, int lvl, int slot)
+{
+ return (board + 1) << 5 | (lvl << 2) | slot;
+}
+
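The packing keeps board-0 SBUS interrupts distinct from plain CPU-local PILs (0..15): board+1 occupies bits 5 and up, so every encoded SBUS value is at least 32, with the SBUS level in bits 4:2 and the slot in bits 1:0. A compilable worked example of the same arithmetic (values illustrative):

	#include <stdio.h>

	/* Same packing as sun4d_encode_irq(): board+1 above bit 5,
	 * SBUS level in bits 4:2, slot in bits 1:0.
	 */
	static unsigned int encode_irq(int board, int lvl, int slot)
	{
		return (board + 1) << 5 | (lvl << 2) | slot;
	}

	int main(void)
	{
		/* board 1, level 3, slot 2 -> (2 << 5) | (3 << 2) | 2 = 78 */
		printf("%u\n", encode_irq(1, 3, 2));
		return 0;
	}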
struct sun4d_timer_regs {
u32 l10_timer_limit;
u32 l10_cur_countx;
@@ -48,17 +47,12 @@ struct sun4d_timer_regs {
static struct sun4d_timer_regs __iomem *sun4d_timers;
-#define TIMER_IRQ 10
-
-#define MAX_STATIC_ALLOC 4
-static unsigned char sbus_tid[32];
-
-static struct irqaction *irq_action[NR_IRQS];
+#define SUN4D_TIMER_IRQ 10
-static struct sbus_action {
- struct irqaction *action;
- /* For SMP this needs to be extended */
-} *sbus_actions;
+/* Specify which cpu handles interrupts from which board.
+ * Index is board; value is cpu.
+ */
+static unsigned char board_to_cpu[32];
static int pil_to_sbus[] = {
0,
@@ -79,152 +73,81 @@ static int pil_to_sbus[] = {
0,
};
-static int sbus_to_pil[] = {
- 0,
- 2,
- 3,
- 5,
- 7,
- 9,
- 11,
- 13,
-};
-
-static int nsbi;
-
/* Exported for sun4d_smp.c */
DEFINE_SPINLOCK(sun4d_imsk_lock);
-int show_sun4d_interrupts(struct seq_file *p, void *v)
+/* SBUS interrupts are encoded integers including the board number
+ * (plus one), the SBUS level, and the SBUS slot number. Sun4D
+ * IRQ dispatch is done by:
+ *
+ * 1) Reading the BW local interrupt table in order to get the bus
+ * interrupt mask.
+ *
+ * This table is indexed by SBUS interrupt level which can be
+ * derived from the PIL we got interrupted on.
+ *
+ * 2) For each bus showing interrupt pending from #1, read the
+ * SBI interrupt state register. This will indicate which slots
+ * have interrupts pending for that SBUS interrupt level.
+ *
+ * 3) Call the generic IRQ support.
+ */
+static void sun4d_sbus_handler_irq(int sbusl)
{
- int i = *(loff_t *) v, j = 0, k = 0, sbusl;
- struct irqaction *action;
- unsigned long flags;
-#ifdef CONFIG_SMP
- int x;
-#endif
-
- spin_lock_irqsave(&irq_action_lock, flags);
- if (i < NR_IRQS) {
- sbusl = pil_to_sbus[i];
- if (!sbusl) {
- action = *(i + irq_action);
- if (!action)
- goto out_unlock;
- } else {
- for (j = 0; j < nsbi; j++) {
- for (k = 0; k < 4; k++)
- action = sbus_actions[(j << 5) + (sbusl << 2) + k].action;
- if (action)
- goto found_it;
- }
- goto out_unlock;
- }
-found_it: seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(x)
- seq_printf(p, "%10u ",
- kstat_cpu(cpu_logical_map(x)).irqs[i]);
-#endif
- seq_printf(p, "%c %s",
- (action->flags & IRQF_DISABLED) ? '+' : ' ',
- action->name);
- action = action->next;
- for (;;) {
- for (; action; action = action->next) {
- seq_printf(p, ",%s %s",
- (action->flags & IRQF_DISABLED) ? " +" : "",
- action->name);
- }
- if (!sbusl)
- break;
- k++;
- if (k < 4) {
- action = sbus_actions[(j << 5) + (sbusl << 2) + k].action;
- } else {
- j++;
- if (j == nsbi)
- break;
- k = 0;
- action = sbus_actions[(j << 5) + (sbusl << 2)].action;
+ unsigned int bus_mask;
+ unsigned int sbino, slot;
+ unsigned int sbil;
+
+ bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff;
+ bw_clear_intr_mask(sbusl, bus_mask);
+
+ sbil = (sbusl << 2);
+ /* Loop for each pending SBI */
+ for (sbino = 0; bus_mask; sbino++) {
+ unsigned int idx, mask;
+
+ bus_mask >>= 1;
+ if (!(bus_mask & 1))
+ continue;
+ /* XXX This seems to ACK the irq twice. acquire_sbi()
+ * XXX uses swap, therefore this writes 0xf << sbil,
+ * XXX then later release_sbi() will write the individual
+ * XXX bits which were set again.
+ */
+ mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil);
+ mask &= (0xf << sbil);
+
+ /* Loop for each pending SBI slot */
+ idx = 0;
+ slot = (1 << sbil);
+ while (mask != 0) {
+ unsigned int pil;
+ struct irq_bucket *p;
+
+ idx++;
+ slot <<= 1;
+ if (!(mask & slot))
+ continue;
+
+ mask &= ~slot;
+ pil = sun4d_encode_irq(sbino, sbusl, idx);
+
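+ /* Dispatch every irq linked to this encoded pil */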
+ p = irq_map[pil];
+ while (p) {
+ struct irq_bucket *next;
+
+ next = p->next;
+ generic_handle_irq(p->irq);
+ p = next;
}
+ release_sbi(SBI2DEVID(sbino), slot);
}
- seq_putc(p, '\n');
}
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
- return 0;
-}
-
-void sun4d_free_irq(unsigned int irq, void *dev_id)
-{
- struct irqaction *action, **actionp;
- struct irqaction *tmp = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_action_lock, flags);
- if (irq < 15)
- actionp = irq + irq_action;
- else
- actionp = &(sbus_actions[irq - (1 << 5)].action);
- action = *actionp;
- if (!action) {
- printk(KERN_ERR "Trying to free free IRQ%d\n", irq);
- goto out_unlock;
- }
- if (dev_id) {
- for (; action; action = action->next) {
- if (action->dev_id == dev_id)
- break;
- tmp = action;
- }
- if (!action) {
- printk(KERN_ERR "Trying to free free shared IRQ%d\n",
- irq);
- goto out_unlock;
- }
- } else if (action->flags & IRQF_SHARED) {
- printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n",
- irq);
- goto out_unlock;
- }
- if (action->flags & SA_STATIC_ALLOC) {
- /*
- * This interrupt is marked as specially allocated
- * so it is a bad idea to free it.
- */
- printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n",
- irq, action->name);
- goto out_unlock;
- }
-
- if (tmp)
- tmp->next = action->next;
- else
- *actionp = action->next;
-
- spin_unlock_irqrestore(&irq_action_lock, flags);
-
- synchronize_irq(irq);
-
- spin_lock_irqsave(&irq_action_lock, flags);
-
- kfree(action);
-
- if (!(*actionp))
- __disable_irq(irq);
-
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
}
void sun4d_handler_irq(int pil, struct pt_regs *regs)
{
struct pt_regs *old_regs;
- struct irqaction *action;
- int cpu = smp_processor_id();
/* SBUS IRQ level (1 - 7) */
int sbusl = pil_to_sbus[pil];
@@ -233,160 +156,96 @@ void sun4d_handler_irq(int pil, struct pt_regs *regs)
cc_set_iclr(1 << pil);
+#ifdef CONFIG_SMP
+ /*
+ * Check the IPI data structures after the IRQ has been cleared. Hard
+ * and soft IRQs can happen at the same time, so both cases are always
+ * handled.
+ */
+ if (pil == SUN4D_IPI_IRQ)
+ sun4d_ipi_interrupt();
+#endif
+
old_regs = set_irq_regs(regs);
irq_enter();
- kstat_cpu(cpu).irqs[pil]++;
- if (!sbusl) {
- action = *(pil + irq_action);
- if (!action)
- unexpected_irq(pil, NULL, regs);
- do {
- action->handler(pil, action->dev_id);
- action = action->next;
- } while (action);
+ if (sbusl == 0) {
+ /* cpu interrupt */
+ struct irq_bucket *p;
+
+ p = irq_map[pil];
+ while (p) {
+ struct irq_bucket *next;
+
+ next = p->next;
+ generic_handle_irq(p->irq);
+ p = next;
+ }
} else {
- int bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff;
- int sbino;
- struct sbus_action *actionp;
- unsigned mask, slot;
- int sbil = (sbusl << 2);
-
- bw_clear_intr_mask(sbusl, bus_mask);
-
- /* Loop for each pending SBI */
- for (sbino = 0; bus_mask; sbino++, bus_mask >>= 1)
- if (bus_mask & 1) {
- mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil);
- mask &= (0xf << sbil);
- actionp = sbus_actions + (sbino << 5) + (sbil);
- /* Loop for each pending SBI slot */
- for (slot = (1 << sbil); mask; slot <<= 1, actionp++)
- if (mask & slot) {
- mask &= ~slot;
- action = actionp->action;
-
- if (!action)
- unexpected_irq(pil, NULL, regs);
- do {
- action->handler(pil, action->dev_id);
- action = action->next;
- } while (action);
- release_sbi(SBI2DEVID(sbino), slot);
- }
- }
+ /* SBUS interrupt */
+ sun4d_sbus_handler_irq(sbusl);
}
irq_exit();
set_irq_regs(old_regs);
}
-int sun4d_request_irq(unsigned int irq,
- irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
+
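+/* A set bit in the CC interrupt mask register blocks the corresponding
+ * interrupt level, so sun4d_mask_irq sets the bit and sun4d_unmask_irq
+ * clears it.
+ */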
+static void sun4d_mask_irq(struct irq_data *data)
{
- struct irqaction *action, *tmp = NULL, **actionp;
+ struct sun4d_handler_data *handler_data = data->handler_data;
+ unsigned int real_irq;
+#ifdef CONFIG_SMP
+ int cpuid = handler_data->cpuid;
unsigned long flags;
- int ret;
-
- if (irq > 14 && irq < (1 << 5)) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!handler) {
- ret = -EINVAL;
- goto out;
- }
-
- spin_lock_irqsave(&irq_action_lock, flags);
-
- if (irq >= (1 << 5))
- actionp = &(sbus_actions[irq - (1 << 5)].action);
- else
- actionp = irq + irq_action;
- action = *actionp;
-
- if (action) {
- if ((action->flags & IRQF_SHARED) && (irqflags & IRQF_SHARED)) {
- for (tmp = action; tmp->next; tmp = tmp->next)
- /* find last entry - tmp used below */;
- } else {
- ret = -EBUSY;
- goto out_unlock;
- }
- if ((action->flags & IRQF_DISABLED) ^ (irqflags & IRQF_DISABLED)) {
- printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n",
- irq);
- ret = -EBUSY;
- goto out_unlock;
- }
- action = NULL; /* Or else! */
- }
-
- /* If this is flagged as statically allocated then we use our
- * private struct which is never freed.
- */
- if (irqflags & SA_STATIC_ALLOC) {
- if (static_irq_count < MAX_STATIC_ALLOC)
- action = &static_irqaction[static_irq_count++];
- else
- printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
- irq, devname);
- }
-
- if (action == NULL)
- action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
-
- if (!action) {
- ret = -ENOMEM;
- goto out_unlock;
- }
-
- action->handler = handler;
- action->flags = irqflags;
- action->name = devname;
- action->next = NULL;
- action->dev_id = dev_id;
-
- if (tmp)
- tmp->next = action;
- else
- *actionp = action;
-
- __enable_irq(irq);
-
- ret = 0;
-out_unlock:
- spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
- return ret;
+#endif
+ real_irq = handler_data->real_irq;
+#ifdef CONFIG_SMP
+ spin_lock_irqsave(&sun4d_imsk_lock, flags);
+ cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | (1 << real_irq));
+ spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+#else
+ cc_set_imsk(cc_get_imsk() | (1 << real_irq));
+#endif
}
-static void sun4d_disable_irq(unsigned int irq)
+static void sun4d_unmask_irq(struct irq_data *data)
{
- int tid = sbus_tid[(irq >> 5) - 1];
+ struct sun4d_handler_data *handler_data = data->handler_data;
+ unsigned int real_irq;
+#ifdef CONFIG_SMP
+ int cpuid = handler_data->cpuid;
unsigned long flags;
+#endif
+ real_irq = handler_data->real_irq;
- if (irq < NR_IRQS)
- return;
-
+#ifdef CONFIG_SMP
spin_lock_irqsave(&sun4d_imsk_lock, flags);
- cc_set_imsk_other(tid, cc_get_imsk_other(tid) | (1 << sbus_to_pil[(irq >> 2) & 7]));
+ cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) & ~(1 << real_irq));
spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+#else
+ cc_set_imsk(cc_get_imsk() & ~(1 << real_irq));
+#endif
}
-static void sun4d_enable_irq(unsigned int irq)
+static unsigned int sun4d_startup_irq(struct irq_data *data)
{
- int tid = sbus_tid[(irq >> 5) - 1];
- unsigned long flags;
-
- if (irq < NR_IRQS)
- return;
+ irq_link(data->irq);
+ sun4d_unmask_irq(data);
+ return 0;
+}
- spin_lock_irqsave(&sun4d_imsk_lock, flags);
- cc_set_imsk_other(tid, cc_get_imsk_other(tid) & ~(1 << sbus_to_pil[(irq >> 2) & 7]));
- spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+static void sun4d_shutdown_irq(struct irq_data *data)
+{
+ sun4d_mask_irq(data);
+ irq_unlink(data->irq);
}
+struct irq_chip sun4d_irq = {
+ .name = "sun4d",
+ .irq_startup = sun4d_startup_irq,
+ .irq_shutdown = sun4d_shutdown_irq,
+ .irq_unmask = sun4d_unmask_irq,
+ .irq_mask = sun4d_mask_irq,
+};
+
#ifdef CONFIG_SMP
static void sun4d_set_cpu_int(int cpu, int level)
{
@@ -413,7 +272,7 @@ void __init sun4d_distribute_irqs(void)
for_each_node_by_name(dp, "sbi") {
int devid = of_getintprop_default(dp, "device-id", 0);
int board = of_getintprop_default(dp, "board#", 0);
- sbus_tid[board] = cpuid;
+ board_to_cpu[board] = cpuid;
set_sbi_tid(devid, cpuid << 3);
}
printk(KERN_ERR "All sbus IRQs directed to CPU%d\n", cpuid);
@@ -443,15 +302,16 @@ static void __init sun4d_load_profile_irqs(void)
unsigned int sun4d_build_device_irq(struct platform_device *op,
unsigned int real_irq)
{
- static int pil_to_sbus[] = {
- 0, 0, 1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 0,
- };
struct device_node *dp = op->dev.of_node;
struct device_node *io_unit, *sbi = dp->parent;
const struct linux_prom_registers *regs;
+ struct sun4d_handler_data *handler_data;
+ unsigned int pil;
+ unsigned int irq;
int board, slot;
int sbusl;
+ irq = 0;
while (sbi) {
if (!strcmp(sbi->name, "sbi"))
break;
@@ -484,7 +344,28 @@ unsigned int sun4d_build_device_irq(struct platform_device *op,
sbusl = pil_to_sbus[real_irq];
if (sbusl)
- return (((board + 1) << 5) + (sbusl << 2) + slot);
+ pil = sun4d_encode_irq(board, sbusl, slot);
+ else
+ pil = real_irq;
+
+ irq = irq_alloc(real_irq, pil);
+ if (irq == 0)
+ goto err_out;
+
+ handler_data = irq_get_handler_data(irq);
+ if (unlikely(handler_data))
+ goto err_out;
+
+ handler_data = kzalloc(sizeof(struct sun4d_handler_data), GFP_ATOMIC);
+ if (unlikely(!handler_data)) {
+ prom_printf("IRQ: kzalloc(sun4d_handler_data) failed.\n");
+ prom_halt();
+ }
+ handler_data->cpuid = board_to_cpu[board];
+ handler_data->real_irq = real_irq;
+ irq_set_chip_and_handler_name(irq, &sun4d_irq,
+ handle_level_irq, "level");
+ irq_set_handler_data(irq, handler_data);
err_out:
return real_irq;
@@ -518,6 +399,7 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
{
struct device_node *dp;
struct resource res;
+ unsigned int irq;
const u32 *reg;
int err;
@@ -552,9 +434,8 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
master_l10_counter = &sun4d_timers->l10_cur_count;
- err = request_irq(TIMER_IRQ, counter_fn,
- (IRQF_DISABLED | SA_STATIC_ALLOC),
- "timer", NULL);
+ irq = sun4d_build_device_irq(NULL, SUN4D_TIMER_IRQ);
+ err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
if (err) {
prom_printf("sun4d_init_timers: request_irq() failed with %d\n",
err);
@@ -567,27 +448,16 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
void __init sun4d_init_sbi_irq(void)
{
struct device_node *dp;
- int target_cpu = 0;
+ int target_cpu;
-#ifdef CONFIG_SMP
target_cpu = boot_cpu_id;
-#endif
-
- nsbi = 0;
- for_each_node_by_name(dp, "sbi")
- nsbi++;
- sbus_actions = kzalloc(nsbi * 8 * 4 * sizeof(struct sbus_action), GFP_ATOMIC);
- if (!sbus_actions) {
- prom_printf("SUN4D: Cannot allocate sbus_actions, halting.\n");
- prom_halt();
- }
for_each_node_by_name(dp, "sbi") {
int devid = of_getintprop_default(dp, "device-id", 0);
int board = of_getintprop_default(dp, "board#", 0);
unsigned int mask;
set_sbi_tid(devid, target_cpu << 3);
- sbus_tid[board] = target_cpu;
+ board_to_cpu[board] = target_cpu;
/* Get rid of pending irqs from PROM */
mask = acquire_sbi(devid, 0xffffffff);
@@ -603,12 +473,10 @@ void __init sun4d_init_IRQ(void)
{
local_irq_disable();
- BTFIXUPSET_CALL(enable_irq, sun4d_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_irq, sun4d_disable_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(clear_clock_irq, sun4d_clear_clock_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(load_profile_irq, sun4d_load_profile_irq, BTFIXUPCALL_NORM);
- sparc_irq_config.init_timers = sun4d_init_timers;
+ sparc_irq_config.init_timers = sun4d_init_timers;
sparc_irq_config.build_device_irq = sun4d_build_device_irq;
#ifdef CONFIG_SMP
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
index 475d50b..1333879 100644
--- a/arch/sparc/kernel/sun4d_smp.c
+++ b/arch/sparc/kernel/sun4d_smp.c
@@ -32,6 +32,7 @@ static inline unsigned long sun4d_swap(volatile unsigned long *ptr, unsigned lon
return val;
}
+static void smp4d_ipi_init(void);
static void smp_setup_percpu_timer(void);
static unsigned char cpu_leds[32];
@@ -80,8 +81,6 @@ void __cpuinit smp4d_callin(void)
local_flush_cache_all();
local_flush_tlb_all();
- cpu_probe();
-
while ((unsigned long)current_set[cpuid] < PAGE_OFFSET)
barrier();
@@ -105,7 +104,7 @@ void __cpuinit smp4d_callin(void)
local_irq_enable(); /* We don't allow PIL 14 yet */
- while (!cpu_isset(cpuid, smp_commenced_mask))
+ while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
barrier();
spin_lock_irqsave(&sun4d_imsk_lock, flags);
@@ -120,6 +119,7 @@ void __cpuinit smp4d_callin(void)
*/
void __init smp4d_boot_cpus(void)
{
+ smp4d_ipi_init();
if (boot_cpu_id)
current_set[0] = NULL;
smp_setup_percpu_timer();
@@ -191,6 +191,80 @@ void __init smp4d_smp_done(void)
sun4d_distribute_irqs();
}
+/* Per-cpu memory structure telling the IPI interrupt handler which
+ * actions are pending.
+ */
+struct sun4d_ipi_work {
+ int single;
+ int msk;
+ int resched;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct sun4d_ipi_work, sun4d_ipi_work);
+
+/* Initialize IPIs on the SUN4D SMP machine */
+static void __init smp4d_ipi_init(void)
+{
+ int cpu;
+ struct sun4d_ipi_work *work;
+
+ printk(KERN_INFO "smp4d: setup IPI at IRQ %d\n", SUN4D_IPI_IRQ);
+
+ for_each_possible_cpu(cpu) {
+ work = &per_cpu(sun4d_ipi_work, cpu);
+ work->single = work->msk = work->resched = 0;
+ }
+}
+
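+/* Called from sun4d_handler_irq when SUN4D_IPI_IRQ fires: clear each
+ * flag set in this cpu's work area and run the matching handler.
+ */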
+void sun4d_ipi_interrupt(void)
+{
+ struct sun4d_ipi_work *work = &__get_cpu_var(sun4d_ipi_work);
+
+ if (work->single) {
+ work->single = 0;
+ smp_call_function_single_interrupt();
+ }
+ if (work->msk) {
+ work->msk = 0;
+ smp_call_function_interrupt();
+ }
+ if (work->resched) {
+ work->resched = 0;
+ smp_resched_interrupt();
+ }
+}
+
+static void smp4d_ipi_single(int cpu)
+{
+ struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+ /* Mark work */
+ work->single = 1;
+
+ /* Generate IRQ on the CPU */
+ sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
+static void smp4d_ipi_mask_one(int cpu)
+{
+ struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+ /* Mark work */
+ work->msk = 1;
+
+ /* Generate IRQ on the CPU */
+ sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
+static void smp4d_ipi_resched(int cpu)
+{
+ struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+ /* Mark work */
+ work->resched = 1;
+
+ /* Generate IRQ on the CPU (any IRQ will cause resched) */
+ sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
static struct smp_funcall {
smpfunc_t func;
unsigned long arg1;
@@ -239,10 +313,10 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
{
register int i;
- cpu_clear(smp_processor_id(), mask);
- cpus_and(mask, cpu_online_map, mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+ cpumask_and(&mask, cpu_online_mask, &mask);
for (i = 0; i <= high; i++) {
- if (cpu_isset(i, mask)) {
+ if (cpumask_test_cpu(i, &mask)) {
ccall_info.processors_in[i] = 0;
ccall_info.processors_out[i] = 0;
sun4d_send_ipi(i, IRQ_CROSS_CALL);
@@ -255,7 +329,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_in[i])
barrier();
@@ -263,7 +337,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_out[i])
barrier();
@@ -356,6 +430,9 @@ void __init sun4d_init_smp(void)
BTFIXUPSET_BLACKBOX(load_current, smp4d_blackbox_current);
BTFIXUPSET_CALL(smp_cross_call, smp4d_cross_call, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4d_processor_id, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_resched, smp4d_ipi_resched, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_single, smp4d_ipi_single, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_mask_one, smp4d_ipi_mask_one, BTFIXUPCALL_NORM);
for (i = 0; i < NR_CPUS; i++) {
ccall_info.processors_in[i] = 1;
diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c
index 69df625..422c16d 100644
--- a/arch/sparc/kernel/sun4m_irq.c
+++ b/arch/sparc/kernel/sun4m_irq.c
@@ -100,6 +100,11 @@
struct sun4m_irq_percpu __iomem *sun4m_irq_percpu[SUN4M_NCPUS];
struct sun4m_irq_global __iomem *sun4m_irq_global;
+struct sun4m_handler_data {
+ bool percpu;
+ long mask;
+};
+
/* Dave Redman (djhr@tadpole.co.uk)
* The sun4m interrupt registers.
*/
@@ -142,9 +147,9 @@ struct sun4m_irq_global __iomem *sun4m_irq_global;
#define OBP_INT_LEVEL_VME 0x40
#define SUN4M_TIMER_IRQ (OBP_INT_LEVEL_ONBOARD | 10)
-#define SUM4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14)
+#define SUN4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14)
-static unsigned long irq_mask[0x50] = {
+static unsigned long sun4m_imask[0x50] = {
/* 0x00 - SMP */
0, SUN4M_SOFT_INT(1),
SUN4M_SOFT_INT(2), SUN4M_SOFT_INT(3),
@@ -169,7 +174,7 @@ static unsigned long irq_mask[0x50] = {
SUN4M_INT_VIDEO, SUN4M_INT_MODULE,
SUN4M_INT_REALTIME, SUN4M_INT_FLOPPY,
(SUN4M_INT_SERIAL | SUN4M_INT_KBDMS),
- SUN4M_INT_AUDIO, 0, SUN4M_INT_MODULE_ERR,
+ SUN4M_INT_AUDIO, SUN4M_INT_E14, SUN4M_INT_MODULE_ERR,
/* 0x30 - sbus */
0, 0, SUN4M_INT_SBUS(0), SUN4M_INT_SBUS(1),
0, SUN4M_INT_SBUS(2), 0, SUN4M_INT_SBUS(3),
@@ -182,105 +187,110 @@ static unsigned long irq_mask[0x50] = {
0, SUN4M_INT_VME(6), 0, 0
};
-static unsigned long sun4m_get_irqmask(unsigned int irq)
+static void sun4m_mask_irq(struct irq_data *data)
{
- unsigned long mask;
-
- if (irq < 0x50)
- mask = irq_mask[irq];
- else
- mask = 0;
+ struct sun4m_handler_data *handler_data = data->handler_data;
+ int cpu = smp_processor_id();
- if (!mask)
- printk(KERN_ERR "sun4m_get_irqmask: IRQ%d has no valid mask!\n",
- irq);
+ if (handler_data->mask) {
+ unsigned long flags;
- return mask;
+ local_irq_save(flags);
+ if (handler_data->percpu) {
+ sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->set);
+ } else {
+ sbus_writel(handler_data->mask, &sun4m_irq_global->mask_set);
+ }
+ local_irq_restore(flags);
+ }
}
-static void sun4m_disable_irq(unsigned int irq_nr)
+static void sun4m_unmask_irq(struct irq_data *data)
{
- unsigned long mask, flags;
+ struct sun4m_handler_data *handler_data = data->handler_data;
int cpu = smp_processor_id();
- mask = sun4m_get_irqmask(irq_nr);
- local_irq_save(flags);
- if (irq_nr > 15)
- sbus_writel(mask, &sun4m_irq_global->mask_set);
- else
- sbus_writel(mask, &sun4m_irq_percpu[cpu]->set);
- local_irq_restore(flags);
-}
-
-static void sun4m_enable_irq(unsigned int irq_nr)
-{
- unsigned long mask, flags;
- int cpu = smp_processor_id();
+ if (handler_data->mask) {
+ unsigned long flags;
- /* Dreadful floppy hack. When we use 0x2b instead of
- * 0x0b the system blows (it starts to whistle!).
- * So we continue to use 0x0b. Fixme ASAP. --P3
- */
- if (irq_nr != 0x0b) {
- mask = sun4m_get_irqmask(irq_nr);
- local_irq_save(flags);
- if (irq_nr > 15)
- sbus_writel(mask, &sun4m_irq_global->mask_clear);
- else
- sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear);
- local_irq_restore(flags);
- } else {
local_irq_save(flags);
- sbus_writel(SUN4M_INT_FLOPPY, &sun4m_irq_global->mask_clear);
+ if (handler_data->percpu) {
+ sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->clear);
+ } else {
+ sbus_writel(handler_data->mask, &sun4m_irq_global->mask_clear);
+ }
local_irq_restore(flags);
}
}
-static unsigned long cpu_pil_to_imask[16] = {
-/*0*/ 0x00000000,
-/*1*/ 0x00000000,
-/*2*/ SUN4M_INT_SBUS(0) | SUN4M_INT_VME(0),
-/*3*/ SUN4M_INT_SBUS(1) | SUN4M_INT_VME(1),
-/*4*/ SUN4M_INT_SCSI,
-/*5*/ SUN4M_INT_SBUS(2) | SUN4M_INT_VME(2),
-/*6*/ SUN4M_INT_ETHERNET,
-/*7*/ SUN4M_INT_SBUS(3) | SUN4M_INT_VME(3),
-/*8*/ SUN4M_INT_VIDEO,
-/*9*/ SUN4M_INT_SBUS(4) | SUN4M_INT_VME(4) | SUN4M_INT_MODULE_ERR,
-/*10*/ SUN4M_INT_REALTIME,
-/*11*/ SUN4M_INT_SBUS(5) | SUN4M_INT_VME(5) | SUN4M_INT_FLOPPY,
-/*12*/ SUN4M_INT_SERIAL | SUN4M_INT_KBDMS,
-/*13*/ SUN4M_INT_SBUS(6) | SUN4M_INT_VME(6) | SUN4M_INT_AUDIO,
-/*14*/ SUN4M_INT_E14,
-/*15*/ SUN4M_INT_ERROR,
-};
+static unsigned int sun4m_startup_irq(struct irq_data *data)
+{
+ irq_link(data->irq);
+ sun4m_unmask_irq(data);
+ return 0;
+}
-/* We assume the caller has disabled local interrupts when these are called,
- * or else very bizarre behavior will result.
- */
-static void sun4m_disable_pil_irq(unsigned int pil)
+static void sun4m_shutdown_irq(struct irq_data *data)
{
- sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_set);
+ sun4m_mask_irq(data);
+ irq_unlink(data->irq);
}
-static void sun4m_enable_pil_irq(unsigned int pil)
+static struct irq_chip sun4m_irq = {
+ .name = "sun4m",
+ .irq_startup = sun4m_startup_irq,
+ .irq_shutdown = sun4m_shutdown_irq,
+ .irq_mask = sun4m_mask_irq,
+ .irq_unmask = sun4m_unmask_irq,
+};
+
+
+static unsigned int sun4m_build_device_irq(struct platform_device *op,
+ unsigned int real_irq)
{
- sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_clear);
+ struct sun4m_handler_data *handler_data;
+ unsigned int irq;
+ unsigned int pil;
+
+ if (real_irq >= OBP_INT_LEVEL_VME) {
+ prom_printf("Bogus sun4m IRQ %u\n", real_irq);
+ prom_halt();
+ }
+ pil = (real_irq & 0xf);
+ irq = irq_alloc(real_irq, pil);
+
+ if (irq == 0)
+ goto out;
+
+ handler_data = irq_get_handler_data(irq);
+ if (unlikely(handler_data))
+ goto out;
+
+ handler_data = kzalloc(sizeof(struct sun4m_handler_data), GFP_ATOMIC);
+ if (unlikely(!handler_data)) {
+ prom_printf("IRQ: kzalloc(sun4m_handler_data) failed.\n");
+ prom_halt();
+ }
+
+ handler_data->mask = sun4m_imask[real_irq];
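+ /* Soft interrupts below the onboard range are driven through the
+ * per-cpu set/clear registers rather than the global mask.
+ */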
+ handler_data->percpu = real_irq < OBP_INT_LEVEL_ONBOARD;
+ irq_set_chip_and_handler_name(irq, &sun4m_irq,
+ handle_level_irq, "level");
+ irq_set_handler_data(irq, handler_data);
+
+out:
+ return irq;
}
#ifdef CONFIG_SMP
static void sun4m_send_ipi(int cpu, int level)
{
- unsigned long mask = sun4m_get_irqmask(level);
-
- sbus_writel(mask, &sun4m_irq_percpu[cpu]->set);
+ sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->set);
}
static void sun4m_clear_ipi(int cpu, int level)
{
- unsigned long mask = sun4m_get_irqmask(level);
-
- sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear);
+ sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->clear);
}
static void sun4m_set_udt(int cpu)
@@ -343,7 +353,15 @@ void sun4m_nmi(struct pt_regs *regs)
prom_halt();
}
-/* Exported for sun4m_smp.c */
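+/* Exported for sun4m_smp.c - unmasks the level-14 profile timer irq */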
+void sun4m_unmask_profile_irq(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ sbus_writel(sun4m_imask[SUN4M_PROFILE_IRQ], &sun4m_irq_global->mask_clear);
+ local_irq_restore(flags);
+}
+
void sun4m_clear_profile_irq(int cpu)
{
sbus_readl(&timers_percpu[cpu]->l14_limit);
@@ -358,6 +376,7 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn)
{
struct device_node *dp = of_find_node_by_name(NULL, "counter");
int i, err, len, num_cpu_timers;
+ unsigned int irq;
const u32 *addr;
if (!dp) {
@@ -384,8 +403,9 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn)
master_l10_counter = &timers_global->l10_count;
- err = request_irq(SUN4M_TIMER_IRQ, counter_fn,
- (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
+ irq = sun4m_build_device_irq(NULL, SUN4M_TIMER_IRQ);
+
+ err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
if (err) {
printk(KERN_ERR "sun4m_init_timers: Register IRQ error %d.\n",
err);
@@ -452,14 +472,11 @@ void __init sun4m_init_IRQ(void)
if (num_cpu_iregs == 4)
sbus_writel(0, &sun4m_irq_global->interrupt_target);
- BTFIXUPSET_CALL(enable_irq, sun4m_enable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_irq, sun4m_disable_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(enable_pil_irq, sun4m_enable_pil_irq, BTFIXUPCALL_NORM);
- BTFIXUPSET_CALL(disable_pil_irq, sun4m_disable_pil_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(clear_clock_irq, sun4m_clear_clock_irq, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(load_profile_irq, sun4m_load_profile_irq, BTFIXUPCALL_NORM);
sparc_irq_config.init_timers = sun4m_init_timers;
+ sparc_irq_config.build_device_irq = sun4m_build_device_irq;
#ifdef CONFIG_SMP
BTFIXUPSET_CALL(set_cpu_int, sun4m_send_ipi, BTFIXUPCALL_NORM);
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index 5cc7dc5..5947686 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -15,6 +15,9 @@
#include "irq.h"
#include "kernel.h"
+#define IRQ_IPI_SINGLE 12
+#define IRQ_IPI_MASK 13
+#define IRQ_IPI_RESCHED 14
#define IRQ_CROSS_CALL 15
static inline unsigned long
@@ -26,6 +29,7 @@ swap_ulong(volatile unsigned long *ptr, unsigned long val)
return val;
}
+static void smp4m_ipi_init(void);
static void smp_setup_percpu_timer(void);
void __cpuinit smp4m_callin(void)
@@ -59,8 +63,6 @@ void __cpuinit smp4m_callin(void)
local_flush_cache_all();
local_flush_tlb_all();
- cpu_probe();
-
/* Fix idle thread fields. */
__asm__ __volatile__("ld [%0], %%g6\n\t"
: : "r" (&current_set[cpuid])
@@ -70,7 +72,7 @@ void __cpuinit smp4m_callin(void)
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
- while (!cpu_isset(cpuid, smp_commenced_mask))
+ while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
mb();
local_irq_enable();
@@ -83,6 +85,7 @@ void __cpuinit smp4m_callin(void)
*/
void __init smp4m_boot_cpus(void)
{
+ smp4m_ipi_init();
smp_setup_percpu_timer();
local_flush_cache_all();
}
@@ -150,18 +153,25 @@ void __init smp4m_smp_done(void)
/* Ok, they are spinning and ready to go. */
}
-/* At each hardware IRQ, we get this called to forward IRQ reception
- * to the next processor. The caller must disable the IRQ level being
- * serviced globally so that there are no double interrupts received.
- *
- * XXX See sparc64 irq.c.
- */
-void smp4m_irq_rotate(int cpu)
+
+/* Initialize IPIs on the SUN4M SMP machine */
+static void __init smp4m_ipi_init(void)
+{
+}
+
+static void smp4m_ipi_resched(int cpu)
+{
+ set_cpu_int(cpu, IRQ_IPI_RESCHED);
+}
+
+static void smp4m_ipi_single(int cpu)
{
- int next = cpu_data(cpu).next;
+ set_cpu_int(cpu, IRQ_IPI_SINGLE);
+}
- if (next != cpu)
- set_irq_udt(next);
+static void smp4m_ipi_mask_one(int cpu)
+{
+ set_cpu_int(cpu, IRQ_IPI_MASK);
}
static struct smp_funcall {
@@ -199,10 +209,10 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
{
register int i;
- cpu_clear(smp_processor_id(), mask);
- cpus_and(mask, cpu_online_map, mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+ cpumask_and(&mask, cpu_online_mask, &mask);
for (i = 0; i < ncpus; i++) {
- if (cpu_isset(i, mask)) {
+ if (cpumask_test_cpu(i, &mask)) {
ccall_info.processors_in[i] = 0;
ccall_info.processors_out[i] = 0;
set_cpu_int(i, IRQ_CROSS_CALL);
@@ -218,7 +228,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_in[i])
barrier();
@@ -226,7 +236,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
i = 0;
do {
- if (!cpu_isset(i, mask))
+ if (!cpumask_test_cpu(i, &mask))
continue;
while (!ccall_info.processors_out[i])
barrier();
@@ -277,7 +287,7 @@ static void __cpuinit smp_setup_percpu_timer(void)
load_profile_irq(cpu, lvl14_resolution);
if (cpu == boot_cpu_id)
- enable_pil_irq(14);
+ sun4m_unmask_profile_irq();
}
static void __init smp4m_blackbox_id(unsigned *addr)
@@ -306,4 +316,7 @@ void __init sun4m_init_smp(void)
BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current);
BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM);
BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_resched, smp4m_ipi_resched, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_single, smp4m_ipi_single, BTFIXUPCALL_NORM);
+ BTFIXUPSET_CALL(smp_ipi_mask_one, smp4m_ipi_mask_one, BTFIXUPCALL_NORM);
}
diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c
index 1eb8b00..7408201 100644
--- a/arch/sparc/kernel/sysfs.c
+++ b/arch/sparc/kernel/sysfs.c
@@ -103,9 +103,10 @@ static unsigned long run_on_cpu(unsigned long cpu,
unsigned long (*func)(unsigned long),
unsigned long arg)
{
- cpumask_t old_affinity = current->cpus_allowed;
+ cpumask_t old_affinity;
unsigned long ret;
+ cpumask_copy(&old_affinity, tsk_cpus_allowed(current));
/* should return -EINVAL to userspace */
if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
return 0;
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 96046a4..1060e06 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -228,14 +228,10 @@ static void __init sbus_time_init(void)
void __init time_init(void)
{
-#ifdef CONFIG_PCI
- extern void pci_time_init(void);
- if (pcic_present()) {
+ if (pcic_present())
pci_time_init();
- return;
- }
-#endif
- sbus_time_init();
+ else
+ sbus_time_init();
}
diff --git a/arch/sparc/kernel/us2e_cpufreq.c b/arch/sparc/kernel/us2e_cpufreq.c
index 8f982b7..531d54f 100644
--- a/arch/sparc/kernel/us2e_cpufreq.c
+++ b/arch/sparc/kernel/us2e_cpufreq.c
@@ -237,7 +237,7 @@ static unsigned int us2e_freq_get(unsigned int cpu)
if (!cpu_online(cpu))
return 0;
- cpus_allowed = current->cpus_allowed;
+ cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
set_cpus_allowed_ptr(current, cpumask_of(cpu));
clock_tick = sparc64_get_clock_tick(cpu) / 1000;
@@ -258,7 +258,7 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index)
if (!cpu_online(cpu))
return;
- cpus_allowed = current->cpus_allowed;
+ cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
set_cpus_allowed_ptr(current, cpumask_of(cpu));
new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000;
diff --git a/arch/sparc/kernel/us3_cpufreq.c b/arch/sparc/kernel/us3_cpufreq.c
index f35d1e7..9a8ceb7 100644
--- a/arch/sparc/kernel/us3_cpufreq.c
+++ b/arch/sparc/kernel/us3_cpufreq.c
@@ -85,7 +85,7 @@ static unsigned int us3_freq_get(unsigned int cpu)
if (!cpu_online(cpu))
return 0;
- cpus_allowed = current->cpus_allowed;
+ cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
set_cpus_allowed_ptr(current, cpumask_of(cpu));
reg = read_safari_cfg();
@@ -105,7 +105,7 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index)
if (!cpu_online(cpu))
return;
- cpus_allowed = current->cpus_allowed;
+ cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
set_cpus_allowed_ptr(current, cpumask_of(cpu));
new_freq = sparc64_get_clock_tick(cpu) / 1000;
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 846d1c4..7f01b8f 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -15,7 +15,6 @@ lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o
lib-$(CONFIG_SPARC32) += copy_user.o locks.o
lib-y += atomic_$(BITS).o
lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
-lib-$(CONFIG_SPARC32) += rwsem_32.o
lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
diff --git a/arch/sparc/lib/rwsem_32.S b/arch/sparc/lib/rwsem_32.S
deleted file mode 100644
index 9675268..0000000
--- a/arch/sparc/lib/rwsem_32.S
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Assembly part of rw semaphores.
- *
- * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com)
- */
-
-#include <asm/ptrace.h>
-#include <asm/psr.h>
-
- .section .sched.text, "ax"
- .align 4
-
- .globl ___down_read
-___down_read:
- rd %psr, %g3
- nop
- nop
- nop
- or %g3, PSR_PIL, %g7
- wr %g7, 0, %psr
- nop
- nop
- nop
-#ifdef CONFIG_SMP
-1: ldstub [%g1 + 4], %g7
- tst %g7
- bne 1b
- ld [%g1], %g7
- sub %g7, 1, %g7
- st %g7, [%g1]
- stb %g0, [%g1 + 4]
-#else
- ld [%g1], %g7
- sub %g7, 1, %g7
- st %g7, [%g1]
-#endif
- wr %g3, 0, %psr
- add %g7, 1, %g7
- nop
- nop
- subcc %g7, 1, %g7
- bneg 3f
- nop
-2: jmpl %o7, %g0
- mov %g4, %o7
-3: save %sp, -64, %sp
- mov %g1, %l1
- mov %g4, %l4
- bcs 4f
- mov %g5, %l5
- call down_read_failed
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba ___down_read
- restore %l5, %g0, %g5
-4: call down_read_failed_biased
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba 2b
- restore %l5, %g0, %g5
-
- .globl ___down_write
-___down_write:
- rd %psr, %g3
- nop
- nop
- nop
- or %g3, PSR_PIL, %g7
- wr %g7, 0, %psr
- sethi %hi(0x01000000), %g2
- nop
- nop
-#ifdef CONFIG_SMP
-1: ldstub [%g1 + 4], %g7
- tst %g7
- bne 1b
- ld [%g1], %g7
- sub %g7, %g2, %g7
- st %g7, [%g1]
- stb %g0, [%g1 + 4]
-#else
- ld [%g1], %g7
- sub %g7, %g2, %g7
- st %g7, [%g1]
-#endif
- wr %g3, 0, %psr
- add %g7, %g2, %g7
- nop
- nop
- subcc %g7, %g2, %g7
- bne 3f
- nop
-2: jmpl %o7, %g0
- mov %g4, %o7
-3: save %sp, -64, %sp
- mov %g1, %l1
- mov %g4, %l4
- bcs 4f
- mov %g5, %l5
- call down_write_failed
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba ___down_write
- restore %l5, %g0, %g5
-4: call down_write_failed_biased
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba 2b
- restore %l5, %g0, %g5
-
- .text
- .globl ___up_read
-___up_read:
- rd %psr, %g3
- nop
- nop
- nop
- or %g3, PSR_PIL, %g7
- wr %g7, 0, %psr
- nop
- nop
- nop
-#ifdef CONFIG_SMP
-1: ldstub [%g1 + 4], %g7
- tst %g7
- bne 1b
- ld [%g1], %g7
- add %g7, 1, %g7
- st %g7, [%g1]
- stb %g0, [%g1 + 4]
-#else
- ld [%g1], %g7
- add %g7, 1, %g7
- st %g7, [%g1]
-#endif
- wr %g3, 0, %psr
- nop
- nop
- nop
- cmp %g7, 0
- be 3f
- nop
-2: jmpl %o7, %g0
- mov %g4, %o7
-3: save %sp, -64, %sp
- mov %g1, %l1
- mov %g4, %l4
- mov %g5, %l5
- clr %o1
- call __rwsem_wake
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba 2b
- restore %l5, %g0, %g5
-
- .globl ___up_write
-___up_write:
- rd %psr, %g3
- nop
- nop
- nop
- or %g3, PSR_PIL, %g7
- wr %g7, 0, %psr
- sethi %hi(0x01000000), %g2
- nop
- nop
-#ifdef CONFIG_SMP
-1: ldstub [%g1 + 4], %g7
- tst %g7
- bne 1b
- ld [%g1], %g7
- add %g7, %g2, %g7
- st %g7, [%g1]
- stb %g0, [%g1 + 4]
-#else
- ld [%g1], %g7
- add %g7, %g2, %g7
- st %g7, [%g1]
-#endif
- wr %g3, 0, %psr
- sub %g7, %g2, %g7
- nop
- nop
- addcc %g7, %g2, %g7
- bcs 3f
- nop
-2: jmpl %o7, %g0
- mov %g4, %o7
-3: save %sp, -64, %sp
- mov %g1, %l1
- mov %g4, %l4
- mov %g5, %l5
- mov %g7, %o1
- call __rwsem_wake
- mov %l1, %o0
- mov %l1, %g1
- mov %l4, %g4
- ba 2b
- restore %l5, %g0, %g5
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 2f6ae1d..e10cd03 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -862,7 +862,7 @@ static void init_node_masks_nonnuma(void)
for (i = 0; i < NR_CPUS; i++)
numa_cpu_lookup_table[i] = 0;
- numa_cpumask_lookup_table[0] = CPU_MASK_ALL;
+ cpumask_setall(&numa_cpumask_lookup_table[0]);
}
#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -1080,7 +1080,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
{
u64 arc;
- cpus_clear(*mask);
+ cpumask_clear(mask);
mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
u64 target = mdesc_arc_target(md, arc);
@@ -1091,7 +1091,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
continue;
id = mdesc_get_property(md, target, "id", NULL);
if (*id < nr_cpu_ids)
- cpu_set(*id, *mask);
+ cpumask_set_cpu(*id, mask);
}
}
@@ -1153,13 +1153,13 @@ static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
numa_parse_mdesc_group_cpus(md, grp, &mask);
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu(cpu, &mask)
numa_cpu_lookup_table[cpu] = index;
- numa_cpumask_lookup_table[index] = mask;
+ cpumask_copy(&numa_cpumask_lookup_table[index], &mask);
if (numa_debug) {
printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu(cpu, &mask)
printk("%d ", cpu);
printk("]\n");
}
@@ -1218,7 +1218,7 @@ static int __init numa_parse_jbus(void)
index = 0;
for_each_present_cpu(cpu) {
numa_cpu_lookup_table[cpu] = index;
- numa_cpumask_lookup_table[index] = cpumask_of_cpu(cpu);
+ cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu));
node_masks[index].mask = ~((1UL << 36UL) - 1UL);
node_masks[index].val = cpu << 36UL;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cbc70a2..c8b4162 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -254,7 +254,7 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
}
#endif
-static int disable_smep __initdata;
+static int disable_smep __cpuinitdata;
static __init int setup_disable_smep(char *arg)
{
disable_smep = 1;
@@ -262,7 +262,7 @@ static __init int setup_disable_smep(char *arg)
}
__setup("nosmep", setup_disable_smep);
-static __init void setup_smep(struct cpuinfo_x86 *c)
+static __cpuinit void setup_smep(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_SMEP)) {
if (unlikely(disable_smep)) {
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b2699bb..d871b14 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -42,6 +42,7 @@
#include <linux/genhd.h>
#include <net/tcp.h>
#include <linux/lru_cache.h>
+#include <linux/prefetch.h>
#ifdef __CHECKER__
# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr")))
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index c9213ea..a4d6cb0 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -34,6 +34,7 @@
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
+#include <linux/prefetch.h>
#include <linux/i7300_idle.h>
#include "dma.h"
#include "registers.h"
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index effd140..f4a51d4 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -34,6 +34,7 @@
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
+#include <linux/prefetch.h>
#include <linux/i7300_idle.h>
#include "dma.h"
#include "dma_v2.h"
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index d0f4990..d845dc4 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -60,6 +60,7 @@
#include <linux/gfp.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
#include "registers.h"
#include "hw.h"
#include "dma.h"
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index c26c119..2af8cb4 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -416,21 +416,21 @@ void ide_acpi_get_timing(ide_hwif_t *hwif)
out_obj = output.pointer;
if (out_obj->type != ACPI_TYPE_BUFFER) {
- kfree(output.pointer);
DEBPRINT("Run _GTM: error: "
"expected object type of ACPI_TYPE_BUFFER, "
"got 0x%x\n", out_obj->type);
+ kfree(output.pointer);
return;
}
if (!out_obj->buffer.length || !out_obj->buffer.pointer ||
out_obj->buffer.length != sizeof(struct GTM_buffer)) {
- kfree(output.pointer);
printk(KERN_ERR
"%s: unexpected _GTM length (0x%x)[should be 0x%zx] or "
"addr (0x%p)\n",
__func__, out_obj->buffer.length,
sizeof(struct GTM_buffer), out_obj->buffer.pointer);
+ kfree(output.pointer);
return;
}
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 5a702d0..61fdf544 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -73,7 +73,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
drive->failed_pc = NULL;
if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 ||
- (rq && rq->cmd_type == REQ_TYPE_BLOCK_PC))
+ rq->cmd_type == REQ_TYPE_BLOCK_PC)
uptodate = 1; /* FIXME */
else if (pc->c[0] == GPCMD_REQUEST_SENSE) {
diff --git a/drivers/ide/ide-scan-pci.c b/drivers/ide/ide-scan-pci.c
index 0e79eff..c3da53e 100644
--- a/drivers/ide/ide-scan-pci.c
+++ b/drivers/ide/ide-scan-pci.c
@@ -88,7 +88,7 @@ static int __init ide_scan_pcibus(void)
struct list_head *l, *n;
pre_init = 0;
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)))
+ for_each_pci_dev(dev)
ide_scan_pcidev(dev);
/*
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index ebcf8e4..1db7c43 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -1334,7 +1334,7 @@ out_free_pmif:
static int
pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
{
- pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)pci_get_drvdata(pdev);
+ pmac_ide_hwif_t *pmif = pci_get_drvdata(pdev);
int rc = 0;
if (mesg.event != pdev->dev.power.power_state.event
@@ -1350,7 +1350,7 @@ pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
static int
pmac_ide_pci_resume(struct pci_dev *pdev)
{
- pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)pci_get_drvdata(pdev);
+ pmac_ide_hwif_t *pmif = pci_get_drvdata(pdev);
int rc = 0;
if (pdev->dev.power.power_state.event != PM_EVENT_ON) {
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index dc85d77..0cfc455 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -47,6 +47,7 @@
#include <linux/init.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
+#include <linux/prefetch.h>
#include <asm/io.h>
#include <asm/irq.h>
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 5c93627..70bd738 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -493,11 +493,11 @@ void bitmap_update_sb(struct bitmap *bitmap)
spin_unlock_irqrestore(&bitmap->lock, flags);
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
sb->events = cpu_to_le64(bitmap->mddev->events);
- if (bitmap->mddev->events < bitmap->events_cleared) {
+ if (bitmap->mddev->events < bitmap->events_cleared)
/* rocking back to read-only */
bitmap->events_cleared = bitmap->mddev->events;
- sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
- }
+ sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
+ sb->state = cpu_to_le32(bitmap->flags);
/* Just in case these have been changed via sysfs: */
sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
@@ -618,7 +618,7 @@ success:
if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
bitmap->flags |= BITMAP_HOSTENDIAN;
bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
- if (sb->state & cpu_to_le32(BITMAP_STALE))
+ if (bitmap->flags & BITMAP_STALE)
bitmap->events_cleared = bitmap->mddev->events;
err = 0;
out:
@@ -652,9 +652,11 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
switch (op) {
case MASK_SET:
sb->state |= cpu_to_le32(bits);
+ bitmap->flags |= bits;
break;
case MASK_UNSET:
sb->state &= cpu_to_le32(~bits);
+ bitmap->flags &= ~bits;
break;
default:
BUG();
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7d6f7f1..aa640a8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3324,7 +3324,7 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len)
char *e;
unsigned long long n = simple_strtoull(buf, &e, 10);
- if (mddev->pers)
+ if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
return -EBUSY;
if (cmd_match(buf, "none"))
n = MaxSector;
@@ -4347,13 +4347,19 @@ static int md_alloc(dev_t dev, char *name)
disk->fops = &md_fops;
disk->private_data = mddev;
disk->queue = mddev->queue;
+ blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
/* Allow extended partitions. This makes the
* 'mdp' device redundant, but we can't really
* remove it now.
*/
disk->flags |= GENHD_FL_EXT_DEVT;
- add_disk(disk);
mddev->gendisk = disk;
+ /* As soon as we call add_disk(), another thread could get
+ * through to md_open, so make sure it doesn't get too far
+ */
+ mutex_lock(&mddev->open_mutex);
+ add_disk(disk);
+
error = kobject_init_and_add(&mddev->kobj, &md_ktype,
&disk_to_dev(disk)->kobj, "%s", "md");
if (error) {
@@ -4367,8 +4373,7 @@ static int md_alloc(dev_t dev, char *name)
if (mddev->kobj.sd &&
sysfs_create_group(&mddev->kobj, &md_bitmap_group))
printk(KERN_DEBUG "pointless warning\n");
-
- blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
+ mutex_unlock(&mddev->open_mutex);
abort:
mutex_unlock(&disks_mutex);
if (!error && mddev->kobj.sd) {
@@ -5211,6 +5216,16 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
} else
super_types[mddev->major_version].
validate_super(mddev, rdev);
+ if ((info->state & (1<<MD_DISK_SYNC)) &&
+ (!test_bit(In_sync, &rdev->flags) ||
+ rdev->raid_disk != info->raid_disk)) {
+ /* This was a hot-add request, but the events don't
+ * match, so reject it.
+ */
+ export_rdev(rdev);
+ return -EINVAL;
+ }
+
if (test_bit(In_sync, &rdev->flags))
rdev->saved_raid_disk = rdev->raid_disk;
else
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index c358909..3535c23 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -146,7 +146,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev)
int i;
seq_printf (seq, " [%d/%d] [", conf->raid_disks,
- conf->working_disks);
+ conf->raid_disks - mddev->degraded);
for (i = 0; i < conf->raid_disks; i++)
seq_printf (seq, "%s",
conf->multipaths[i].rdev &&
@@ -186,35 +186,36 @@ static int multipath_congested(void *data, int bits)
static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
{
multipath_conf_t *conf = mddev->private;
+ char b[BDEVNAME_SIZE];
- if (conf->working_disks <= 1) {
+ if (conf->raid_disks - mddev->degraded <= 1) {
/*
* Uh oh, we can do nothing if this is our last path, but
* first check if this is a queued request for a device
* which has just failed.
*/
printk(KERN_ALERT
- "multipath: only one IO path left and IO error.\n");
+ "multipath: only one IO path left and IO error.\n");
/* leave it active... it's all we have */
- } else {
- /*
- * Mark disk as unusable
- */
- if (!test_bit(Faulty, &rdev->flags)) {
- char b[BDEVNAME_SIZE];
- clear_bit(In_sync, &rdev->flags);
- set_bit(Faulty, &rdev->flags);
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
- conf->working_disks--;
- mddev->degraded++;
- printk(KERN_ALERT "multipath: IO failure on %s,"
- " disabling IO path.\n"
- "multipath: Operation continuing"
- " on %d IO paths.\n",
- bdevname (rdev->bdev,b),
- conf->working_disks);
- }
+ return;
+ }
+ /*
+ * Mark disk as unusable
+ */
+ if (test_and_clear_bit(In_sync, &rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
+ mddev->degraded++;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
}
+ set_bit(Faulty, &rdev->flags);
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ printk(KERN_ALERT "multipath: IO failure on %s,"
+ " disabling IO path.\n"
+ "multipath: Operation continuing"
+ " on %d IO paths.\n",
+ bdevname(rdev->bdev, b),
+ conf->raid_disks - mddev->degraded);
}
static void print_multipath_conf (multipath_conf_t *conf)
@@ -227,7 +228,7 @@ static void print_multipath_conf (multipath_conf_t *conf)
printk("(conf==NULL)\n");
return;
}
- printk(" --- wd:%d rd:%d\n", conf->working_disks,
+ printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
conf->raid_disks);
for (i = 0; i < conf->raid_disks; i++) {
@@ -274,10 +275,11 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
PAGE_CACHE_SIZE - 1);
}
- conf->working_disks++;
+ spin_lock_irq(&conf->device_lock);
mddev->degraded--;
rdev->raid_disk = path;
set_bit(In_sync, &rdev->flags);
+ spin_unlock_irq(&conf->device_lock);
rcu_assign_pointer(p->rdev, rdev);
err = 0;
md_integrity_add_rdev(rdev, mddev);
@@ -391,6 +393,7 @@ static int multipath_run (mddev_t *mddev)
int disk_idx;
struct multipath_info *disk;
mdk_rdev_t *rdev;
+ int working_disks;
if (md_check_no_bitmap(mddev))
return -EINVAL;
@@ -424,7 +427,7 @@ static int multipath_run (mddev_t *mddev)
goto out_free_conf;
}
- conf->working_disks = 0;
+ working_disks = 0;
list_for_each_entry(rdev, &mddev->disks, same_set) {
disk_idx = rdev->raid_disk;
if (disk_idx < 0 ||
@@ -446,7 +449,7 @@ static int multipath_run (mddev_t *mddev)
}
if (!test_bit(Faulty, &rdev->flags))
- conf->working_disks++;
+ working_disks++;
}
conf->raid_disks = mddev->raid_disks;
@@ -454,12 +457,12 @@ static int multipath_run (mddev_t *mddev)
spin_lock_init(&conf->device_lock);
INIT_LIST_HEAD(&conf->retry_list);
- if (!conf->working_disks) {
+ if (!working_disks) {
printk(KERN_ERR "multipath: no operational IO paths for %s\n",
mdname(mddev));
goto out_free_conf;
}
- mddev->degraded = conf->raid_disks - conf->working_disks;
+ mddev->degraded = conf->raid_disks - working_disks;
conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
sizeof(struct multipath_bh));
@@ -481,7 +484,8 @@ static int multipath_run (mddev_t *mddev)
printk(KERN_INFO
"multipath: array %s active with %d out of %d IO paths\n",
- mdname(mddev), conf->working_disks, mddev->raid_disks);
+ mdname(mddev), conf->raid_disks - mddev->degraded,
+ mddev->raid_disks);
/*
* Ok, everything is just fine now
*/
diff --git a/drivers/md/multipath.h b/drivers/md/multipath.h
index d1c2a8d..3c5a45e 100644
--- a/drivers/md/multipath.h
+++ b/drivers/md/multipath.h
@@ -9,7 +9,6 @@ struct multipath_private_data {
mddev_t *mddev;
struct multipath_info *multipaths;
int raid_disks;
- int working_disks;
spinlock_t device_lock;
struct list_head retry_list;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2b7a7ff..5d09609 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error)
rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
}
-static void r1_bio_write_done(r1bio_t *r1_bio, int vcnt, struct bio_vec *bv,
- int behind)
+static void r1_bio_write_done(r1bio_t *r1_bio)
{
if (atomic_dec_and_test(&r1_bio->remaining))
{
/* it really is the end of this request */
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
/* free extra copy of the data pages */
- int i = vcnt;
+ int i = r1_bio->behind_page_count;
while (i--)
- safe_put_page(bv[i].bv_page);
+ safe_put_page(r1_bio->behind_pages[i]);
+ kfree(r1_bio->behind_pages);
+ r1_bio->behind_pages = NULL;
}
/* clear the bitmap if all writes complete successfully */
bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
r1_bio->sectors,
!test_bit(R1BIO_Degraded, &r1_bio->state),
- behind);
+ test_bit(R1BIO_BehindIO, &r1_bio->state));
md_write_end(r1_bio->mddev);
raid_end_bio_io(r1_bio);
}
@@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
* Let's see if all mirrored write operations have finished
* already.
*/
- r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind);
+ r1_bio_write_done(r1_bio);
if (to_put)
bio_put(to_put);
@@ -411,10 +412,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
{
const sector_t this_sector = r1_bio->sector;
const int sectors = r1_bio->sectors;
- int new_disk = -1;
int start_disk;
+ int best_disk;
int i;
- sector_t new_distance, current_distance;
+ sector_t best_dist;
mdk_rdev_t *rdev;
int choose_first;
@@ -425,6 +426,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
* We take the first readable disk when above the resync window.
*/
retry:
+ best_disk = -1;
+ best_dist = MaxSector;
if (conf->mddev->recovery_cp < MaxSector &&
(this_sector + sectors >= conf->next_resync)) {
choose_first = 1;
@@ -434,8 +437,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
start_disk = conf->last_used;
}
- /* make sure the disk is operational */
for (i = 0 ; i < conf->raid_disks ; i++) {
+ sector_t dist;
int disk = start_disk + i;
if (disk >= conf->raid_disks)
disk -= conf->raid_disks;
@@ -443,60 +446,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED
|| rdev == NULL
- || !test_bit(In_sync, &rdev->flags))
+ || test_bit(Faulty, &rdev->flags))
continue;
-
- new_disk = disk;
- if (!test_bit(WriteMostly, &rdev->flags))
- break;
- }
-
- if (new_disk < 0 || choose_first)
- goto rb_out;
-
- /*
- * Don't change to another disk for sequential reads:
- */
- if (conf->next_seq_sect == this_sector)
- goto rb_out;
- if (this_sector == conf->mirrors[new_disk].head_position)
- goto rb_out;
-
- current_distance = abs(this_sector
- - conf->mirrors[new_disk].head_position);
-
- /* look for a better disk - i.e. head is closer */
- start_disk = new_disk;
- for (i = 1; i < conf->raid_disks; i++) {
- int disk = start_disk + 1;
- if (disk >= conf->raid_disks)
- disk -= conf->raid_disks;
-
- rdev = rcu_dereference(conf->mirrors[disk].rdev);
- if (r1_bio->bios[disk] == IO_BLOCKED
- || rdev == NULL
- || !test_bit(In_sync, &rdev->flags)
- || test_bit(WriteMostly, &rdev->flags))
+ if (!test_bit(In_sync, &rdev->flags) &&
+ rdev->recovery_offset < this_sector + sectors)
continue;
-
- if (!atomic_read(&rdev->nr_pending)) {
- new_disk = disk;
+ if (test_bit(WriteMostly, &rdev->flags)) {
+ /* Don't balance among write-mostly, just
+ * use the first as a last resort */
+ if (best_disk < 0)
+ best_disk = disk;
+ continue;
+ }
+ /* This is a reasonable device to use. It might
+ * even be best.
+ */
+ dist = abs(this_sector - conf->mirrors[disk].head_position);
+ if (choose_first
+ /* Don't change to another disk for sequential reads */
+ || conf->next_seq_sect == this_sector
+ || dist == 0
+ /* If device is idle, use it */
+ || atomic_read(&rdev->nr_pending) == 0) {
+ best_disk = disk;
break;
}
- new_distance = abs(this_sector - conf->mirrors[disk].head_position);
- if (new_distance < current_distance) {
- current_distance = new_distance;
- new_disk = disk;
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_disk = disk;
}
}
- rb_out:
- if (new_disk >= 0) {
- rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
+ if (best_disk >= 0) {
+ rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
if (!rdev)
goto retry;
atomic_inc(&rdev->nr_pending);
- if (!test_bit(In_sync, &rdev->flags)) {
+ if (test_bit(Faulty, &rdev->flags)) {
/* cannot risk returning a device that failed
* before we inc'ed nr_pending
*/
@@ -504,11 +490,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
goto retry;
}
conf->next_seq_sect = this_sector + sectors;
- conf->last_used = new_disk;
+ conf->last_used = best_disk;
}
rcu_read_unlock();
- return new_disk;
+ return best_disk;
}
static int raid1_congested(void *data, int bits)
@@ -675,37 +661,36 @@ static void unfreeze_array(conf_t *conf)
/* duplicate the data pages for behind I/O
- * We return a list of bio_vec rather than just page pointers
- * as it makes freeing easier
*/
-static struct bio_vec *alloc_behind_pages(struct bio *bio)
+static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
{
int i;
struct bio_vec *bvec;
- struct bio_vec *pages = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
+ struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
GFP_NOIO);
if (unlikely(!pages))
- goto do_sync_io;
+ return;
bio_for_each_segment(bvec, bio, i) {
- pages[i].bv_page = alloc_page(GFP_NOIO);
- if (unlikely(!pages[i].bv_page))
+ pages[i] = alloc_page(GFP_NOIO);
+ if (unlikely(!pages[i]))
goto do_sync_io;
- memcpy(kmap(pages[i].bv_page) + bvec->bv_offset,
+ memcpy(kmap(pages[i]) + bvec->bv_offset,
kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
- kunmap(pages[i].bv_page);
+ kunmap(pages[i]);
kunmap(bvec->bv_page);
}
-
- return pages;
+ r1_bio->behind_pages = pages;
+ r1_bio->behind_page_count = bio->bi_vcnt;
+ set_bit(R1BIO_BehindIO, &r1_bio->state);
+ return;
do_sync_io:
- if (pages)
- for (i = 0; i < bio->bi_vcnt && pages[i].bv_page; i++)
- put_page(pages[i].bv_page);
+ for (i = 0; i < bio->bi_vcnt; i++)
+ if (pages[i])
+ put_page(pages[i]);
kfree(pages);
PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
- return NULL;
}
static int make_request(mddev_t *mddev, struct bio * bio)
@@ -717,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
int i, targets = 0, disks;
struct bitmap *bitmap;
unsigned long flags;
- struct bio_vec *behind_pages = NULL;
const int rw = bio_data_dir(bio);
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
@@ -870,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
if (bitmap &&
(atomic_read(&bitmap->behind_writes)
< mddev->bitmap_info.max_write_behind) &&
- !waitqueue_active(&bitmap->behind_wait) &&
- (behind_pages = alloc_behind_pages(bio)) != NULL)
- set_bit(R1BIO_BehindIO, &r1_bio->state);
+ !waitqueue_active(&bitmap->behind_wait))
+ alloc_behind_pages(bio, r1_bio);
atomic_set(&r1_bio->remaining, 1);
atomic_set(&r1_bio->behind_remaining, 0);
@@ -893,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
mbio->bi_rw = WRITE | do_flush_fua | do_sync;
mbio->bi_private = r1_bio;
- if (behind_pages) {
+ if (r1_bio->behind_pages) {
struct bio_vec *bvec;
int j;
@@ -905,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
* them all
*/
__bio_for_each_segment(bvec, mbio, j, 0)
- bvec->bv_page = behind_pages[j].bv_page;
+ bvec->bv_page = r1_bio->behind_pages[j];
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
atomic_inc(&r1_bio->behind_remaining);
}
@@ -915,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
bio_list_add(&conf->pending_bio_list, mbio);
spin_unlock_irqrestore(&conf->device_lock, flags);
}
- r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
- kfree(behind_pages); /* the behind pages are attached to the bios now */
+ r1_bio_write_done(r1_bio);
/* In case raid1d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
@@ -1196,194 +1178,210 @@ static void end_sync_write(struct bio *bio, int error)
}
}
-static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
+static int fix_sync_read_error(r1bio_t *r1_bio)
{
+ /* Try some synchronous reads of other devices to get
+ * good data, much like with normal read errors. Only
+ * read into the pages we already have so we don't
+ * need to re-issue the read request.
+ * We don't need to freeze the array, because being in an
+ * active sync request, there is no normal IO, and
+ * no overlapping syncs.
+ */
+ mddev_t *mddev = r1_bio->mddev;
conf_t *conf = mddev->private;
- int i;
- int disks = conf->raid_disks;
- struct bio *bio, *wbio;
-
- bio = r1_bio->bios[r1_bio->read_disk];
+ struct bio *bio = r1_bio->bios[r1_bio->read_disk];
+ sector_t sect = r1_bio->sector;
+ int sectors = r1_bio->sectors;
+ int idx = 0;
+ while (sectors) {
+ int s = sectors;
+ int d = r1_bio->read_disk;
+ int success = 0;
+ mdk_rdev_t *rdev;
+ int start;
- if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
- /* We have read all readable devices. If we haven't
- * got the block, then there is no hope left.
- * If we have, then we want to do a comparison
- * and skip the write if everything is the same.
- * If any blocks failed to read, then we need to
- * attempt an over-write
- */
- int primary;
- if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
- for (i=0; i<mddev->raid_disks; i++)
- if (r1_bio->bios[i]->bi_end_io == end_sync_read)
- md_error(mddev, conf->mirrors[i].rdev);
+ if (s > (PAGE_SIZE >> 9))
+ s = PAGE_SIZE >> 9;
+ do {
+ if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
+ /* No rcu protection needed here; devices
+ * can only be removed when no resync is
+ * active, and resync is currently active
+ */
+ rdev = conf->mirrors[d].rdev;
+ if (sync_page_io(rdev,
+ sect,
+ s<<9,
+ bio->bi_io_vec[idx].bv_page,
+ READ, false)) {
+ success = 1;
+ break;
+ }
+ }
+ d++;
+ if (d == conf->raid_disks)
+ d = 0;
+ } while (!success && d != r1_bio->read_disk);
- md_done_sync(mddev, r1_bio->sectors, 1);
+ if (!success) {
+ char b[BDEVNAME_SIZE];
+ /* Cannot read from anywhere, array is toast */
+ md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+ printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
+ " for block %llu\n",
+ mdname(mddev),
+ bdevname(bio->bi_bdev, b),
+ (unsigned long long)r1_bio->sector);
+ md_done_sync(mddev, r1_bio->sectors, 0);
put_buf(r1_bio);
- return;
+ return 0;
}
- for (primary=0; primary<mddev->raid_disks; primary++)
- if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
- test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
- r1_bio->bios[primary]->bi_end_io = NULL;
- rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
- break;
- }
- r1_bio->read_disk = primary;
- for (i=0; i<mddev->raid_disks; i++)
- if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
- int j;
- int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
- struct bio *pbio = r1_bio->bios[primary];
- struct bio *sbio = r1_bio->bios[i];
-
- if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
- for (j = vcnt; j-- ; ) {
- struct page *p, *s;
- p = pbio->bi_io_vec[j].bv_page;
- s = sbio->bi_io_vec[j].bv_page;
- if (memcmp(page_address(p),
- page_address(s),
- PAGE_SIZE))
- break;
- }
- } else
- j = 0;
- if (j >= 0)
- mddev->resync_mismatches += r1_bio->sectors;
- if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
- && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
- sbio->bi_end_io = NULL;
- rdev_dec_pending(conf->mirrors[i].rdev, mddev);
- } else {
- /* fixup the bio for reuse */
- int size;
- sbio->bi_vcnt = vcnt;
- sbio->bi_size = r1_bio->sectors << 9;
- sbio->bi_idx = 0;
- sbio->bi_phys_segments = 0;
- sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
- sbio->bi_flags |= 1 << BIO_UPTODATE;
- sbio->bi_next = NULL;
- sbio->bi_sector = r1_bio->sector +
- conf->mirrors[i].rdev->data_offset;
- sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
- size = sbio->bi_size;
- for (j = 0; j < vcnt ; j++) {
- struct bio_vec *bi;
- bi = &sbio->bi_io_vec[j];
- bi->bv_offset = 0;
- if (size > PAGE_SIZE)
- bi->bv_len = PAGE_SIZE;
- else
- bi->bv_len = size;
- size -= PAGE_SIZE;
- memcpy(page_address(bi->bv_page),
- page_address(pbio->bi_io_vec[j].bv_page),
- PAGE_SIZE);
- }
- }
- }
+ start = d;
+ /* write it back and re-read */
+ while (d != r1_bio->read_disk) {
+ if (d == 0)
+ d = conf->raid_disks;
+ d--;
+ if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+ continue;
+ rdev = conf->mirrors[d].rdev;
+ if (sync_page_io(rdev,
+ sect,
+ s<<9,
+ bio->bi_io_vec[idx].bv_page,
+ WRITE, false) == 0) {
+ r1_bio->bios[d]->bi_end_io = NULL;
+ rdev_dec_pending(rdev, mddev);
+ md_error(mddev, rdev);
+ } else
+ atomic_add(s, &rdev->corrected_errors);
+ }
+ d = start;
+ while (d != r1_bio->read_disk) {
+ if (d == 0)
+ d = conf->raid_disks;
+ d--;
+ if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+ continue;
+ rdev = conf->mirrors[d].rdev;
+ if (sync_page_io(rdev,
+ sect,
+ s<<9,
+ bio->bi_io_vec[idx].bv_page,
+ READ, false) == 0)
+ md_error(mddev, rdev);
+ }
+ sectors -= s;
+ sect += s;
+ idx++;
}
- if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
- /* ouch - failed to read all of that.
- * Try some synchronous reads of other devices to get
- * good data, much like with normal read errors. Only
- * read into the pages we already have so we don't
- * need to re-issue the read request.
- * We don't need to freeze the array, because being in an
- * active sync request, there is no normal IO, and
- * no overlapping syncs.
- */
- sector_t sect = r1_bio->sector;
- int sectors = r1_bio->sectors;
- int idx = 0;
-
- while(sectors) {
- int s = sectors;
- int d = r1_bio->read_disk;
- int success = 0;
- mdk_rdev_t *rdev;
-
- if (s > (PAGE_SIZE>>9))
- s = PAGE_SIZE >> 9;
- do {
- if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
- /* No rcu protection needed here devices
- * can only be removed when no resync is
- * active, and resync is currently active
- */
- rdev = conf->mirrors[d].rdev;
- if (sync_page_io(rdev,
- sect,
- s<<9,
- bio->bi_io_vec[idx].bv_page,
- READ, false)) {
- success = 1;
- break;
- }
- }
- d++;
- if (d == conf->raid_disks)
- d = 0;
- } while (!success && d != r1_bio->read_disk);
-
- if (success) {
- int start = d;
- /* write it back and re-read */
- set_bit(R1BIO_Uptodate, &r1_bio->state);
- while (d != r1_bio->read_disk) {
- if (d == 0)
- d = conf->raid_disks;
- d--;
- if (r1_bio->bios[d]->bi_end_io != end_sync_read)
- continue;
- rdev = conf->mirrors[d].rdev;
- atomic_add(s, &rdev->corrected_errors);
- if (sync_page_io(rdev,
- sect,
- s<<9,
- bio->bi_io_vec[idx].bv_page,
- WRITE, false) == 0)
- md_error(mddev, rdev);
- }
- d = start;
- while (d != r1_bio->read_disk) {
- if (d == 0)
- d = conf->raid_disks;
- d--;
- if (r1_bio->bios[d]->bi_end_io != end_sync_read)
- continue;
- rdev = conf->mirrors[d].rdev;
- if (sync_page_io(rdev,
- sect,
- s<<9,
- bio->bi_io_vec[idx].bv_page,
- READ, false) == 0)
- md_error(mddev, rdev);
- }
- } else {
- char b[BDEVNAME_SIZE];
- /* Cannot read from anywhere, array is toast */
- md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
- printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
- " for block %llu\n",
- mdname(mddev),
- bdevname(bio->bi_bdev, b),
- (unsigned long long)r1_bio->sector);
- md_done_sync(mddev, r1_bio->sectors, 0);
- put_buf(r1_bio);
- return;
+ set_bit(R1BIO_Uptodate, &r1_bio->state);
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+ return 1;
+}
+
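fix_sync_read_error() above makes three passes over the mirrors in a ring starting at read_disk: find any device that can supply the block, write the good data back to every other candidate (walking the ring backwards, which ends at the failed read_disk itself), then re-read to verify the rewrites stuck. A condensed sketch of the ring walk, with try_read()/write_back() as hypothetical stand-ins for the sync_page_io() calls:

    int d = read_disk, start;
    do {                                    /* pass 1: find a good copy */
            if (try_read(d))
                    break;
            if (++d == raid_disks)
                    d = 0;
    } while (d != read_disk);
    start = d;
    while (d != read_disk) {                /* pass 2: rewrite the rest */
            if (d == 0)
                    d = raid_disks;
            d--;
            write_back(d);
    }

If pass 1 finds nothing, the real function fails the read disk with md_error() and completes the sync with an error instead.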
+static int process_checks(r1bio_t *r1_bio)
+{
+ /* We have read all readable devices. If we haven't
+ * got the block, then there is no hope left.
+ * If we have, then we want to do a comparison
+ * and skip the write if everything is the same.
+ * If any blocks failed to read, then we need to
+ * attempt an over-write
+ */
+ mddev_t *mddev = r1_bio->mddev;
+ conf_t *conf = mddev->private;
+ int primary;
+ int i;
+
+ for (primary = 0; primary < conf->raid_disks; primary++)
+ if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
+ test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+ r1_bio->bios[primary]->bi_end_io = NULL;
+ rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
+ break;
+ }
+ r1_bio->read_disk = primary;
+ for (i = 0; i < conf->raid_disks; i++) {
+ int j;
+ int vcnt = r1_bio->sectors >> (PAGE_SHIFT - 9);
+ struct bio *pbio = r1_bio->bios[primary];
+ struct bio *sbio = r1_bio->bios[i];
+ int size;
+
+ if (r1_bio->bios[i]->bi_end_io != end_sync_read)
+ continue;
+
+ if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
+ for (j = vcnt; j-- ; ) {
+ struct page *p, *s;
+ p = pbio->bi_io_vec[j].bv_page;
+ s = sbio->bi_io_vec[j].bv_page;
+ if (memcmp(page_address(p),
+ page_address(s),
+ PAGE_SIZE))
+ break;
}
- sectors -= s;
- sect += s;
- idx ++;
+ } else
+ j = 0;
+ if (j >= 0)
+ mddev->resync_mismatches += r1_bio->sectors;
+ if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
+ && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
+ /* No need to write to this device. */
+ sbio->bi_end_io = NULL;
+ rdev_dec_pending(conf->mirrors[i].rdev, mddev);
+ continue;
+ }
+ /* fixup the bio for reuse */
+ sbio->bi_vcnt = vcnt;
+ sbio->bi_size = r1_bio->sectors << 9;
+ sbio->bi_idx = 0;
+ sbio->bi_phys_segments = 0;
+ sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
+ sbio->bi_flags |= 1 << BIO_UPTODATE;
+ sbio->bi_next = NULL;
+ sbio->bi_sector = r1_bio->sector +
+ conf->mirrors[i].rdev->data_offset;
+ sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+ size = sbio->bi_size;
+ for (j = 0; j < vcnt; j++) {
+ struct bio_vec *bi;
+ bi = &sbio->bi_io_vec[j];
+ bi->bv_offset = 0;
+ if (size > PAGE_SIZE)
+ bi->bv_len = PAGE_SIZE;
+ else
+ bi->bv_len = size;
+ size -= PAGE_SIZE;
+ memcpy(page_address(bi->bv_page),
+ page_address(pbio->bi_io_vec[j].bv_page),
+ PAGE_SIZE);
}
}
+ return 0;
+}
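process_checks() factors out the check/repair comparison that used to live inline in sync_request_write(): after picking the first up-to-date device as primary, each remaining device is compared page by page, and only mismatching (or unreadable) copies are rewritten. The per-device decision boils down to this, sketched with hypothetical helper names:

    if (j >= 0)                             /* some page differed */
            mddev->resync_mismatches += r1_bio->sectors;
    if (j < 0 ||                            /* identical copy, or ... */
        (check_only && read_ok))            /* 'check' run: never write */
            skip_write(sbio);               /* sbio->bi_end_io = NULL */
    else
            refill_from_primary(sbio);      /* reuse sbio for the rewrite */

Note the function always returns 0 as written; the < 0 test in the caller leaves room for a future failure path.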
+static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
+{
+ conf_t *conf = mddev->private;
+ int i;
+ int disks = conf->raid_disks;
+ struct bio *bio, *wbio;
+
+ bio = r1_bio->bios[r1_bio->read_disk];
+
+ if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
+ /* ouch - failed to read all of that. */
+ if (!fix_sync_read_error(r1_bio))
+ return;
+
+ if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+ if (process_checks(r1_bio) < 0)
+ return;
/*
* schedule writes
*/
@@ -2063,7 +2061,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors &&
- mddev->recovery_cp == MaxSector) {
+ mddev->recovery_cp > mddev->dev_sectors) {
mddev->recovery_cp = mddev->dev_sectors;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
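The resize test is loosened in the same way here and in raid5_resize() further down: the old condition only scheduled a resync of the added space when the array was fully clean (recovery_cp == MaxSector), while the new one also fires when an existing checkpoint already lies beyond the old end of device. A worked example, under the assumption that dev_sectors still holds the old size at this point:

    /* grow from 1000 to 2000 sectors:
     * recovery_cp == MaxSector : old and new tests both resync the tail
     * recovery_cp == 1500      : only the new test (1500 > 1000) pulls
     *                            recovery_cp back to 1000, so the space
     *                            beyond the old end gets synced too */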
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index cbfdf1a..5fc4ca1 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -94,7 +94,9 @@ struct r1bio_s {
int read_disk;
struct list_head retry_list;
- struct bitmap_update *bitmap_update;
+ /* Next two are only valid when R1BIO_BehindIO is set */
+ struct page **behind_pages;
+ int behind_page_count;
/*
* if the IO is in WRITE direction, then multiple bios are used.
* We choose the number when they are allocated.
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8e94626..6e84668 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -271,9 +271,10 @@ static void raid10_end_read_request(struct bio *bio, int error)
*/
set_bit(R10BIO_Uptodate, &r10_bio->state);
raid_end_bio_io(r10_bio);
+ rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
} else {
/*
- * oops, read error:
+ * oops, read error - keep the refcount on the rdev
*/
char b[BDEVNAME_SIZE];
if (printk_ratelimit())
@@ -282,8 +283,6 @@ static void raid10_end_read_request(struct bio *bio, int error)
bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
reschedule_retry(r10_bio);
}
-
- rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
}
static void raid10_end_write_request(struct bio *bio, int error)
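Moving rdev_dec_pending() into the success branch changes who owns the device reference after a read completes: on success the reference is dropped immediately, on failure it is deliberately kept so the retry path can keep touching the device. A sketch of the resulting flow (the matching drop appears in the raid10d() hunk further down):

    if (uptodate) {
            raid_end_bio_io(r10_bio);
            rdev_dec_pending(rdev, mddev);  /* done with this device */
    } else {
            reschedule_retry(r10_bio);      /* keep the ref; raid10d()
                                             * drops it once
                                             * fix_read_error() is done */
    }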
@@ -488,13 +487,19 @@ static int raid10_mergeable_bvec(struct request_queue *q,
static int read_balance(conf_t *conf, r10bio_t *r10_bio)
{
const sector_t this_sector = r10_bio->sector;
- int disk, slot, nslot;
+ int disk, slot;
const int sectors = r10_bio->sectors;
- sector_t new_distance, current_distance;
+ sector_t new_distance, best_dist;
mdk_rdev_t *rdev;
+ int do_balance;
+ int best_slot;
raid10_find_phys(conf, r10_bio);
rcu_read_lock();
+retry:
+ best_slot = -1;
+ best_dist = MaxSector;
+ do_balance = 1;
/*
* Check if we can balance. We can balance on the whole
* device if no resync is going on (recovery is ok), or below
@@ -502,86 +507,58 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
* above the resync window.
*/
if (conf->mddev->recovery_cp < MaxSector
- && (this_sector + sectors >= conf->next_resync)) {
- /* make sure that disk is operational */
- slot = 0;
- disk = r10_bio->devs[slot].devnum;
-
- while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
- r10_bio->devs[slot].bio == IO_BLOCKED ||
- !test_bit(In_sync, &rdev->flags)) {
- slot++;
- if (slot == conf->copies) {
- slot = 0;
- disk = -1;
- break;
- }
- disk = r10_bio->devs[slot].devnum;
- }
- goto rb_out;
- }
-
+ && (this_sector + sectors >= conf->next_resync))
+ do_balance = 0;
- /* make sure the disk is operational */
- slot = 0;
- disk = r10_bio->devs[slot].devnum;
- while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
- r10_bio->devs[slot].bio == IO_BLOCKED ||
- !test_bit(In_sync, &rdev->flags)) {
- slot ++;
- if (slot == conf->copies) {
- disk = -1;
- goto rb_out;
- }
+ for (slot = 0; slot < conf->copies ; slot++) {
+ if (r10_bio->devs[slot].bio == IO_BLOCKED)
+ continue;
disk = r10_bio->devs[slot].devnum;
- }
-
-
- current_distance = abs(r10_bio->devs[slot].addr -
- conf->mirrors[disk].head_position);
-
- /* Find the disk whose head is closest,
- * or - for far > 1 - find the closest to partition beginning */
-
- for (nslot = slot; nslot < conf->copies; nslot++) {
- int ndisk = r10_bio->devs[nslot].devnum;
-
-
- if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
- r10_bio->devs[nslot].bio == IO_BLOCKED ||
- !test_bit(In_sync, &rdev->flags))
+ rdev = rcu_dereference(conf->mirrors[disk].rdev);
+ if (rdev == NULL)
continue;
+ if (!test_bit(In_sync, &rdev->flags))
+ continue;
+
+ if (!do_balance)
+ break;
/* This optimisation is debatable, and completely destroys
* sequential read speed for 'far copies' arrays. So only
* keep it for 'near' arrays, and review those later.
*/
- if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) {
- disk = ndisk;
- slot = nslot;
+ if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending))
break;
- }
/* for far > 1 always use the lowest address */
if (conf->far_copies > 1)
- new_distance = r10_bio->devs[nslot].addr;
+ new_distance = r10_bio->devs[slot].addr;
else
- new_distance = abs(r10_bio->devs[nslot].addr -
- conf->mirrors[ndisk].head_position);
- if (new_distance < current_distance) {
- current_distance = new_distance;
- disk = ndisk;
- slot = nslot;
+ new_distance = abs(r10_bio->devs[slot].addr -
+ conf->mirrors[disk].head_position);
+ if (new_distance < best_dist) {
+ best_dist = new_distance;
+ best_slot = slot;
}
}
+ if (slot == conf->copies)
+ slot = best_slot;
-rb_out:
- r10_bio->read_slot = slot;
-/* conf->next_seq_sect = this_sector + sectors;*/
-
- if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL)
- atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
- else
+ if (slot >= 0) {
+ disk = r10_bio->devs[slot].devnum;
+ rdev = rcu_dereference(conf->mirrors[disk].rdev);
+ if (!rdev)
+ goto retry;
+ atomic_inc(&rdev->nr_pending);
+ if (test_bit(Faulty, &rdev->flags)) {
+ /* Cannot risk returning a device that failed
+ * before we inc'ed nr_pending
+ */
+ rdev_dec_pending(rdev, conf->mddev);
+ goto retry;
+ }
+ r10_bio->read_slot = slot;
+ } else
disk = -1;
rcu_read_unlock();
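The tail of the new read_balance() uses the usual lockless take-a-reference dance: re-check the rdev pointer, bump nr_pending, then re-check Faulty, backing out and retrying if the device failed in the window between the check and the increment. Condensed:

    retry:
            /* scan slots, pick best_slot ... */
            rdev = rcu_dereference(conf->mirrors[disk].rdev);
            if (!rdev)
                    goto retry;             /* raced with removal */
            atomic_inc(&rdev->nr_pending);
            if (test_bit(Faulty, &rdev->flags)) {
                    /* failed after we chose it: drop and rescan */
                    rdev_dec_pending(rdev, conf->mddev);
                    goto retry;
            }

Because the whole sequence runs under rcu_read_lock(), the rdev structure itself cannot be freed mid-check.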
@@ -1460,40 +1437,33 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[r10_bio->read_slot].devnum;
- rcu_read_lock();
- rdev = rcu_dereference(conf->mirrors[d].rdev);
- if (rdev) { /* If rdev is not NULL */
- char b[BDEVNAME_SIZE];
- int cur_read_error_count = 0;
+ /* still own a reference to this rdev, so it cannot
+ * have been cleared recently.
+ */
+ rdev = conf->mirrors[d].rdev;
- bdevname(rdev->bdev, b);
+ if (test_bit(Faulty, &rdev->flags))
+ /* drive has already been failed, just ignore any
+ * more fix_read_error() attempts */
+ return;
- if (test_bit(Faulty, &rdev->flags)) {
- rcu_read_unlock();
- /* drive has already been failed, just ignore any
- more fix_read_error() attempts */
- return;
- }
+ check_decay_read_errors(mddev, rdev);
+ atomic_inc(&rdev->read_errors);
+ if (atomic_read(&rdev->read_errors) > max_read_errors) {
+ char b[BDEVNAME_SIZE];
+ bdevname(rdev->bdev, b);
- check_decay_read_errors(mddev, rdev);
- atomic_inc(&rdev->read_errors);
- cur_read_error_count = atomic_read(&rdev->read_errors);
- if (cur_read_error_count > max_read_errors) {
- rcu_read_unlock();
- printk(KERN_NOTICE
- "md/raid10:%s: %s: Raid device exceeded "
- "read_error threshold "
- "[cur %d:max %d]\n",
- mdname(mddev),
- b, cur_read_error_count, max_read_errors);
- printk(KERN_NOTICE
- "md/raid10:%s: %s: Failing raid "
- "device\n", mdname(mddev), b);
- md_error(mddev, conf->mirrors[d].rdev);
- return;
- }
+ printk(KERN_NOTICE
+ "md/raid10:%s: %s: Raid device exceeded "
+ "read_error threshold [cur %d:max %d]\n",
+ mdname(mddev), b,
+ atomic_read(&rdev->read_errors), max_read_errors);
+ printk(KERN_NOTICE
+ "md/raid10:%s: %s: Failing raid device\n",
+ mdname(mddev), b);
+ md_error(mddev, conf->mirrors[d].rdev);
+ return;
}
- rcu_read_unlock();
while(sectors) {
int s = sectors;
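fix_read_error() now assumes the caller still holds an nr_pending reference (kept by raid10_end_read_request() above), so the rcu locking disappears and the error accounting runs unconditionally up front. Condensed from the hunk:

    check_decay_read_errors(mddev, rdev);   /* age the error counter */
    atomic_inc(&rdev->read_errors);
    if (atomic_read(&rdev->read_errors) > max_read_errors) {
            md_error(mddev, rdev);          /* too flaky: fail it */
            return;
    }

so a device that keeps producing read errors is eventually failed rather than endlessly "corrected".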
@@ -1562,8 +1532,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
"write failed"
" (%d sectors at %llu on %s)\n",
mdname(mddev), s,
- (unsigned long long)(sect+
- rdev->data_offset),
+ (unsigned long long)(
+ sect + rdev->data_offset),
bdevname(rdev->bdev, b));
printk(KERN_NOTICE "md/raid10:%s: %s: failing "
"drive\n",
@@ -1599,8 +1569,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
"corrected sectors"
" (%d sectors at %llu on %s)\n",
mdname(mddev), s,
- (unsigned long long)(sect+
- rdev->data_offset),
+ (unsigned long long)(
+ sect + rdev->data_offset),
bdevname(rdev->bdev, b));
printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
mdname(mddev),
@@ -1612,8 +1582,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
"md/raid10:%s: read error corrected"
" (%d sectors at %llu on %s)\n",
mdname(mddev), s,
- (unsigned long long)(sect+
- rdev->data_offset),
+ (unsigned long long)(
+ sect + rdev->data_offset),
bdevname(rdev->bdev, b));
}
@@ -1663,7 +1633,8 @@ static void raid10d(mddev_t *mddev)
else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
recovery_request_write(mddev, r10_bio);
else {
- int mirror;
+ int slot = r10_bio->read_slot;
+ int mirror = r10_bio->devs[slot].devnum;
/* we got a read error. Maybe the drive is bad. Maybe just
* the block and we can fix it.
* We freeze all other IO, and try reading the block from
@@ -1677,9 +1648,10 @@ static void raid10d(mddev_t *mddev)
fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf);
}
+ rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
- bio = r10_bio->devs[r10_bio->read_slot].bio;
- r10_bio->devs[r10_bio->read_slot].bio =
+ bio = r10_bio->devs[slot].bio;
+ r10_bio->devs[slot].bio =
mddev->ro ? IO_BLOCKED : NULL;
mirror = read_balance(conf, r10_bio);
if (mirror == -1) {
@@ -1693,6 +1665,7 @@ static void raid10d(mddev_t *mddev)
} else {
const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
bio_put(bio);
+ slot = r10_bio->read_slot;
rdev = conf->mirrors[mirror].rdev;
if (printk_ratelimit())
printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to"
@@ -1702,8 +1675,8 @@ static void raid10d(mddev_t *mddev)
(unsigned long long)r10_bio->sector);
bio = bio_clone_mddev(r10_bio->master_bio,
GFP_NOIO, mddev);
- r10_bio->devs[r10_bio->read_slot].bio = bio;
- bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
+ r10_bio->devs[slot].bio = bio;
+ bio->bi_sector = r10_bio->devs[slot].addr
+ rdev->data_offset;
bio->bi_bdev = rdev->bdev;
bio->bi_rw = READ | do_sync;
@@ -1763,13 +1736,13 @@ static int init_resync(conf_t *conf)
*
*/
-static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
+static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
+ int *skipped, int go_faster)
{
conf_t *conf = mddev->private;
r10bio_t *r10_bio;
struct bio *biolist = NULL, *bio;
sector_t max_sector, nr_sectors;
- int disk;
int i;
int max_sync;
sector_t sync_blocks;
@@ -1858,108 +1831,114 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
int j, k;
r10_bio = NULL;
- for (i=0 ; i<conf->raid_disks; i++)
- if (conf->mirrors[i].rdev &&
- !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) {
- int still_degraded = 0;
- /* want to reconstruct this device */
- r10bio_t *rb2 = r10_bio;
- sector_t sect = raid10_find_virt(conf, sector_nr, i);
- int must_sync;
- /* Unless we are doing a full sync, we only need
- * to recover the block if it is set in the bitmap
- */
- must_sync = bitmap_start_sync(mddev->bitmap, sect,
- &sync_blocks, 1);
- if (sync_blocks < max_sync)
- max_sync = sync_blocks;
- if (!must_sync &&
- !conf->fullsync) {
- /* yep, skip the sync_blocks here, but don't assume
- * that there will never be anything to do here
- */
- chunks_skipped = -1;
- continue;
- }
+ for (i = 0; i < conf->raid_disks; i++) {
+ int still_degraded;
+ r10bio_t *rb2;
+ sector_t sect;
+ int must_sync;
- r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
- raise_barrier(conf, rb2 != NULL);
- atomic_set(&r10_bio->remaining, 0);
+ if (conf->mirrors[i].rdev == NULL ||
+ test_bit(In_sync, &conf->mirrors[i].rdev->flags))
+ continue;
- r10_bio->master_bio = (struct bio*)rb2;
- if (rb2)
- atomic_inc(&rb2->remaining);
- r10_bio->mddev = mddev;
- set_bit(R10BIO_IsRecover, &r10_bio->state);
- r10_bio->sector = sect;
+ still_degraded = 0;
+ /* want to reconstruct this device */
+ rb2 = r10_bio;
+ sect = raid10_find_virt(conf, sector_nr, i);
+ /* Unless we are doing a full sync, we only need
+ * to recover the block if it is set in the bitmap
+ */
+ must_sync = bitmap_start_sync(mddev->bitmap, sect,
+ &sync_blocks, 1);
+ if (sync_blocks < max_sync)
+ max_sync = sync_blocks;
+ if (!must_sync &&
+ !conf->fullsync) {
+ /* yep, skip the sync_blocks here, but don't assume
+ * that there will never be anything to do here
+ */
+ chunks_skipped = -1;
+ continue;
+ }
- raid10_find_phys(conf, r10_bio);
+ r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+ raise_barrier(conf, rb2 != NULL);
+ atomic_set(&r10_bio->remaining, 0);
- /* Need to check if the array will still be
- * degraded
- */
- for (j=0; j<conf->raid_disks; j++)
- if (conf->mirrors[j].rdev == NULL ||
- test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
- still_degraded = 1;
- break;
- }
-
- must_sync = bitmap_start_sync(mddev->bitmap, sect,
- &sync_blocks, still_degraded);
-
- for (j=0; j<conf->copies;j++) {
- int d = r10_bio->devs[j].devnum;
- if (conf->mirrors[d].rdev &&
- test_bit(In_sync, &conf->mirrors[d].rdev->flags)) {
- /* This is where we read from */
- bio = r10_bio->devs[0].bio;
- bio->bi_next = biolist;
- biolist = bio;
- bio->bi_private = r10_bio;
- bio->bi_end_io = end_sync_read;
- bio->bi_rw = READ;
- bio->bi_sector = r10_bio->devs[j].addr +
- conf->mirrors[d].rdev->data_offset;
- bio->bi_bdev = conf->mirrors[d].rdev->bdev;
- atomic_inc(&conf->mirrors[d].rdev->nr_pending);
- atomic_inc(&r10_bio->remaining);
- /* and we write to 'i' */
-
- for (k=0; k<conf->copies; k++)
- if (r10_bio->devs[k].devnum == i)
- break;
- BUG_ON(k == conf->copies);
- bio = r10_bio->devs[1].bio;
- bio->bi_next = biolist;
- biolist = bio;
- bio->bi_private = r10_bio;
- bio->bi_end_io = end_sync_write;
- bio->bi_rw = WRITE;
- bio->bi_sector = r10_bio->devs[k].addr +
- conf->mirrors[i].rdev->data_offset;
- bio->bi_bdev = conf->mirrors[i].rdev->bdev;
-
- r10_bio->devs[0].devnum = d;
- r10_bio->devs[1].devnum = i;
+ r10_bio->master_bio = (struct bio*)rb2;
+ if (rb2)
+ atomic_inc(&rb2->remaining);
+ r10_bio->mddev = mddev;
+ set_bit(R10BIO_IsRecover, &r10_bio->state);
+ r10_bio->sector = sect;
- break;
- }
- }
- if (j == conf->copies) {
- /* Cannot recover, so abort the recovery */
- put_buf(r10_bio);
- if (rb2)
- atomic_dec(&rb2->remaining);
- r10_bio = rb2;
- if (!test_and_set_bit(MD_RECOVERY_INTR,
- &mddev->recovery))
- printk(KERN_INFO "md/raid10:%s: insufficient "
- "working devices for recovery.\n",
- mdname(mddev));
+ raid10_find_phys(conf, r10_bio);
+
+ /* Need to check if the array will still be
+ * degraded
+ */
+ for (j=0; j<conf->raid_disks; j++)
+ if (conf->mirrors[j].rdev == NULL ||
+ test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
+ still_degraded = 1;
break;
}
+
+ must_sync = bitmap_start_sync(mddev->bitmap, sect,
+ &sync_blocks, still_degraded);
+
+ for (j = 0; j < conf->copies; j++) {
+ int d = r10_bio->devs[j].devnum;
+ if (!conf->mirrors[d].rdev ||
+ !test_bit(In_sync, &conf->mirrors[d].rdev->flags))
+ continue;
+ /* This is where we read from */
+ bio = r10_bio->devs[0].bio;
+ bio->bi_next = biolist;
+ biolist = bio;
+ bio->bi_private = r10_bio;
+ bio->bi_end_io = end_sync_read;
+ bio->bi_rw = READ;
+ bio->bi_sector = r10_bio->devs[j].addr +
+ conf->mirrors[d].rdev->data_offset;
+ bio->bi_bdev = conf->mirrors[d].rdev->bdev;
+ atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+ atomic_inc(&r10_bio->remaining);
+ /* and we write to 'i' */
+
+ for (k=0; k<conf->copies; k++)
+ if (r10_bio->devs[k].devnum == i)
+ break;
+ BUG_ON(k == conf->copies);
+ bio = r10_bio->devs[1].bio;
+ bio->bi_next = biolist;
+ biolist = bio;
+ bio->bi_private = r10_bio;
+ bio->bi_end_io = end_sync_write;
+ bio->bi_rw = WRITE;
+ bio->bi_sector = r10_bio->devs[k].addr +
+ conf->mirrors[i].rdev->data_offset;
+ bio->bi_bdev = conf->mirrors[i].rdev->bdev;
+
+ r10_bio->devs[0].devnum = d;
+ r10_bio->devs[1].devnum = i;
+
+ break;
+ }
+ if (j == conf->copies) {
+ /* Cannot recover, so abort the recovery */
+ put_buf(r10_bio);
+ if (rb2)
+ atomic_dec(&rb2->remaining);
+ r10_bio = rb2;
+ if (!test_and_set_bit(MD_RECOVERY_INTR,
+ &mddev->recovery))
+ printk(KERN_INFO "md/raid10:%s: insufficient "
+ "working devices for recovery.\n",
+ mdname(mddev));
+ break;
}
+ }
if (biolist == NULL) {
while (r10_bio) {
r10bio_t *rb2 = r10_bio;
@@ -1977,7 +1956,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
if (!bitmap_start_sync(mddev->bitmap, sector_nr,
&sync_blocks, mddev->degraded) &&
- !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+ !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED,
+ &mddev->recovery)) {
/* We can skip this block */
*skipped = 1;
return sync_blocks + sectors_skipped;
@@ -2022,7 +2002,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
for (i=0; i<conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
if (r10_bio->devs[i].bio->bi_end_io)
- rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+ rdev_dec_pending(conf->mirrors[d].rdev,
+ mddev);
}
put_buf(r10_bio);
biolist = NULL;
@@ -2047,26 +2028,27 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
do {
struct page *page;
int len = PAGE_SIZE;
- disk = 0;
if (sector_nr + (len>>9) > max_sector)
len = (max_sector - sector_nr) << 9;
if (len == 0)
break;
for (bio= biolist ; bio ; bio=bio->bi_next) {
+ struct bio *bio2;
page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
- if (bio_add_page(bio, page, len, 0) == 0) {
- /* stop here */
- struct bio *bio2;
- bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
- for (bio2 = biolist; bio2 && bio2 != bio; bio2 = bio2->bi_next) {
- /* remove last page from this bio */
- bio2->bi_vcnt--;
- bio2->bi_size -= len;
- bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
- }
- goto bio_full;
+ if (bio_add_page(bio, page, len, 0))
+ continue;
+
+ /* stop here */
+ bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
+ for (bio2 = biolist;
+ bio2 && bio2 != bio;
+ bio2 = bio2->bi_next) {
+ /* remove last page from this bio */
+ bio2->bi_vcnt--;
+ bio2->bi_size -= len;
+ bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
}
- disk = i;
+ goto bio_full;
}
nr_sectors += len>>9;
sector_nr += len>>9;
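The restructured loop above keeps every bio in the list the same length: bio_add_page() is attempted on each bio, and the first refusal rolls the page back off every bio that already accepted it. Condensed:

    for (bio = biolist; bio; bio = bio->bi_next) {
            if (bio_add_page(bio, page, len, 0))
                    continue;               /* this bio took the page */
            /* stop here: strip the page from the earlier bios */
            for (bio2 = biolist; bio2 != bio; bio2 = bio2->bi_next) {
                    bio2->bi_vcnt--;
                    bio2->bi_size -= len;
                    bio2->bi_flags &= ~(1 << BIO_SEG_VALID);
            }
            goto bio_full;
    }

Dropping the dead 'disk' bookkeeping here is what lets the earlier hunk delete the variable from sync_request() entirely.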
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 49bf5f8..34dd545 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1700,27 +1700,25 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
raid5_conf_t *conf = mddev->private;
pr_debug("raid456: error called\n");
- if (!test_bit(Faulty, &rdev->flags)) {
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
- if (test_and_clear_bit(In_sync, &rdev->flags)) {
- unsigned long flags;
- spin_lock_irqsave(&conf->device_lock, flags);
- mddev->degraded++;
- spin_unlock_irqrestore(&conf->device_lock, flags);
- /*
- * if recovery was running, make sure it aborts.
- */
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- }
- set_bit(Faulty, &rdev->flags);
- printk(KERN_ALERT
- "md/raid:%s: Disk failure on %s, disabling device.\n"
- "md/raid:%s: Operation continuing on %d devices.\n",
- mdname(mddev),
- bdevname(rdev->bdev, b),
- mdname(mddev),
- conf->raid_disks - mddev->degraded);
+ if (test_and_clear_bit(In_sync, &rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
+ mddev->degraded++;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ /*
+ * if recovery was running, make sure it aborts.
+ */
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
+ set_bit(Faulty, &rdev->flags);
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ printk(KERN_ALERT
+ "md/raid:%s: Disk failure on %s, disabling device.\n"
+ "md/raid:%s: Operation continuing on %d devices.\n",
+ mdname(mddev),
+ bdevname(rdev->bdev, b),
+ mdname(mddev),
+ conf->raid_disks - mddev->degraded);
}
/*
@@ -5391,7 +5389,8 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
return -EINVAL;
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
- if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
+ if (sectors > mddev->dev_sectors &&
+ mddev->recovery_cp > mddev->dev_sectors) {
mddev->recovery_cp = mddev->dev_sectors;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 0ae9352..18fccf9 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -45,9 +45,9 @@
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
+#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
-#include <linux/prefetch.h>
#endif
#include "igb.h"
diff --git a/fs/compat.c b/fs/compat.c
index 72fe6cd..0ea0083 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1306,241 +1306,6 @@ compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int
return do_sys_open(dfd, filename, flags, mode);
}
-/*
- * compat_count() counts the number of arguments/envelopes. It is basically
- * a copy of count() from fs/exec.c, except that it works with 32 bit argv
- * and envp pointers.
- */
-static int compat_count(compat_uptr_t __user *argv, int max)
-{
- int i = 0;
-
- if (argv != NULL) {
- for (;;) {
- compat_uptr_t p;
-
- if (get_user(p, argv))
- return -EFAULT;
- if (!p)
- break;
- argv++;
- if (i++ >= max)
- return -E2BIG;
-
- if (fatal_signal_pending(current))
- return -ERESTARTNOHAND;
- cond_resched();
- }
- }
- return i;
-}
-
-/*
- * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c
- * except that it works with 32 bit argv and envp pointers.
- */
-static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
- struct linux_binprm *bprm)
-{
- struct page *kmapped_page = NULL;
- char *kaddr = NULL;
- unsigned long kpos = 0;
- int ret;
-
- while (argc-- > 0) {
- compat_uptr_t str;
- int len;
- unsigned long pos;
-
- if (get_user(str, argv+argc) ||
- !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
- ret = -EFAULT;
- goto out;
- }
-
- if (len > MAX_ARG_STRLEN) {
- ret = -E2BIG;
- goto out;
- }
-
- /* We're going to work our way backwords. */
- pos = bprm->p;
- str += len;
- bprm->p -= len;
-
- while (len > 0) {
- int offset, bytes_to_copy;
-
- if (fatal_signal_pending(current)) {
- ret = -ERESTARTNOHAND;
- goto out;
- }
- cond_resched();
-
- offset = pos % PAGE_SIZE;
- if (offset == 0)
- offset = PAGE_SIZE;
-
- bytes_to_copy = offset;
- if (bytes_to_copy > len)
- bytes_to_copy = len;
-
- offset -= bytes_to_copy;
- pos -= bytes_to_copy;
- str -= bytes_to_copy;
- len -= bytes_to_copy;
-
- if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
- struct page *page;
-
- page = get_arg_page(bprm, pos, 1);
- if (!page) {
- ret = -E2BIG;
- goto out;
- }
-
- if (kmapped_page) {
- flush_kernel_dcache_page(kmapped_page);
- kunmap(kmapped_page);
- put_page(kmapped_page);
- }
- kmapped_page = page;
- kaddr = kmap(kmapped_page);
- kpos = pos & PAGE_MASK;
- flush_cache_page(bprm->vma, kpos,
- page_to_pfn(kmapped_page));
- }
- if (copy_from_user(kaddr+offset, compat_ptr(str),
- bytes_to_copy)) {
- ret = -EFAULT;
- goto out;
- }
- }
- }
- ret = 0;
-out:
- if (kmapped_page) {
- flush_kernel_dcache_page(kmapped_page);
- kunmap(kmapped_page);
- put_page(kmapped_page);
- }
- return ret;
-}
-
-/*
- * compat_do_execve() is mostly a copy of do_execve(), with the exception
- * that it processes 32 bit argv and envp pointers.
- */
-int compat_do_execve(char * filename,
- compat_uptr_t __user *argv,
- compat_uptr_t __user *envp,
- struct pt_regs * regs)
-{
- struct linux_binprm *bprm;
- struct file *file;
- struct files_struct *displaced;
- bool clear_in_exec;
- int retval;
-
- retval = unshare_files(&displaced);
- if (retval)
- goto out_ret;
-
- retval = -ENOMEM;
- bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
- if (!bprm)
- goto out_files;
-
- retval = prepare_bprm_creds(bprm);
- if (retval)
- goto out_free;
-
- retval = check_unsafe_exec(bprm);
- if (retval < 0)
- goto out_free;
- clear_in_exec = retval;
- current->in_execve = 1;
-
- file = open_exec(filename);
- retval = PTR_ERR(file);
- if (IS_ERR(file))
- goto out_unmark;
-
- sched_exec();
-
- bprm->file = file;
- bprm->filename = filename;
- bprm->interp = filename;
-
- retval = bprm_mm_init(bprm);
- if (retval)
- goto out_file;
-
- bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
- if ((retval = bprm->argc) < 0)
- goto out;
-
- bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
- if ((retval = bprm->envc) < 0)
- goto out;
-
- retval = prepare_binprm(bprm);
- if (retval < 0)
- goto out;
-
- retval = copy_strings_kernel(1, &bprm->filename, bprm);
- if (retval < 0)
- goto out;
-
- bprm->exec = bprm->p;
- retval = compat_copy_strings(bprm->envc, envp, bprm);
- if (retval < 0)
- goto out;
-
- retval = compat_copy_strings(bprm->argc, argv, bprm);
- if (retval < 0)
- goto out;
-
- retval = search_binary_handler(bprm, regs);
- if (retval < 0)
- goto out;
-
- /* execve succeeded */
- current->fs->in_exec = 0;
- current->in_execve = 0;
- acct_update_integrals(current);
- free_bprm(bprm);
- if (displaced)
- put_files_struct(displaced);
- return retval;
-
-out:
- if (bprm->mm) {
- acct_arg_size(bprm, 0);
- mmput(bprm->mm);
- }
-
-out_file:
- if (bprm->file) {
- allow_write_access(bprm->file);
- fput(bprm->file);
- }
-
-out_unmark:
- if (clear_in_exec)
- current->fs->in_exec = 0;
- current->in_execve = 0;
-
-out_free:
- free_bprm(bprm);
-
-out_files:
- if (displaced)
- reset_files_struct(displaced);
-out_ret:
- return retval;
-}
-
#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
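The three functions deleted above (compat_count(), compat_copy_strings(), compat_do_execve()) were near-verbatim clones of their fs/exec.c counterparts, differing only in pointer width. The exec.c hunks below absorb them: a small user_arg_ptr abstraction makes count() and copy_strings() width-agnostic, and compat_do_execve() shrinks to a thin wrapper over a shared do_execve_common(). Nothing in compat.c needs to know about binprm handling any more.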
diff --git a/fs/exec.c b/fs/exec.c
index 8328beb..c016896 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
#include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
+#include <linux/compat.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -166,8 +167,13 @@ out:
}
#ifdef CONFIG_MMU
-
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+/*
+ * The nascent bprm->mm is not visible until exec_mmap() but it can
+ * use a lot of memory, account these pages in current->mm temporary
+ * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
+ * change the counter back via acct_arg_size(0).
+ */
+static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
struct mm_struct *mm = current->mm;
long diff = (long)(pages - bprm->vma_pages);
@@ -186,7 +192,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
#endif
}
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
@@ -305,11 +311,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
#else
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
}
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
@@ -398,22 +404,56 @@ err:
return err;
}
+struct user_arg_ptr {
+#ifdef CONFIG_COMPAT
+ bool is_compat;
+#endif
+ union {
+ const char __user *const __user *native;
+#ifdef CONFIG_COMPAT
+ compat_uptr_t __user *compat;
+#endif
+ } ptr;
+};
+
+static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
+{
+ const char __user *native;
+
+#ifdef CONFIG_COMPAT
+ if (unlikely(argv.is_compat)) {
+ compat_uptr_t compat;
+
+ if (get_user(compat, argv.ptr.compat + nr))
+ return ERR_PTR(-EFAULT);
+
+ return compat_ptr(compat);
+ }
+#endif
+
+ if (get_user(native, argv.ptr.native + nr))
+ return ERR_PTR(-EFAULT);
+
+ return native;
+}
+
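struct user_arg_ptr is what lets one copy of count()/copy_strings() serve both native and compat callers: the union carries either pointer width and get_user_arg_ptr() hides the difference. A sketch of how a caller builds and walks one (mirroring the do_execve()/compat_do_execve() hunks below; __compat_argv is a hypothetical name):

    struct user_arg_ptr argv = { .ptr.native = __argv };
    #ifdef CONFIG_COMPAT
    struct user_arg_ptr cargv = {
            .is_compat = true,
            .ptr.compat = __compat_argv,
    };
    #endif

    const char __user *p = get_user_arg_ptr(argv, 0);  /* argv[0] */
    if (IS_ERR(p))
            return -EFAULT;                 /* get_user() faulted */

Note the ERR_PTR convention: a NULL return means end of array, while a fault is reported in-band via IS_ERR().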
/*
* count() counts the number of strings in array ARGV.
*/
-static int count(const char __user * const __user * argv, int max)
+static int count(struct user_arg_ptr argv, int max)
{
int i = 0;
- if (argv != NULL) {
+ if (argv.ptr.native != NULL) {
for (;;) {
- const char __user * p;
+ const char __user *p = get_user_arg_ptr(argv, i);
- if (get_user(p, argv))
- return -EFAULT;
if (!p)
break;
- argv++;
+
+ if (IS_ERR(p))
+ return -EFAULT;
+
if (i++ >= max)
return -E2BIG;
@@ -430,7 +470,7 @@ static int count(const char __user * const __user * argv, int max)
* processes's memory to the new process's stack. The call to get_user_pages()
* ensures the destination page is created and not swapped out.
*/
-static int copy_strings(int argc, const char __user *const __user *argv,
+static int copy_strings(int argc, struct user_arg_ptr argv,
struct linux_binprm *bprm)
{
struct page *kmapped_page = NULL;
@@ -443,16 +483,18 @@ static int copy_strings(int argc, const char __user *const __user *argv,
int len;
unsigned long pos;
- if (get_user(str, argv+argc) ||
- !(len = strnlen_user(str, MAX_ARG_STRLEN))) {
- ret = -EFAULT;
+ ret = -EFAULT;
+ str = get_user_arg_ptr(argv, argc);
+ if (IS_ERR(str))
goto out;
- }
- if (!valid_arg_len(bprm, len)) {
- ret = -E2BIG;
+ len = strnlen_user(str, MAX_ARG_STRLEN);
+ if (!len)
+ goto out;
+
+ ret = -E2BIG;
+ if (!valid_arg_len(bprm, len))
goto out;
- }
/* We're going to work our way backwords. */
pos = bprm->p;
@@ -519,14 +561,19 @@ out:
/*
* Like copy_strings, but get argv and its values from kernel memory.
*/
-int copy_strings_kernel(int argc, const char *const *argv,
+int copy_strings_kernel(int argc, const char *const *__argv,
struct linux_binprm *bprm)
{
int r;
mm_segment_t oldfs = get_fs();
+ struct user_arg_ptr argv = {
+ .ptr.native = (const char __user *const __user *)__argv,
+ };
+
set_fs(KERNEL_DS);
- r = copy_strings(argc, (const char __user *const __user *)argv, bprm);
+ r = copy_strings(argc, argv, bprm);
set_fs(oldfs);
+
return r;
}
EXPORT_SYMBOL(copy_strings_kernel);
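copy_strings_kernel() reuses the user-space walker on kernel memory by wrapping the kernel argv in a user_arg_ptr and widening the address limit for the duration of the call:

    mm_segment_t oldfs = get_fs();
    set_fs(KERNEL_DS);              /* let get_user()/copy_from_user()
                                     * accept kernel pointers */
    r = copy_strings(argc, argv, bprm);
    set_fs(oldfs);                  /* restore before returning */

The cast through .ptr.native is the one place the wrapper is built from a kernel pointer, which is safe only under KERNEL_DS.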
@@ -1379,10 +1426,10 @@ EXPORT_SYMBOL(search_binary_handler);
/*
* sys_execve() executes a new program.
*/
-int do_execve(const char * filename,
- const char __user *const __user *argv,
- const char __user *const __user *envp,
- struct pt_regs * regs)
+static int do_execve_common(const char *filename,
+ struct user_arg_ptr argv,
+ struct user_arg_ptr envp,
+ struct pt_regs *regs)
{
struct linux_binprm *bprm;
struct file *file;
@@ -1489,6 +1536,34 @@ out_ret:
return retval;
}
+int do_execve(const char *filename,
+ const char __user *const __user *__argv,
+ const char __user *const __user *__envp,
+ struct pt_regs *regs)
+{
+ struct user_arg_ptr argv = { .ptr.native = __argv };
+ struct user_arg_ptr envp = { .ptr.native = __envp };
+ return do_execve_common(filename, argv, envp, regs);
+}
+
+#ifdef CONFIG_COMPAT
+int compat_do_execve(char *filename,
+ compat_uptr_t __user *__argv,
+ compat_uptr_t __user *__envp,
+ struct pt_regs *regs)
+{
+ struct user_arg_ptr argv = {
+ .is_compat = true,
+ .ptr.compat = __argv,
+ };
+ struct user_arg_ptr envp = {
+ .is_compat = true,
+ .ptr.compat = __envp,
+ };
+ return do_execve_common(filename, argv, envp, regs);
+}
+#endif
+
void set_binfmt(struct linux_binfmt *new)
{
struct mm_struct *mm = current->mm;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 74add2d..e65493a 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -780,6 +780,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
metadata = (height != ip->i_height - 1);
if (metadata)
revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
+ else if (ip->i_depth)
+ revokes = sdp->sd_inptrs;
if (ip != GFS2_I(sdp->sd_rindex))
error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index cec26c0..903115f 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -228,6 +228,27 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
return ret;
}
+static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
+{
+ struct gfs2_ail *ai;
+ struct gfs2_bufdata *bd;
+ struct buffer_head *bh;
+
+ spin_lock(&sdp->sd_ail_lock);
+ list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) {
+ list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) {
+ bh = bd->bd_bh;
+ if (!buffer_locked(bh))
+ continue;
+ get_bh(bh);
+ spin_unlock(&sdp->sd_ail_lock);
+ wait_on_buffer(bh);
+ brelse(bh);
+ return;
+ }
+ }
+ spin_unlock(&sdp->sd_ail_lock);
+}
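gfs2_ail1_wait() replaces blind polling (the msleep(10) removed from gfs2_meta_syncfs() below, and the io_schedule() in gfs2_logd()) with a wait on the first locked buffer found on the AIL1 lists, scanning the newest transaction first. The buffer is pinned before the spinlock is dropped:

    get_bh(bh);                     /* ref keeps bh alive across sleep */
    spin_unlock(&sdp->sd_ail_lock);
    wait_on_buffer(bh);             /* sleep until its I/O completes */
    brelse(bh);
    return;

Waiting on one buffer at a time is enough here because both callers loop until gfs2_ail1_empty() reports the list drained.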
/**
* gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
@@ -878,9 +899,9 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
gfs2_log_flush(sdp, NULL);
for (;;) {
gfs2_ail1_start(sdp);
+ gfs2_ail1_wait(sdp);
if (gfs2_ail1_empty(sdp))
break;
- msleep(10);
}
}
@@ -920,12 +941,14 @@ int gfs2_logd(void *data)
if (gfs2_ail_flush_reqd(sdp)) {
gfs2_ail1_start(sdp);
- io_schedule();
+ gfs2_ail1_wait(sdp);
gfs2_ail1_empty(sdp);
gfs2_log_flush(sdp, NULL);
}
- wake_up(&sdp->sd_log_waitq);
+ if (!gfs2_ail_flush_reqd(sdp))
+ wake_up(&sdp->sd_log_waitq);
+
t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
if (freezing(current))
refrigerator();
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7273ad3..9b780df 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1629,6 +1629,10 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_trans_add_rg(rgd);
+
+ /* Directories keep their data in the metadata address space */
+ if (ip->i_depth)
+ gfs2_meta_wipe(ip, bstart, blen);
}
/**
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index f768448..eed4d7b 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -489,8 +489,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
void nilfs_palloc_commit_alloc_entry(struct inode *inode,
struct nilfs_palloc_req *req)
{
- nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
- nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
+ mark_buffer_dirty(req->pr_bitmap_bh);
+ mark_buffer_dirty(req->pr_desc_bh);
nilfs_mdt_mark_dirty(inode);
brelse(req->pr_bitmap_bh);
@@ -527,8 +527,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
kunmap(req->pr_bitmap_bh->b_page);
kunmap(req->pr_desc_bh->b_page);
- nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
- nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
+ mark_buffer_dirty(req->pr_desc_bh);
+ mark_buffer_dirty(req->pr_bitmap_bh);
nilfs_mdt_mark_dirty(inode);
brelse(req->pr_bitmap_bh);
@@ -683,8 +683,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
kunmap(bitmap_bh->b_page);
kunmap(desc_bh->b_page);
- nilfs_mdt_mark_buffer_dirty(desc_bh);
- nilfs_mdt_mark_buffer_dirty(bitmap_bh);
+ mark_buffer_dirty(desc_bh);
+ mark_buffer_dirty(bitmap_bh);
nilfs_mdt_mark_dirty(inode);
brelse(bitmap_bh);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 4723f04..aadbd0b 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -34,7 +34,9 @@
struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
{
- return NILFS_I_NILFS(bmap->b_inode)->ns_dat;
+ struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info;
+
+ return nilfs->ns_dat;
}
static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 609cd22..a35ae35 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,12 +34,6 @@
#include "page.h"
#include "btnode.h"
-void nilfs_btnode_cache_init(struct address_space *btnc,
- struct backing_dev_info *bdi)
-{
- nilfs_mapping_init(btnc, bdi);
-}
-
void nilfs_btnode_cache_clear(struct address_space *btnc)
{
invalidate_mapping_pages(btnc, 0, -1);
@@ -62,7 +56,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
BUG();
}
memset(bh->b_data, 0, 1 << inode->i_blkbits);
- bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+ bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = blocknr;
set_buffer_mapped(bh);
set_buffer_uptodate(bh);
@@ -94,10 +88,11 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
if (pblocknr == 0) {
pblocknr = blocknr;
if (inode->i_ino != NILFS_DAT_INO) {
- struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
/* blocknr is a virtual block number */
- err = nilfs_dat_translate(dat, blocknr, &pblocknr);
+ err = nilfs_dat_translate(nilfs->ns_dat, blocknr,
+ &pblocknr);
if (unlikely(err)) {
brelse(bh);
goto out_locked;
@@ -120,7 +115,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
goto found;
}
set_buffer_mapped(bh);
- bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+ bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = pblocknr; /* set block address for read */
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
@@ -259,7 +254,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
"invalid oldkey %lld (newkey=%lld)",
(unsigned long long)oldkey,
(unsigned long long)newkey);
- nilfs_btnode_mark_dirty(obh);
+ mark_buffer_dirty(obh);
spin_lock_irq(&btnc->tree_lock);
radix_tree_delete(&btnc->page_tree, oldkey);
@@ -271,7 +266,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
unlock_page(opage);
} else {
nilfs_copy_buffer(nbh, obh);
- nilfs_btnode_mark_dirty(nbh);
+ mark_buffer_dirty(nbh);
nbh->b_blocknr = newkey;
ctxt->bh = nbh;
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 1b8ebd8..3a4dd2d 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
struct buffer_head *newbh;
};
-void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
__u64 blocknr);
@@ -51,7 +50,4 @@ void nilfs_btnode_commit_change_key(struct address_space *,
void nilfs_btnode_abort_change_key(struct address_space *,
struct nilfs_btnode_chkey_ctxt *);
-#define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh)
-
-
#endif /* _NILFS_BTNODE_H */
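The two deletions in btnode.h (the cache-init forwarder and the nilfs_btnode_mark_dirty() macro) set the theme for the rest of the nilfs2 series: every private dirtying wrapper, nilfs_btnode_mark_dirty(), nilfs_mdt_mark_buffer_dirty() and nilfs_mark_buffer_dirty(), collapses into the generic buffer-layer helper:

    mark_buffer_dirty(bh);          /* was: nilfs_*_mark_*_dirty(bh) */

The alloc.c hunks above and the btree.c, cpfile.c, dat.c, gcinode.c and ifile.c hunks below are that one substitution applied mechanically.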
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index d451ae0..7eafe46 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -714,7 +714,7 @@ static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
nilfs_btree_get_nonroot_node(path, level),
path[level].bp_index, key);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
} while ((path[level].bp_index == 0) &&
(++level < nilfs_btree_height(btree) - 1));
}
@@ -739,7 +739,7 @@ static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
nilfs_btree_node_insert(node, path[level].bp_index,
*keyp, *ptrp, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (path[level].bp_index == 0)
nilfs_btree_promote_key(btree, path, level + 1,
@@ -777,9 +777,9 @@ static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(node, 0));
@@ -823,9 +823,9 @@ static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
path[level + 1].bp_index++;
nilfs_btree_promote_key(btree, path, level + 1,
@@ -870,9 +870,9 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
newkey = nilfs_btree_node_get_key(right, 0);
newptr = path[level].bp_newreq.bpr_ptr;
@@ -919,7 +919,7 @@ static void nilfs_btree_grow(struct nilfs_bmap *btree,
nilfs_btree_node_set_level(root, level + 1);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
path[level].bp_bh = path[level].bp_sib_bh;
path[level].bp_sib_bh = NULL;
@@ -1194,7 +1194,7 @@ static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
nilfs_btree_node_delete(node, path[level].bp_index,
keyp, ptrp, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (path[level].bp_index == 0)
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(node, 0));
@@ -1226,9 +1226,9 @@ static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(node, 0));
@@ -1258,9 +1258,9 @@ static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
path[level + 1].bp_index++;
nilfs_btree_promote_key(btree, path, level + 1,
@@ -1289,7 +1289,7 @@ static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_sib_bh))
- nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+ mark_buffer_dirty(path[level].bp_sib_bh);
nilfs_btnode_delete(path[level].bp_bh);
path[level].bp_bh = path[level].bp_sib_bh;
@@ -1315,7 +1315,7 @@ static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
if (!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
nilfs_btnode_delete(path[level].bp_sib_bh);
path[level].bp_sib_bh = NULL;
@@ -1709,7 +1709,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
if (!buffer_dirty(bh))
- nilfs_btnode_mark_dirty(bh);
+ mark_buffer_dirty(bh);
if (!nilfs_bmap_dirty(btree))
nilfs_bmap_set_dirty(btree);
@@ -1787,7 +1787,7 @@ static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
{
while ((++level < nilfs_btree_height(btree) - 1) &&
!buffer_dirty(path[level].bp_bh))
- nilfs_btnode_mark_dirty(path[level].bp_bh);
+ mark_buffer_dirty(path[level].bp_bh);
return 0;
}
@@ -2229,7 +2229,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
}
if (!buffer_dirty(bh))
- nilfs_btnode_mark_dirty(bh);
+ mark_buffer_dirty(bh);
brelse(bh);
if (!nilfs_bmap_dirty(btree))
nilfs_bmap_set_dirty(btree);
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 5ff15a8..c9b342c 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -216,14 +216,14 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
if (!nilfs_cpfile_is_in_first(cpfile, cno))
nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
kaddr, 1);
- nilfs_mdt_mark_buffer_dirty(cp_bh);
+ mark_buffer_dirty(cp_bh);
kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
header = nilfs_cpfile_block_get_header(cpfile, header_bh,
kaddr);
le64_add_cpu(&header->ch_ncheckpoints, 1);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
}
@@ -326,7 +326,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
}
if (nicps > 0) {
tnicps += nicps;
- nilfs_mdt_mark_buffer_dirty(cp_bh);
+ mark_buffer_dirty(cp_bh);
nilfs_mdt_mark_dirty(cpfile);
if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
count =
@@ -358,7 +358,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
header = nilfs_cpfile_block_get_header(cpfile, header_bh,
kaddr);
le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
kunmap_atomic(kaddr, KM_USER0);
}
@@ -671,10 +671,10 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
le64_add_cpu(&header->ch_nsnapshots, 1);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(prev_bh);
- nilfs_mdt_mark_buffer_dirty(curr_bh);
- nilfs_mdt_mark_buffer_dirty(cp_bh);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(prev_bh);
+ mark_buffer_dirty(curr_bh);
+ mark_buffer_dirty(cp_bh);
+ mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
brelse(prev_bh);
@@ -774,10 +774,10 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
le64_add_cpu(&header->ch_nsnapshots, -1);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(next_bh);
- nilfs_mdt_mark_buffer_dirty(prev_bh);
- nilfs_mdt_mark_buffer_dirty(cp_bh);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(next_bh);
+ mark_buffer_dirty(prev_bh);
+ mark_buffer_dirty(cp_bh);
+ mark_buffer_dirty(header_bh);
nilfs_mdt_mark_dirty(cpfile);
brelse(prev_bh);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 59e5fe7..fcc2f86 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -54,7 +54,7 @@ static int nilfs_dat_prepare_entry(struct inode *dat,
static void nilfs_dat_commit_entry(struct inode *dat,
struct nilfs_palloc_req *req)
{
- nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh);
+ mark_buffer_dirty(req->pr_entry_bh);
nilfs_mdt_mark_dirty(dat);
brelse(req->pr_entry_bh);
}
@@ -361,7 +361,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
entry->de_blocknr = cpu_to_le64(blocknr);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(entry_bh);
+ mark_buffer_dirty(entry_bh);
nilfs_mdt_mark_dirty(dat);
brelse(entry_bh);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 397e732..d7eeca6 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -111,7 +111,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
nilfs_transaction_commit(inode->i_sb);
mapped:
- SetPageChecked(page);
wait_on_page_writeback(page);
return VM_FAULT_LOCKED;
}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1c2a3e2..08a07a2 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -48,9 +48,6 @@
#include "dat.h"
#include "ifile.h"
-static const struct address_space_operations def_gcinode_aops = {
-};
-
/*
* nilfs_gccache_submit_read_data() - add data buffer and submit read request
* @inode - gc inode
@@ -87,9 +84,9 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
goto out;
if (pbn == 0) {
- struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat;
- /* use original dat, not gc dat. */
- err = nilfs_dat_translate(dat_inode, vbn, &pbn);
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+
+ err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
brelse(bh);
goto failed;
@@ -103,7 +100,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
}
if (!buffer_mapped(bh)) {
- bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+ bh->b_bdev = inode->i_sb->s_bdev;
set_buffer_mapped(bh);
}
bh->b_blocknr = pbn;
@@ -160,15 +157,11 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
if (buffer_dirty(bh))
return -EEXIST;
- if (buffer_nilfs_node(bh)) {
- if (nilfs_btree_broken_node_block(bh)) {
- clear_buffer_uptodate(bh);
- return -EIO;
- }
- nilfs_btnode_mark_dirty(bh);
- } else {
- nilfs_mark_buffer_dirty(bh);
+ if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) {
+ clear_buffer_uptodate(bh);
+ return -EIO;
}
+ mark_buffer_dirty(bh);
return 0;
}
@@ -178,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode)
inode->i_mode = S_IFREG;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
- inode->i_mapping->a_ops = &def_gcinode_aops;
+ inode->i_mapping->a_ops = &empty_aops;
inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
ii->i_flags = 0;
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index bfc73d3..684d763 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -80,7 +80,7 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
return ret;
}
nilfs_palloc_commit_alloc_entry(ifile, &req);
- nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
+ mark_buffer_dirty(req.pr_entry_bh);
nilfs_mdt_mark_dirty(ifile);
*out_ino = (ino_t)req.pr_entry_nr;
*out_bh = req.pr_entry_bh;
@@ -128,7 +128,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
raw_inode->i_flags = 0;
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
+ mark_buffer_dirty(req.pr_entry_bh);
brelse(req.pr_entry_bh);
nilfs_palloc_commit_free_entry(ifile, &req);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index c0aa274..587f184 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -74,14 +74,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
struct buffer_head *bh_result, int create)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
__u64 blknum = 0;
int err = 0, ret;
- struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
- down_read(&NILFS_MDT(dat)->mi_sem);
+ down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
- up_read(&NILFS_MDT(dat)->mi_sem);
+ up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
if (ret >= 0) { /* found */
map_bh(bh_result, inode->i_sb, blknum);
if (ret > 0)
@@ -596,6 +596,16 @@ void nilfs_write_inode_common(struct inode *inode,
raw_inode->i_flags = cpu_to_le32(ii->i_flags);
raw_inode->i_generation = cpu_to_le32(inode->i_generation);
+ if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+
+ /* zero-fill the unused portion in the case of a super root block */
+ raw_inode->i_xattr = 0;
+ raw_inode->i_pad = 0;
+ memset((void *)raw_inode + sizeof(*raw_inode), 0,
+ nilfs->ns_inode_size - sizeof(*raw_inode));
+ }
+
if (has_bmap)
nilfs_bmap_write(ii->i_bmap, raw_inode);
else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
@@ -872,8 +882,7 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
return -EINVAL; /* NILFS_I_DIRTY may remain for
freeing inode */
}
- list_del(&ii->i_dirty);
- list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
+ list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
set_bit(NILFS_I_QUEUED, &ii->i_state);
}
spin_unlock(&nilfs->ns_inode_lock);
@@ -892,7 +901,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
return err;
}
nilfs_update_inode(inode, ibh);
- nilfs_mdt_mark_buffer_dirty(ibh);
+ mark_buffer_dirty(ibh);
nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
brelse(ibh);
return 0;
@@ -931,7 +940,7 @@ void nilfs_dirty_inode(struct inode *inode)
int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
- struct the_nilfs *nilfs = NILFS_I_NILFS(inode);
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
__u64 logical = 0, phys = 0, size = 0;
__u32 flags = 0;
loff_t isize;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f2469ba..41d6743 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -698,6 +698,63 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
return 0;
}
+static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
+ void __user *argp)
+{
+ __u64 newsize;
+ int ret = -EPERM;
+
+ if (!capable(CAP_SYS_ADMIN))
+ goto out;
+
+ ret = mnt_want_write(filp->f_path.mnt);
+ if (ret)
+ goto out;
+
+ ret = -EFAULT;
+ if (copy_from_user(&newsize, argp, sizeof(newsize)))
+ goto out_drop_write;
+
+ ret = nilfs_resize_fs(inode->i_sb, newsize);
+
+out_drop_write:
+ mnt_drop_write(filp->f_path.mnt);
+out:
+ return ret;
+}
+
+static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
+{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+ __u64 range[2];
+ __u64 minseg, maxseg;
+ unsigned long segbytes;
+ int ret = -EPERM;
+
+ if (!capable(CAP_SYS_ADMIN))
+ goto out;
+
+ ret = -EFAULT;
+ if (copy_from_user(range, argp, sizeof(__u64[2])))
+ goto out;
+
+ ret = -ERANGE;
+ if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode))
+ goto out;
+
+ segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize;
+
+ minseg = range[0] + segbytes - 1;
+ do_div(minseg, segbytes);
+ maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
+ do_div(maxseg, segbytes);
+ maxseg--;
+
+ ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg);
+out:
+ return ret;
+}
+
static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp,
size_t membsz,
@@ -763,6 +820,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return nilfs_ioctl_clean_segments(inode, filp, cmd, argp);
case NILFS_IOCTL_SYNC:
return nilfs_ioctl_sync(inode, filp, cmd, argp);
+ case NILFS_IOCTL_RESIZE:
+ return nilfs_ioctl_resize(inode, filp, argp);
+ case NILFS_IOCTL_SET_ALLOC_RANGE:
+ return nilfs_ioctl_set_alloc_range(inode, argp);
default:
return -ENOTTY;
}
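
Both new ioctls take plain __u64 payloads, so they are easy to drive from userspace. A minimal sketch, assuming a NILFS2 volume mounted at /mnt/nilfs (the mount path and sizes are hypothetical; NILFS_IOCTL_RESIZE and NILFS_IOCTL_SET_ALLOC_RANGE are the macros added to include/linux/nilfs2_fs.h later in this patch):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/nilfs2_fs.h>

int main(void)
{
	__u64 newsize = 8ULL << 30;		/* hypothetical new size: 8 GiB */
	__u64 range[2] = { 0, 4ULL << 30 };	/* hypothetical alloc window */
	int fd;

	/* Any descriptor on the filesystem identifies it to the ioctl. */
	fd = open("/mnt/nilfs", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (ioctl(fd, NILFS_IOCTL_SET_ALLOC_RANGE, range) < 0)
		perror("NILFS_IOCTL_SET_ALLOC_RANGE");

	if (ioctl(fd, NILFS_IOCTL_RESIZE, &newsize) < 0)
		perror("NILFS_IOCTL_RESIZE");

	close(fd);
	return 0;
}

Both commands are gated on CAP_SYS_ADMIN, and the resize path additionally takes mnt_want_write(), so expect EPERM without privilege and EROFS on a read-only mount.
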
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a649b05..800e8d7 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -66,7 +66,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
kunmap_atomic(kaddr, KM_USER0);
set_buffer_uptodate(bh);
- nilfs_mark_buffer_dirty(bh);
+ mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(inode);
return 0;
}
@@ -355,7 +355,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
err = nilfs_mdt_read_block(inode, block, 0, &bh);
if (unlikely(err))
return err;
- nilfs_mark_buffer_dirty(bh);
+ mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(inode);
brelse(bh);
return 0;
@@ -450,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
INIT_LIST_HEAD(&shadow->frozen_buffers);
address_space_init_once(&shadow->frozen_data);
- nilfs_mapping_init(&shadow->frozen_data, bdi);
+ nilfs_mapping_init(&shadow->frozen_data, inode, bdi);
address_space_init_once(&shadow->frozen_btnodes);
- nilfs_mapping_init(&shadow->frozen_btnodes, bdi);
+ nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi);
mi->mi_shadow = shadow;
return 0;
}
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ed68563..ab20a4b 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -64,11 +64,6 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
return inode->i_private;
}
-static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
-{
- return inode->i_sb->s_fs_info;
-}
-
/* Default GFP flags using highmem */
#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
@@ -93,8 +88,6 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh);
struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode,
struct buffer_head *bh);
-#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh)
-
static inline void nilfs_mdt_mark_dirty(struct inode *inode)
{
if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
@@ -108,7 +101,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode)
static inline __u64 nilfs_mdt_cno(struct inode *inode)
{
- return NILFS_I_NILFS(inode)->ns_cno;
+ return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
}
#define nilfs_mdt_bgl_lock(inode, bg) \
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index a8dd344..a9c6a53 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -80,12 +80,6 @@ static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
return &ii->vfs_inode;
}
-static inline struct inode *NILFS_AS_I(struct address_space *mapping)
-{
- return (mapping->host) ? :
- container_of(mapping, struct inode, i_data);
-}
-
/*
* Dynamic state flags of NILFS on-memory inode (i_state)
*/
@@ -298,6 +292,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
int flip);
int nilfs_commit_super(struct super_block *sb, int flag);
int nilfs_cleanup_super(struct super_block *sb);
+int nilfs_resize_fs(struct super_block *sb, __u64 newsize);
int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
struct nilfs_root **root);
int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 1168059..65221a0 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -37,8 +37,7 @@
#define NILFS_BUFFER_INHERENT_BITS \
((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
- (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \
- (1UL << BH_NILFS_Checked))
+ (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked))
static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
@@ -59,19 +58,6 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
return bh;
}
-/*
- * Since the page cache of B-tree node pages or data page cache of pseudo
- * inodes does not have a valid mapping->host pointer, calling
- * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
- * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
- * To avoid this problem, the old style mark_buffer_dirty() is used instead.
- */
-void nilfs_mark_buffer_dirty(struct buffer_head *bh)
-{
- if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
- __set_page_dirty_nobuffers(bh->b_page);
-}
-
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
struct address_space *mapping,
unsigned long blkoff,
@@ -183,7 +169,7 @@ int nilfs_page_buffers_clean(struct page *page)
void nilfs_page_bug(struct page *page)
{
struct address_space *m;
- unsigned long ino = 0;
+ unsigned long ino;
if (unlikely(!page)) {
printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
@@ -191,11 +177,8 @@ void nilfs_page_bug(struct page *page)
}
m = page->mapping;
- if (m) {
- struct inode *inode = NILFS_AS_I(m);
- if (inode != NULL)
- ino = inode->i_ino;
- }
+ ino = m ? m->host->i_ino : 0;
+
printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
"mapping=%p ino=%lu\n",
page, atomic_read(&page->_count),
@@ -217,56 +200,6 @@ void nilfs_page_bug(struct page *page)
}
/**
- * nilfs_alloc_private_page - allocate a private page with buffer heads
- *
- * Return Value: On success, a pointer to the allocated page is returned.
- * On error, NULL is returned.
- */
-struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
- unsigned long state)
-{
- struct buffer_head *bh, *head, *tail;
- struct page *page;
-
- page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
- if (unlikely(!page))
- return NULL;
-
- lock_page(page);
- head = alloc_page_buffers(page, size, 0);
- if (unlikely(!head)) {
- unlock_page(page);
- __free_page(page);
- return NULL;
- }
-
- bh = head;
- do {
- bh->b_state = (1UL << BH_NILFS_Allocated) | state;
- tail = bh;
- bh->b_bdev = bdev;
- bh = bh->b_this_page;
- } while (bh);
-
- tail->b_this_page = head;
- attach_page_buffers(page, head);
-
- return page;
-}
-
-void nilfs_free_private_page(struct page *page)
-{
- BUG_ON(!PageLocked(page));
- BUG_ON(page->mapping);
-
- if (page_has_buffers(page) && !try_to_free_buffers(page))
- NILFS_PAGE_BUG(page, "failed to free page");
-
- unlock_page(page);
- __free_page(page);
-}
-
-/**
* nilfs_copy_page -- copy the page with buffers
* @dst: destination page
* @src: source page
@@ -492,10 +425,10 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
return nc;
}
-void nilfs_mapping_init(struct address_space *mapping,
+void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
struct backing_dev_info *bdi)
{
- mapping->host = NULL;
+ mapping->host = inode;
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->assoc_mapping = NULL;
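
The net effect of passing the inode into nilfs_mapping_init() is that mapping->host is now valid for btnode caches and shadow mappings too, which is what lets every nilfs_mark_buffer_dirty()/nilfs_mdt_mark_buffer_dirty() call site in this patch collapse to plain mark_buffer_dirty(). A comment-level sketch of the dependency (call chain paraphrased from the removed comment above, not quoted verbatim from fs/buffer.c):

/* Before: mapping->host == NULL for btnode/shadow mappings, so the
 * generic path would oops:
 *
 *	mark_buffer_dirty(bh)
 *	  -> __set_page_dirty(page, mapping, ...)
 *	       -> __mark_inode_dirty(mapping->host, ...)   NULL deref
 *
 * which is why the removed nilfs_mark_buffer_dirty() bypassed it:
 *
 *	if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
 *		__set_page_dirty_nobuffers(bh->b_page);
 *
 * After: mapping->host is always the owning inode, so the generic
 * helper is safe everywhere.
 */
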
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index f06b79a..fb7de71 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -38,14 +38,12 @@ enum {
BH_NILFS_Redirected,
};
-BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */
BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */
BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */
BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */
-void nilfs_mark_buffer_dirty(struct buffer_head *bh);
int __nilfs_clear_page_dirty(struct page *);
struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
@@ -54,14 +52,11 @@ void nilfs_forget_buffer(struct buffer_head *);
void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
int nilfs_page_buffers_clean(struct page *);
void nilfs_page_bug(struct page *);
-struct page *nilfs_alloc_private_page(struct block_device *, int,
- unsigned long);
-void nilfs_free_private_page(struct page *);
int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
void nilfs_copy_back_pages(struct address_space *, struct address_space *);
void nilfs_clear_dirty_pages(struct address_space *);
-void nilfs_mapping_init(struct address_space *mapping,
+void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
struct backing_dev_info *bdi);
unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ba4a645..a604ac0 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -387,9 +387,9 @@ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
static void dispose_recovery_list(struct list_head *head)
{
while (!list_empty(head)) {
- struct nilfs_recovery_block *rb
- = list_entry(head->next,
- struct nilfs_recovery_block, list);
+ struct nilfs_recovery_block *rb;
+
+ rb = list_first_entry(head, struct nilfs_recovery_block, list);
list_del(&rb->list);
kfree(rb);
}
@@ -416,9 +416,9 @@ static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
void nilfs_dispose_segment_list(struct list_head *head)
{
while (!list_empty(head)) {
- struct nilfs_segment_entry *ent
- = list_entry(head->next,
- struct nilfs_segment_entry, list);
+ struct nilfs_segment_entry *ent;
+
+ ent = list_first_entry(head, struct nilfs_segment_entry, list);
list_del(&ent->list);
kfree(ent);
}
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2853ff2..850a7c0 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
u32 seed)
{
struct nilfs_super_root *raw_sr;
+ struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info;
+ unsigned srsize;
u32 crc;
raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data;
+ srsize = NILFS_SR_BYTES(nilfs->ns_inode_size);
crc = crc32_le(seed,
(unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
- NILFS_SR_BYTES - sizeof(raw_sr->sr_sum));
+ srsize - sizeof(raw_sr->sr_sum));
raw_sr->sr_sum = cpu_to_le32(crc);
}
@@ -254,18 +257,6 @@ static void nilfs_release_buffers(struct list_head *list)
list_for_each_entry_safe(bh, n, list, b_assoc_buffers) {
list_del_init(&bh->b_assoc_buffers);
- if (buffer_nilfs_allocated(bh)) {
- struct page *clone_page = bh->b_page;
-
- /* remove clone page */
- brelse(bh);
- page_cache_release(clone_page); /* for each bh */
- if (page_count(clone_page) <= 2) {
- lock_page(clone_page);
- nilfs_free_private_page(clone_page);
- }
- continue;
- }
brelse(bh);
}
}
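
NILFS_SR_BYTES becoming a function of the inode size means the checksummed span is the super-root header plus the three metadata inodes (DAT, cpfile, sufile) at the on-disk ns_inode_size, rather than a compile-time sizeof. A toy calculation, with both sizes below chosen purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned hdr = 24;	/* hypothetical sizeof(struct nilfs_super_root) */
	unsigned isz = 128;	/* hypothetical ns_inode_size */

	/* NILFS_SR_BYTES(isz) == NILFS_SR_MDT_OFFSET(isz, 3)
	 *                     == header + 3 inodes */
	unsigned srsize = hdr + 3 * isz;

	/* The CRC then covers srsize minus the sr_sum field itself, and
	 * nilfs_segctor_fill_in_super_root() zero-pads the block past
	 * srsize before checksumming. */
	printf("super root spans %u of the block's bytes\n", srsize);
	return 0;
}
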
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index afe4f21..141646e 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -655,13 +655,10 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
if (unlikely(page->index > last))
break;
- if (mapping->host) {
- lock_page(page);
- if (!page_has_buffers(page))
- create_empty_buffers(page,
- 1 << inode->i_blkbits, 0);
- unlock_page(page);
- }
+ lock_page(page);
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+ unlock_page(page);
bh = head = page_buffers(page);
do {
@@ -809,7 +806,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
/* The following code is duplicated with cpfile. But, it is
needed to collect the checkpoint even if it was not newly
created */
- nilfs_mdt_mark_buffer_dirty(bh_cp);
+ mark_buffer_dirty(bh_cp);
nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
nilfs_cpfile_put_checkpoint(
nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
@@ -889,12 +886,14 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
{
struct buffer_head *bh_sr;
struct nilfs_super_root *raw_sr;
- unsigned isz = nilfs->ns_inode_size;
+ unsigned isz, srsz;
bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
+ isz = nilfs->ns_inode_size;
+ srsz = NILFS_SR_BYTES(isz);
- raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES);
+ raw_sr->sr_bytes = cpu_to_le16(srsz);
raw_sr->sr_nongc_ctime
= cpu_to_le64(nilfs_doing_gc() ?
nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
@@ -906,6 +905,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
NILFS_SR_CPFILE_OFFSET(isz), 1);
nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
NILFS_SR_SUFILE_OFFSET(isz), 1);
+ memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
}
static void nilfs_redirty_inodes(struct list_head *head)
@@ -954,8 +954,8 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
dispose_buffers:
while (!list_empty(listp)) {
- bh = list_entry(listp->next, struct buffer_head,
- b_assoc_buffers);
+ bh = list_first_entry(listp, struct buffer_head,
+ b_assoc_buffers);
list_del_init(&bh->b_assoc_buffers);
brelse(bh);
}
@@ -1500,10 +1500,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
nblocks = le32_to_cpu(finfo->fi_nblocks);
ndatablk = le32_to_cpu(finfo->fi_ndatablk);
- if (buffer_nilfs_node(bh))
- inode = NILFS_BTNC_I(bh->b_page->mapping);
- else
- inode = NILFS_AS_I(bh->b_page->mapping);
+ inode = bh->b_page->mapping->host;
if (mode == SC_LSEG_DSYNC)
sc_op = &nilfs_sc_dsync_ops;
@@ -1556,83 +1553,24 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
return 0;
}
-static int
-nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
-{
- struct page *clone_page;
- struct buffer_head *bh, *head, *bh2;
- void *kaddr;
-
- bh = head = page_buffers(page);
-
- clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
- if (unlikely(!clone_page))
- return -ENOMEM;
-
- bh2 = page_buffers(clone_page);
- kaddr = kmap_atomic(page, KM_USER0);
- do {
- if (list_empty(&bh->b_assoc_buffers))
- continue;
- get_bh(bh2);
- page_cache_get(clone_page); /* for each bh */
- memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
- bh2->b_blocknr = bh->b_blocknr;
- list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
- list_add_tail(&bh->b_assoc_buffers, out);
- } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
- kunmap_atomic(kaddr, KM_USER0);
-
- if (!TestSetPageWriteback(clone_page))
- account_page_writeback(clone_page);
- unlock_page(clone_page);
-
- return 0;
-}
-
-static int nilfs_test_page_to_be_frozen(struct page *page)
-{
- struct address_space *mapping = page->mapping;
-
- if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode))
- return 0;
-
- if (page_mapped(page)) {
- ClearPageChecked(page);
- return 1;
- }
- return PageChecked(page);
-}
-
-static int nilfs_begin_page_io(struct page *page, struct list_head *out)
+static void nilfs_begin_page_io(struct page *page)
{
if (!page || PageWriteback(page))
/* For split b-tree node pages, this function may be called
twice. We ignore the 2nd or later calls by this check. */
- return 0;
+ return;
lock_page(page);
clear_page_dirty_for_io(page);
set_page_writeback(page);
unlock_page(page);
-
- if (nilfs_test_page_to_be_frozen(page)) {
- int err = nilfs_copy_replace_page_buffers(page, out);
- if (unlikely(err))
- return err;
- }
- return 0;
}
-static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
- struct page **failed_page)
+static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
{
struct nilfs_segment_buffer *segbuf;
struct page *bd_page = NULL, *fs_page = NULL;
- struct list_head *list = &sci->sc_copied_buffers;
- int err;
- *failed_page = NULL;
list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
struct buffer_head *bh;
@@ -1662,11 +1600,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
break;
}
if (bh->b_page != fs_page) {
- err = nilfs_begin_page_io(fs_page, list);
- if (unlikely(err)) {
- *failed_page = fs_page;
- goto out;
- }
+ nilfs_begin_page_io(fs_page);
fs_page = bh->b_page;
}
}
@@ -1677,11 +1611,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
set_page_writeback(bd_page);
unlock_page(bd_page);
}
- err = nilfs_begin_page_io(fs_page, list);
- if (unlikely(err))
- *failed_page = fs_page;
- out:
- return err;
+ nilfs_begin_page_io(fs_page);
}
static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -1694,24 +1624,6 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
return ret;
}
-static void __nilfs_end_page_io(struct page *page, int err)
-{
- if (!err) {
- if (!nilfs_page_buffers_clean(page))
- __set_page_dirty_nobuffers(page);
- ClearPageError(page);
- } else {
- __set_page_dirty_nobuffers(page);
- SetPageError(page);
- }
-
- if (buffer_nilfs_allocated(page_buffers(page))) {
- if (TestClearPageWriteback(page))
- dec_zone_page_state(page, NR_WRITEBACK);
- } else
- end_page_writeback(page);
-}
-
static void nilfs_end_page_io(struct page *page, int err)
{
if (!page)
@@ -1738,40 +1650,19 @@ static void nilfs_end_page_io(struct page *page, int err)
return;
}
- __nilfs_end_page_io(page, err);
-}
-
-static void nilfs_clear_copied_buffers(struct list_head *list, int err)
-{
- struct buffer_head *bh, *head;
- struct page *page;
-
- while (!list_empty(list)) {
- bh = list_entry(list->next, struct buffer_head,
- b_assoc_buffers);
- page = bh->b_page;
- page_cache_get(page);
- head = bh = page_buffers(page);
- do {
- if (!list_empty(&bh->b_assoc_buffers)) {
- list_del_init(&bh->b_assoc_buffers);
- if (!err) {
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
- clear_buffer_delay(bh);
- clear_buffer_nilfs_volatile(bh);
- }
- brelse(bh); /* for b_assoc_buffers */
- }
- } while ((bh = bh->b_this_page) != head);
-
- __nilfs_end_page_io(page, err);
- page_cache_release(page);
+ if (!err) {
+ if (!nilfs_page_buffers_clean(page))
+ __set_page_dirty_nobuffers(page);
+ ClearPageError(page);
+ } else {
+ __set_page_dirty_nobuffers(page);
+ SetPageError(page);
}
+
+ end_page_writeback(page);
}
-static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
- int err)
+static void nilfs_abort_logs(struct list_head *logs, int err)
{
struct nilfs_segment_buffer *segbuf;
struct page *bd_page = NULL, *fs_page = NULL;
@@ -1801,8 +1692,6 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
}
if (bh->b_page != fs_page) {
nilfs_end_page_io(fs_page, err);
- if (fs_page && fs_page == failed_page)
- return;
fs_page = bh->b_page;
}
}
@@ -1821,12 +1710,11 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
list_splice_tail_init(&sci->sc_write_logs, &logs);
ret = nilfs_wait_on_logs(&logs);
- nilfs_abort_logs(&logs, NULL, ret ? : err);
+ nilfs_abort_logs(&logs, ret ? : err);
list_splice_tail_init(&sci->sc_segbufs, &logs);
nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
nilfs_free_incomplete_logs(&logs, nilfs);
- nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
@@ -1920,8 +1808,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
nilfs_end_page_io(fs_page, 0);
- nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);
-
nilfs_drop_collected_inodes(&sci->sc_dirty_files);
if (nilfs_doing_gc())
@@ -1979,7 +1865,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
"failed to get inode block.\n");
return err;
}
- nilfs_mdt_mark_buffer_dirty(ibh);
+ mark_buffer_dirty(ibh);
nilfs_mdt_mark_dirty(ifile);
spin_lock(&nilfs->ns_inode_lock);
if (likely(!ii->i_bh))
@@ -1991,8 +1877,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
clear_bit(NILFS_I_QUEUED, &ii->i_state);
set_bit(NILFS_I_BUSY, &ii->i_state);
- list_del(&ii->i_dirty);
- list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
+ list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
}
spin_unlock(&nilfs->ns_inode_lock);
@@ -2014,8 +1899,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
clear_bit(NILFS_I_BUSY, &ii->i_state);
brelse(ii->i_bh);
ii->i_bh = NULL;
- list_del(&ii->i_dirty);
- list_add_tail(&ii->i_dirty, &ti->ti_garbage);
+ list_move_tail(&ii->i_dirty, &ti->ti_garbage);
}
spin_unlock(&nilfs->ns_inode_lock);
}
@@ -2026,7 +1910,6 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
{
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
- struct page *failed_page;
int err;
sci->sc_stage.scnt = NILFS_ST_INIT;
@@ -2081,11 +1964,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
/* Write partial segments */
- err = nilfs_segctor_prepare_write(sci, &failed_page);
- if (err) {
- nilfs_abort_logs(&sci->sc_segbufs, failed_page, err);
- goto failed_to_write;
- }
+ nilfs_segctor_prepare_write(sci);
nilfs_add_checksums_on_logs(&sci->sc_segbufs,
nilfs->ns_crc_seed);
@@ -2687,7 +2566,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
INIT_LIST_HEAD(&sci->sc_segbufs);
INIT_LIST_HEAD(&sci->sc_write_logs);
INIT_LIST_HEAD(&sci->sc_gc_inodes);
- INIT_LIST_HEAD(&sci->sc_copied_buffers);
init_timer(&sci->sc_timer);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2741,8 +2619,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
if (flag || !nilfs_segctor_confirm(sci))
nilfs_segctor_write_out(sci);
- WARN_ON(!list_empty(&sci->sc_copied_buffers));
-
if (!list_empty(&sci->sc_dirty_files)) {
nilfs_warning(sci->sc_super, __func__,
"dirty file(s) after the final construction\n");
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 6c02a86..38a1d00 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -92,7 +92,6 @@ struct nilfs_segsum_pointer {
* @sc_nblk_inc: Block count of current generation
* @sc_dirty_files: List of files to be written
* @sc_gc_inodes: List of GC inodes having blocks to be written
- * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
* @sc_freesegs: array of segment numbers to be freed
* @sc_nfreesegs: number of segments on @sc_freesegs
* @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -136,7 +135,6 @@ struct nilfs_sc_info {
struct list_head sc_dirty_files;
struct list_head sc_gc_inodes;
- struct list_head sc_copied_buffers;
__u64 *sc_freesegs;
size_t sc_nfreesegs;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 1d6f488..0a0aba6 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -33,7 +33,9 @@
struct nilfs_sufile_info {
struct nilfs_mdt_info mi;
- unsigned long ncleansegs;
+ unsigned long ncleansegs; /* number of clean segments */
+ __u64 allocmin; /* lower limit of allocatable segment range */
+ __u64 allocmax; /* upper limit of allocatable segment range */
};
static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile)
@@ -96,6 +98,13 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
create, NULL, bhp);
}
+static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile,
+ __u64 segnum)
+{
+ return nilfs_mdt_delete_block(sufile,
+ nilfs_sufile_get_blkoff(sufile, segnum));
+}
+
static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
u64 ncleanadd, u64 ndirtyadd)
{
@@ -108,7 +117,7 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(header_bh);
}
/**
@@ -248,6 +257,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
}
/**
+ * nilfs_sufile_set_alloc_range - limit range of segments to be allocated
+ * @sufile: inode of segment usage file
+ * @start: minimum segment number of allocatable region (inclusive)
+ * @end: maximum segment number of allocatable region (inclusive)
+ *
+ * Return Value: On success, 0 is returned. On error, one of the
+ * following negative error codes is returned.
+ *
+ * %-ERANGE - invalid segment region
+ */
+int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
+{
+ struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
+ __u64 nsegs;
+ int ret = -ERANGE;
+
+ down_write(&NILFS_MDT(sufile)->mi_sem);
+ nsegs = nilfs_sufile_get_nsegments(sufile);
+
+ if (start <= end && end < nsegs) {
+ sui->allocmin = start;
+ sui->allocmax = end;
+ ret = 0;
+ }
+ up_write(&NILFS_MDT(sufile)->mi_sem);
+ return ret;
+}
+
+/**
* nilfs_sufile_alloc - allocate a segment
* @sufile: inode of segment usage file
* @segnump: pointer to segment number
@@ -269,11 +307,12 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
struct buffer_head *header_bh, *su_bh;
struct nilfs_sufile_header *header;
struct nilfs_segment_usage *su;
+ struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
size_t susz = NILFS_MDT(sufile)->mi_entry_size;
__u64 segnum, maxsegnum, last_alloc;
void *kaddr;
- unsigned long nsegments, ncleansegs, nsus;
- int ret, i, j;
+ unsigned long nsegments, ncleansegs, nsus, cnt;
+ int ret, j;
down_write(&NILFS_MDT(sufile)->mi_sem);
@@ -287,13 +326,31 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
kunmap_atomic(kaddr, KM_USER0);
nsegments = nilfs_sufile_get_nsegments(sufile);
+ maxsegnum = sui->allocmax;
segnum = last_alloc + 1;
- maxsegnum = nsegments - 1;
- for (i = 0; i < nsegments; i += nsus) {
- if (segnum >= nsegments) {
- /* wrap around */
- segnum = 0;
- maxsegnum = last_alloc;
+ if (segnum < sui->allocmin || segnum > sui->allocmax)
+ segnum = sui->allocmin;
+
+ for (cnt = 0; cnt < nsegments; cnt += nsus) {
+ if (segnum > maxsegnum) {
+ if (cnt < sui->allocmax - sui->allocmin + 1) {
+ /*
+ * wrap around in the limited region.
+ * if allocation started from
+ * sui->allocmin, this never happens.
+ */
+ segnum = sui->allocmin;
+ maxsegnum = last_alloc;
+ } else if (segnum > sui->allocmin &&
+ sui->allocmax + 1 < nsegments) {
+ segnum = sui->allocmax + 1;
+ maxsegnum = nsegments - 1;
+ } else if (sui->allocmin > 0) {
+ segnum = 0;
+ maxsegnum = sui->allocmin - 1;
+ } else {
+ break; /* never happens */
+ }
}
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
&su_bh);
@@ -319,9 +376,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
header->sh_last_alloc = cpu_to_le64(segnum);
kunmap_atomic(kaddr, KM_USER0);
- NILFS_SUI(sufile)->ncleansegs--;
- nilfs_mdt_mark_buffer_dirty(header_bh);
- nilfs_mdt_mark_buffer_dirty(su_bh);
+ sui->ncleansegs--;
+ mark_buffer_dirty(header_bh);
+ mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
brelse(su_bh);
*segnump = segnum;
@@ -364,7 +421,7 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
nilfs_sufile_mod_counter(header_bh, -1, 1);
NILFS_SUI(sufile)->ncleansegs--;
- nilfs_mdt_mark_buffer_dirty(su_bh);
+ mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
}
@@ -395,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
NILFS_SUI(sufile)->ncleansegs -= clean;
- nilfs_mdt_mark_buffer_dirty(su_bh);
+ mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
}
@@ -421,7 +478,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
sudirty = nilfs_segment_usage_dirty(su);
nilfs_segment_usage_set_clean(su);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(su_bh);
+ mark_buffer_dirty(su_bh);
nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
NILFS_SUI(sufile)->ncleansegs++;
@@ -441,7 +498,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
if (!ret) {
- nilfs_mdt_mark_buffer_dirty(bh);
+ mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
brelse(bh);
}
@@ -476,7 +533,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
su->su_nblocks = cpu_to_le32(nblocks);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(bh);
+ mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
brelse(bh);
@@ -505,7 +562,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
{
struct buffer_head *header_bh;
struct nilfs_sufile_header *header;
- struct the_nilfs *nilfs = NILFS_I_NILFS(sufile);
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
void *kaddr;
int ret;
@@ -555,11 +612,183 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
nilfs_sufile_mod_counter(header_bh, -1, 0);
NILFS_SUI(sufile)->ncleansegs--;
}
- nilfs_mdt_mark_buffer_dirty(su_bh);
+ mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
}
/**
+ * nilfs_sufile_truncate_range - truncate range of segment array
+ * @sufile: inode of segment usage file
+ * @start: start segment number (inclusive)
+ * @end: end segment number (inclusive)
+ *
+ * Return Value: On success, 0 is returned. On error, one of the
+ * following negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EINVAL - Invalid number of segments specified
+ *
+ * %-EBUSY - Dirty or active segments are present in the range
+ */
+static int nilfs_sufile_truncate_range(struct inode *sufile,
+ __u64 start, __u64 end)
+{
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+ struct buffer_head *header_bh;
+ struct buffer_head *su_bh;
+ struct nilfs_segment_usage *su, *su2;
+ size_t susz = NILFS_MDT(sufile)->mi_entry_size;
+ unsigned long segusages_per_block;
+ unsigned long nsegs, ncleaned;
+ __u64 segnum;
+ void *kaddr;
+ ssize_t n, nc;
+ int ret;
+ int j;
+
+ nsegs = nilfs_sufile_get_nsegments(sufile);
+
+ ret = -EINVAL;
+ if (start > end || start >= nsegs)
+ goto out;
+
+ ret = nilfs_sufile_get_header_block(sufile, &header_bh);
+ if (ret < 0)
+ goto out;
+
+ segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile);
+ ncleaned = 0;
+
+ for (segnum = start; segnum <= end; segnum += n) {
+ n = min_t(unsigned long,
+ segusages_per_block -
+ nilfs_sufile_get_offset(sufile, segnum),
+ end - segnum + 1);
+ ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0,
+ &su_bh);
+ if (ret < 0) {
+ if (ret != -ENOENT)
+ goto out_header;
+ /* hole */
+ continue;
+ }
+ kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+ su = nilfs_sufile_block_get_segment_usage(
+ sufile, segnum, su_bh, kaddr);
+ su2 = su;
+ for (j = 0; j < n; j++, su = (void *)su + susz) {
+ if ((le32_to_cpu(su->su_flags) &
+ ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
+ nilfs_segment_is_active(nilfs, segnum + j)) {
+ ret = -EBUSY;
+ kunmap_atomic(kaddr, KM_USER0);
+ brelse(su_bh);
+ goto out_header;
+ }
+ }
+ nc = 0;
+ for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) {
+ if (nilfs_segment_usage_error(su)) {
+ nilfs_segment_usage_set_clean(su);
+ nc++;
+ }
+ }
+ kunmap_atomic(kaddr, KM_USER0);
+ if (nc > 0) {
+ mark_buffer_dirty(su_bh);
+ ncleaned += nc;
+ }
+ brelse(su_bh);
+
+ if (n == segusages_per_block) {
+ /* make hole */
+ nilfs_sufile_delete_segment_usage_block(sufile, segnum);
+ }
+ }
+ ret = 0;
+
+out_header:
+ if (ncleaned > 0) {
+ NILFS_SUI(sufile)->ncleansegs += ncleaned;
+ nilfs_sufile_mod_counter(header_bh, ncleaned, 0);
+ nilfs_mdt_mark_dirty(sufile);
+ }
+ brelse(header_bh);
+out:
+ return ret;
+}
+
+/**
+ * nilfs_sufile_resize - resize segment array
+ * @sufile: inode of segment usage file
+ * @newnsegs: new number of segments
+ *
+ * Return Value: On success, 0 is returned. On error, one of the
+ * following negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-ENOSPC - Not enough free space is left for shrinking
+ *
+ * %-EBUSY - Dirty or active segments exist in the region to be truncated
+ */
+int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
+{
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+ struct buffer_head *header_bh;
+ struct nilfs_sufile_header *header;
+ struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
+ void *kaddr;
+ unsigned long nsegs, nrsvsegs;
+ int ret = 0;
+
+ down_write(&NILFS_MDT(sufile)->mi_sem);
+
+ nsegs = nilfs_sufile_get_nsegments(sufile);
+ if (nsegs == newnsegs)
+ goto out;
+
+ ret = -ENOSPC;
+ nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs);
+ if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs)
+ goto out;
+
+ ret = nilfs_sufile_get_header_block(sufile, &header_bh);
+ if (ret < 0)
+ goto out;
+
+ if (newnsegs > nsegs) {
+ sui->ncleansegs += newnsegs - nsegs;
+ } else /* newnsegs < nsegs */ {
+ ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1);
+ if (ret < 0)
+ goto out_header;
+
+ sui->ncleansegs -= nsegs - newnsegs;
+ }
+
+ kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+ header = kaddr + bh_offset(header_bh);
+ header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
+ kunmap_atomic(kaddr, KM_USER0);
+
+ mark_buffer_dirty(header_bh);
+ nilfs_mdt_mark_dirty(sufile);
+ nilfs_set_nsegments(nilfs, newnsegs);
+
+out_header:
+ brelse(header_bh);
+out:
+ up_write(&NILFS_MDT(sufile)->mi_sem);
+ return ret;
+}
+
+/**
* nilfs_sufile_get_suinfo -
* @sufile: inode of segment usage file
* @segnum: segment number to start looking
@@ -583,7 +812,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
struct nilfs_segment_usage *su;
struct nilfs_suinfo *si = buf;
size_t susz = NILFS_MDT(sufile)->mi_entry_size;
- struct the_nilfs *nilfs = NILFS_I_NILFS(sufile);
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
void *kaddr;
unsigned long nsegs, segusages_per_block;
ssize_t n;
@@ -679,6 +908,9 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
kunmap_atomic(kaddr, KM_USER0);
brelse(header_bh);
+ sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
+ sui->allocmin = 0;
+
unlock_new_inode(sufile);
out:
*inodep = sufile;
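
The rewritten loop in nilfs_sufile_alloc() above keeps the scan inside the [allocmin, allocmax] window and falls outside it only once the window is exhausted. A self-contained sketch of just the segment-number walk (one segment per probe instead of a block of usages, and the sufile state reduced to plain parameters):

#include <stdio.h>

static void scan_order(unsigned long nsegs, unsigned long allocmin,
		       unsigned long allocmax, unsigned long last_alloc)
{
	unsigned long segnum = last_alloc + 1;
	unsigned long maxsegnum = allocmax;
	unsigned long cnt;

	if (segnum < allocmin || segnum > allocmax)
		segnum = allocmin;

	for (cnt = 0; cnt < nsegs; cnt++) {
		if (segnum > maxsegnum) {
			if (cnt < allocmax - allocmin + 1) {
				/* wrap around inside the window */
				segnum = allocmin;
				maxsegnum = last_alloc;
			} else if (segnum > allocmin &&
				   allocmax + 1 < nsegs) {
				/* window exhausted: scan above it */
				segnum = allocmax + 1;
				maxsegnum = nsegs - 1;
			} else if (allocmin > 0) {
				/* last resort: scan below the window */
				segnum = 0;
				maxsegnum = allocmin - 1;
			} else {
				break;
			}
		}
		printf("probe segment %lu\n", segnum);
		segnum++;
	}
}

int main(void)
{
	/* probes 8, 9, wraps to 6, 7, then falls back to 0..5 */
	scan_order(10, 6, 9, 7);
	return 0;
}
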
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a943fba..e84bc5b 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -31,11 +31,12 @@
static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
{
- return NILFS_I_NILFS(sufile)->ns_nsegments;
+ return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments;
}
unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
+int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
int nilfs_sufile_alloc(struct inode *, __u64 *);
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
@@ -61,6 +62,7 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
struct buffer_head *);
+int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs);
int nilfs_sufile_read(struct super_block *sb, size_t susize,
struct nilfs_inode *raw_inode, struct inode **inodep);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 062cca0..8351c44 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -56,6 +56,7 @@
#include "btnode.h"
#include "page.h"
#include "cpfile.h"
+#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */
#include "ifile.h"
#include "dat.h"
#include "segment.h"
@@ -165,7 +166,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
ii->i_state = 0;
ii->i_cno = 0;
ii->vfs_inode.i_version = 1;
- nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi);
+ nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi);
return &ii->vfs_inode;
}
@@ -347,6 +348,134 @@ int nilfs_cleanup_super(struct super_block *sb)
return ret;
}
+/**
+ * nilfs_move_2nd_super - relocate secondary super block
+ * @sb: super block instance
+ * @sb2off: new offset of the secondary super block (in bytes)
+ */
+static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
+{
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ struct buffer_head *nsbh;
+ struct nilfs_super_block *nsbp;
+ sector_t blocknr, newblocknr;
+ unsigned long offset;
+ int sb2i = -1; /* array index of the secondary superblock */
+ int ret = 0;
+
+ /* nilfs->ns_sem must be locked by the caller. */
+ if (nilfs->ns_sbh[1] &&
+ nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) {
+ sb2i = 1;
+ blocknr = nilfs->ns_sbh[1]->b_blocknr;
+ } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
+ sb2i = 0;
+ blocknr = nilfs->ns_sbh[0]->b_blocknr;
+ }
+ if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
+ goto out; /* super block location is unchanged */
+
+ /* Get new super block buffer */
+ newblocknr = sb2off >> nilfs->ns_blocksize_bits;
+ offset = sb2off & (nilfs->ns_blocksize - 1);
+ nsbh = sb_getblk(sb, newblocknr);
+ if (!nsbh) {
+ printk(KERN_WARNING
+ "NILFS warning: unable to move secondary superblock "
+ "to block %llu\n", (unsigned long long)newblocknr);
+ ret = -EIO;
+ goto out;
+ }
+ nsbp = (void *)nsbh->b_data + offset;
+ memset(nsbp, 0, nilfs->ns_blocksize);
+
+ if (sb2i >= 0) {
+ memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
+ brelse(nilfs->ns_sbh[sb2i]);
+ nilfs->ns_sbh[sb2i] = nsbh;
+ nilfs->ns_sbp[sb2i] = nsbp;
+ } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) {
+ /* secondary super block will be restored to index 1 */
+ nilfs->ns_sbh[1] = nsbh;
+ nilfs->ns_sbp[1] = nsbp;
+ } else {
+ brelse(nsbh);
+ }
+out:
+ return ret;
+}
+
+/**
+ * nilfs_resize_fs - resize the filesystem
+ * @sb: super block instance
+ * @newsize: new size of the filesystem (in bytes)
+ */
+int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
+{
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ struct nilfs_super_block **sbp;
+ __u64 devsize, newnsegs;
+ loff_t sb2off;
+ int ret;
+
+ ret = -ERANGE;
+ devsize = i_size_read(sb->s_bdev->bd_inode);
+ if (newsize > devsize)
+ goto out;
+
+ /*
+ * Write lock is required to protect some functions depending
+ * on the number of segments, the number of reserved segments,
+ * and so forth.
+ */
+ down_write(&nilfs->ns_segctor_sem);
+
+ sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
+ newnsegs = sb2off >> nilfs->ns_blocksize_bits;
+ do_div(newnsegs, nilfs->ns_blocks_per_segment);
+
+ ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
+ up_write(&nilfs->ns_segctor_sem);
+ if (ret < 0)
+ goto out;
+
+ ret = nilfs_construct_segment(sb);
+ if (ret < 0)
+ goto out;
+
+ down_write(&nilfs->ns_sem);
+ nilfs_move_2nd_super(sb, sb2off);
+ ret = -EIO;
+ sbp = nilfs_prepare_super(sb, 0);
+ if (likely(sbp)) {
+ nilfs_set_log_cursor(sbp[0], nilfs);
+ /*
+ * Drop NILFS_RESIZE_FS flag for compatibility with
+ * mount-time resize which may be implemented in a
+ * future release.
+ */
+ sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
+ ~NILFS_RESIZE_FS);
+ sbp[0]->s_dev_size = cpu_to_le64(newsize);
+ sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
+ if (sbp[1])
+ memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
+ ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
+ }
+ up_write(&nilfs->ns_sem);
+
+ /*
+ * Reset the range of allocatable segments last. This order
+ * is important in the case of expansion because the secondary
+ * superblock must be protected from log write until migration
+ * completes.
+ */
+ if (!ret)
+ nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1);
+out:
+ return ret;
+}
+
static void nilfs_put_super(struct super_block *sb)
{
struct the_nilfs *nilfs = sb->s_fs_info;
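
nilfs_resize_fs() turns the requested byte count into a segment count by first reserving the tail of the device for the secondary superblock, then dividing by the segment size. The same arithmetic worked through with hypothetical geometry (4 KiB blocks, 2048 blocks per segment; the NILFS_SB2_OFFSET_BYTES definition is the one from nilfs2_fs.h, and the kernel uses do_div() where the sketch uses plain division):

#include <stdio.h>
#include <stdint.h>

#define NILFS_SB2_OFFSET_BYTES(devsize)  ((((devsize) >> 12) - 1) << 12)

int main(void)
{
	uint64_t newsize = 8ULL << 30;		  /* requested size: 8 GiB */
	unsigned blocksize_bits = 12;		  /* hypothetical: 4 KiB blocks */
	unsigned long blocks_per_segment = 2048;  /* hypothetical: 8 MiB segments */

	uint64_t sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
	uint64_t newnsegs = (sb2off >> blocksize_bits) / blocks_per_segment;

	/* 8 GiB -> sb2 at byte 8589930496, 1023 usable segments */
	printf("sb2off=%llu newnsegs=%llu\n",
	       (unsigned long long)sb2off, (unsigned long long)newnsegs);
	return 0;
}
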
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d2acd1a..d327140 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -363,6 +363,24 @@ static unsigned long long nilfs_max_size(unsigned int blkbits)
return res;
}
+/**
+ * nilfs_nrsvsegs - calculate the number of reserved segments
+ * @nilfs: nilfs object
+ * @nsegs: total number of segments
+ */
+unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
+{
+ return max_t(unsigned long, NILFS_MIN_NRSVSEGS,
+ DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage,
+ 100));
+}
+
+void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
+{
+ nilfs->ns_nsegments = nsegs;
+ nilfs->ns_nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs);
+}
+
static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
struct nilfs_super_block *sbp)
{
@@ -389,13 +407,9 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
}
nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
- nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments);
nilfs->ns_r_segments_percentage =
le32_to_cpu(sbp->s_r_segments_percentage);
- nilfs->ns_nrsvsegs =
- max_t(unsigned long, NILFS_MIN_NRSVSEGS,
- DIV_ROUND_UP(nilfs->ns_nsegments *
- nilfs->ns_r_segments_percentage, 100));
+ nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
return 0;
}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index f496814..9992b11 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -268,6 +268,8 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev);
void destroy_nilfs(struct the_nilfs *nilfs);
int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
+unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
+void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
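
Factoring the reservation into nilfs_nrsvsegs() means a resize recomputes it from the same formula used at mount time: reserved = max(NILFS_MIN_NRSVSEGS, ceil(nsegs * percentage / 100)). A sketch with illustrative values (NILFS_MIN_NRSVSEGS = 8 and the 5% ratio are assumptions here, standing in for the real constants):

#include <stdio.h>

#define MIN_NRSVSEGS 8	/* assumed value of NILFS_MIN_NRSVSEGS */

static unsigned long nrsvsegs(unsigned long nsegs, unsigned long pct)
{
	unsigned long rsv = (nsegs * pct + 99) / 100;	/* DIV_ROUND_UP */
	return rsv > MIN_NRSVSEGS ? rsv : MIN_NRSVSEGS;	/* max_t */
}

int main(void)
{
	printf("%lu\n", nrsvsegs(1023, 5));	/* 52 */
	printf("%lu\n", nrsvsegs(100, 5));	/* 8: the floor wins */
	return 0;
}
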
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index c3d6512..8845613 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -60,10 +60,6 @@ struct linux_binprm {
unsigned long loader, exec;
};
-extern void acct_arg_size(struct linux_binprm *bprm, unsigned long pages);
-extern struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
- int write);
-
#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
#define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 072fe8c..4255785 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -18,13 +18,13 @@
#include <linux/pci.h>
#include <linux/completion.h>
#include <linux/pm.h>
+#include <linux/mutex.h>
#ifdef CONFIG_BLK_DEV_IDEACPI
#include <acpi/acpi.h>
#endif
#include <asm/byteorder.h>
#include <asm/system.h>
#include <asm/io.h>
-#include <asm/mutex.h>
/* for request_sense */
#include <linux/cdrom.h>
diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 8768c46..7454ad7 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -107,7 +107,7 @@ struct nilfs_super_root {
#define NILFS_SR_DAT_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 0)
#define NILFS_SR_CPFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 1)
#define NILFS_SR_SUFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 2)
-#define NILFS_SR_BYTES (sizeof(struct nilfs_super_root))
+#define NILFS_SR_BYTES(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 3)
/*
* Maximal mount counts
@@ -845,5 +845,7 @@ struct nilfs_bdesc {
_IOR(NILFS_IOCTL_IDENT, 0x8A, __u64)
#define NILFS_IOCTL_RESIZE \
_IOW(NILFS_IOCTL_IDENT, 0x8B, __u64)
+#define NILFS_IOCTL_SET_ALLOC_RANGE \
+ _IOW(NILFS_IOCTL_IDENT, 0x8C, __u64[2])
#endif /* _LINUX_NILFS_FS_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index aeaad97..e8b78ce 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1782,7 +1782,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
#define skb_queue_walk(queue, skb) \
for (skb = (queue)->next; \
- (skb != (struct sk_buff *)(queue)); \
+ skb != (struct sk_buff *)(queue); \
skb = skb->next)
#define skb_queue_walk_safe(queue, skb, tmp) \
@@ -1791,7 +1791,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
skb = tmp, tmp = skb->next)
#define skb_queue_walk_from(queue, skb) \
- for (; (skb != (struct sk_buff *)(queue)); \
+ for (; skb != (struct sk_buff *)(queue); \
skb = skb->next)
#define skb_queue_walk_from_safe(queue, skb, tmp) \
@@ -1801,7 +1801,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
#define skb_queue_reverse_walk(queue, skb) \
for (skb = (queue)->prev; \
- (skb != (struct sk_buff *)(queue)); \
+ skb != (struct sk_buff *)(queue); \
skb = skb->prev)
#define skb_queue_reverse_walk_safe(queue, skb, tmp) \
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8c7189c..e6d6a66 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -538,7 +538,7 @@ struct ieee80211_tx_info {
};
/**
- * ieee80211_sched_scan_ies - scheduled scan IEs
+ * struct ieee80211_sched_scan_ies - scheduled scan IEs
*
* This structure is used to pass the appropriate IEs to be used in scheduled
* scans for all bands. It contains both the IEs passed from the userspace
@@ -2278,6 +2278,7 @@ static inline int ieee80211_sta_ps_transition_ni(struct ieee80211_sta *sta,
/**
* ieee80211_sta_set_tim - set the TIM bit for a sleeping station
+ * @sta: &struct ieee80211_sta pointer for the sleeping station
*
* If a driver buffers frames for a powersave station instead of passing
* them back to mac80211 for retransmission, the station needs to be told
diff --git a/init/Kconfig b/init/Kconfig
index 4986ecc..c8b172e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -827,11 +827,6 @@ config SCHED_AUTOGROUP
desktop applications. Task group autogeneration is currently based
upon task session.
-config SCHED_TTWU_QUEUE
- bool
- depends on !SPARC32
- default y
-
config MM_OWNER
bool
@@ -908,7 +903,6 @@ endif
config CC_OPTIMIZE_FOR_SIZE
bool "Optimize for size"
- default y
help
Enabling this option will pass "-Os" instead of "-O2" to gcc
resulting in a smaller kernel.
diff --git a/kernel/sched.c b/kernel/sched.c
index c62acf4..0516af4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2564,7 +2564,7 @@ static void ttwu_queue(struct task_struct *p, int cpu)
{
struct rq *rq = cpu_rq(cpu);
-#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
+#if defined(CONFIG_SMP)
if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
ttwu_queue_remote(p, cpu);
return;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index dd5a320..58c25ea 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -67,12 +67,12 @@
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
-#include <linux/prefetch.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <linux/prefetch.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 8ce792e..1fd29b2 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -36,6 +36,7 @@ $default{"REBOOT_ON_SUCCESS"} = 1;
$default{"POWEROFF_ON_SUCCESS"} = 0;
$default{"BUILD_OPTIONS"} = "";
$default{"BISECT_SLEEP_TIME"} = 60; # sleep time between bisects
+$default{"PATCHCHECK_SLEEP_TIME"} = 60; # sleep time between patch checks
$default{"CLEAR_LOG"} = 0;
$default{"BISECT_MANUAL"} = 0;
$default{"BISECT_SKIP"} = 1;
@@ -96,6 +97,7 @@ my $monitor_pid;
my $monitor_cnt = 0;
my $sleep_time;
my $bisect_sleep_time;
+my $patchcheck_sleep_time;
my $store_failures;
my $timeout;
my $booted_timeout;
@@ -112,6 +114,7 @@ my $successes = 0;
my %entered_configs;
my %config_help;
+my %variable;
$config_help{"MACHINE"} = << "EOF"
The machine hostname that you will test.
@@ -260,6 +263,39 @@ sub get_ktest_configs {
}
}
+sub process_variables {
+ my ($value) = @_;
+ my $retval = "";
+
+ # We want to check for '\', and it is just easier
+ # to check the character before '$' than to worry
+ # about whether '$' is the first character. By adding
+ # a space to $value, we can just check [^\\]\$ and
+ # it will still work.
+ $value = " $value";
+
+ while ($value =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
+ my $begin = $1;
+ my $var = $2;
+ my $end = $3;
+ # append beginning of value to retval
+ $retval = "$retval$begin";
+ if (defined($variable{$var})) {
+ $retval = "$retval$variable{$var}";
+ } else {
+ # put back the original piece.
+ $retval = "$retval\$\{$var\}";
+ }
+ $value = $end;
+ }
+ $retval = "$retval$value";
+
+ # remove the space added at the beginning
+ $retval =~ s/ //;
+
+ return "$retval"
+}
+
sub set_value {
my ($lvalue, $rvalue) = @_;
@@ -269,10 +305,22 @@ sub set_value {
if ($rvalue =~ /^\s*$/) {
delete $opt{$lvalue};
} else {
+ $rvalue = process_variables($rvalue);
$opt{$lvalue} = $rvalue;
}
}
+sub set_variable {
+ my ($lvalue, $rvalue) = @_;
+
+ if ($rvalue =~ /^\s*$/) {
+ delete $variable{$lvalue};
+ } else {
+ $rvalue = process_variables($rvalue);
+ $variable{$lvalue} = $rvalue;
+ }
+}
+
sub read_config {
my ($config) = @_;
@@ -385,6 +433,22 @@ sub read_config {
$repeats{$val} = $repeat;
}
}
+ } elsif (/^\s*([A-Z_\[\]\d]+)\s*:=\s*(.*?)\s*$/) {
+ next if ($skip);
+
+ my $lvalue = $1;
+ my $rvalue = $2;
+
+ # process config variables.
+ # Config variables are only active while reading the
+ # config and can be defined anywhere. They also ignore
+ # TEST_START and DEFAULTS, but are skipped if they are in
+ # one of those sections that have SKIP defined.
+ # The same variable can be
+ # defined multiple times and the new one simply overrides
+ # the previous one.
+ set_variable($lvalue, $rvalue);
+
} else {
die "$name: $.: Garbage found in config\n$_";
}
@@ -838,6 +902,7 @@ sub monitor {
if ($stop_test_after > 0 && !$booted && !$bug) {
if (time - $monitor_start > $stop_test_after) {
+ doprint "STOP_TEST_AFTER ($stop_test_after seconds) timed out\n";
$done = 1;
}
}
@@ -907,7 +972,7 @@ sub install {
return if (!defined($post_install));
my $cp_post_install = $post_install;
- $cp_post_install = s/\$KERNEL_VERSION/$version/g;
+ $cp_post_install =~ s/\$KERNEL_VERSION/$version/g;
run_command "$cp_post_install" or
dodie "Failed to run post install";
}
@@ -1247,14 +1312,14 @@ sub run_bisect_test {
if ($failed) {
$result = 0;
-
- # reboot the box to a good kernel
- if ($type ne "build") {
- bisect_reboot;
- }
} else {
$result = 1;
}
+
+ # reboot the box to a kernel we can ssh to
+ if ($type ne "build") {
+ bisect_reboot;
+ }
$in_bisect = 0;
return $result;
@@ -1763,6 +1828,14 @@ sub config_bisect {
success $i;
}
+sub patchcheck_reboot {
+ doprint "Reboot and sleep $patchcheck_sleep_time seconds\n";
+ reboot;
+ start_monitor;
+ wait_for_monitor $patchcheck_sleep_time;
+ end_monitor;
+}
+
sub patchcheck {
my ($i) = @_;
@@ -1854,6 +1927,8 @@ sub patchcheck {
end_monitor;
return 0 if ($failed);
+ patchcheck_reboot;
+
}
$in_patchcheck = 0;
success $i;
@@ -1944,7 +2019,7 @@ for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
}
}
-sub set_test_option {
+sub __set_test_option {
my ($name, $i) = @_;
my $option = "$name\[$i\]";
@@ -1970,6 +2045,72 @@ sub set_test_option {
return undef;
}
+sub eval_option {
+ my ($option, $i) = @_;
+
+ # Add space to evaluate the character before $
+ $option = " $option";
+ my $retval = "";
+
+ while ($option =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
+ my $start = $1;
+ my $var = $2;
+ my $end = $3;
+
+ # Append beginning of line
+ $retval = "$retval$start";
+
+ # If the iteration option OPT[$i] exists, then use that;
+ # otherwise see if the default OPT (without [$i]) exists.
+
+ my $o = "$var\[$i\]";
+
+ if (defined($opt{$o})) {
+ $o = $opt{$o};
+ $retval = "$retval$o";
+ } elsif (defined($opt{$var})) {
+ $o = $opt{$var};
+ $retval = "$retval$o";
+ } else {
+ $retval = "$retval\$\{$var\}";
+ }
+
+ $option = $end;
+ }
+
+ $retval = "$retval$option";
+
+ $retval =~ s/^ //;
+
+ return $retval;
+}
+
+sub set_test_option {
+ my ($name, $i) = @_;
+
+ my $option = __set_test_option($name, $i);
+ return $option if (!defined($option));
+
+ my $prev = "";
+
+ # Since an option can evaluate to another option,
+ # keep iterating until we do not evaluate any more
+ # options.
+ my $r = 0;
+ while ($prev ne $option) {
+ # Check for recursive evaluations.
+ # 100 deep should be more than enough.
+ if ($r++ > 100) {
+ die "Over 100 evaluations accurred with $name\n" .
+ "Check for recursive variables\n";
+ }
+ $prev = $option;
+ $option = eval_option($option, $i);
+ }
+
+ return $option;
+}
+
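To make the two-level lookup concrete, here is a standalone hedged sketch (the %opt contents are invented): eval_option() substitutes one layer of ${X} per pass, preferring the per-test OPT[$i] over the default OPT, and set_test_option() loops until the string stops changing.

    #!/usr/bin/perl
    use strict;
    use warnings;

    # Invented options; "MACHINE[2]" is a per-test override for test 2.
    my %opt = (
        "MACHINE"    => "defaultbox",
        "MACHINE[2]" => "box2",
        "CMD"        => "/path/to/test",
        "TEST"       => 'ssh root@${MACHINE} ${CMD}',
    );

    sub eval_option {
        my ($option, $i) = @_;
        $option = " $option";                 # guard for the [^\\]\$ check
        my $retval = "";
        while ($option =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
            my ($start, $var, $end) = ($1, $2, $3);
            $retval .= $start;
            if (defined($opt{"$var\[$i\]"})) {
                $retval .= $opt{"$var\[$i\]"};    # per-test value wins
            } elsif (defined($opt{$var})) {
                $retval .= $opt{$var};            # fall back to the default
            } else {
                $retval .= "\$\{$var\}";          # leave unknowns alone
            }
            $option = $end;
        }
        $retval .= $option;
        $retval =~ s/^ //;
        return $retval;
    }

    my $option = $opt{"TEST"};
    my $prev   = "";
    while ($prev ne $option) {                # iterate until stable
        $prev   = $option;
        $option = eval_option($option, 2);
    }
    print "$option\n";                        # => ssh root@box2 /path/to/test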
# The first thing we need to do is the builds
for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
@@ -2003,6 +2144,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
$poweroff_after_halt = set_test_option("POWEROFF_AFTER_HALT", $i);
$sleep_time = set_test_option("SLEEP_TIME", $i);
$bisect_sleep_time = set_test_option("BISECT_SLEEP_TIME", $i);
+ $patchcheck_sleep_time = set_test_option("PATCHCHECK_SLEEP_TIME", $i);
$bisect_manual = set_test_option("BISECT_MANUAL", $i);
$bisect_skip = set_test_option("BISECT_SKIP", $i);
$store_failures = set_test_option("STORE_FAILURES", $i);
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 4c5d6bd..48cbcc80 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -73,6 +73,95 @@
# ktest will fail to execute, and no tests will run.
#
+#### Config variables ####
+#
+# This config file can also contain "config variables".
+# These are assigned with ":=" instead of the ktest option
+# assignment "=".
+#
+# The difference between ktest options and config variables
+# is that config variables can be assigned multiple times,
+# where each instance overrides the previous instance, and
+# they only live while this config is being processed.
+#
+# The advantage of config variables is that they can be used
+# by any option or any other config variable to define things
+# that you may use over and over again in the options.
+#
+# For example:
+#
+# USER := root
+# TARGET := mybox
+# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test
+#
+# TEST_START
+# MIN_CONFIG = config1
+# TEST = ${TEST_CASE}
+#
+# TEST_START
+# MIN_CONFIG = config2
+# TEST = ${TEST_CASE}
+#
+# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test2
+#
+# TEST_START
+# MIN_CONFIG = config1
+# TEST = ${TEST_CASE}
+#
+# TEST_START
+# MIN_CONFIG = config2
+# TEST = ${TEST_CASE}
+#
+# TEST_DIR := /home/me/test
+#
+# BUILD_DIR = ${TEST_DIR}/linux.git
+# OUTPUT_DIR = ${TEST_DIR}/test
+#
+# Note, the config variables are evaluated immediately, thus
+# updating TARGET after TEST_CASE has been assigned does nothing
+# to TEST_CASE.
+#
+# As shown in the example, to evaluate a config variable, you
+# use the ${X} convention. A simple $X will not work.
+#
+# If the config variable does not exist, the ${X} will not
+# be evaluated. Thus:
+#
+# MAKE_CMD = PATH=/mypath:${PATH} make
+#
+# If PATH is not a config variable, then the ${PATH} in
+# the MAKE_CMD option will be evaluated by the shell when
+# the MAKE_CMD option is passed into shell processing.
+
+#### Using options in other options ####
+#
+# Options that are defined in the config file may also be used
+# by other options. All options are evaluated at time of
+# use (except that config variables are evaluated at config
+# processing time).
+#
+# If a ktest option is used within another option, instead of
+# typing it again in that option you can simply reference the
+# option, just like you can with config variables.
+#
+# MACHINE = mybox
+#
+# TEST = ssh root@${MACHINE} /path/to/test
+#
+# The option will be evaluated per test case. Thus:
+#
+# TEST_TYPE = test
+# TEST = ssh root@${MACHINE}
+#
+# TEST_START
+# MACHINE = box1
+#
+# TEST_START
+# MACHINE = box2
+#
+# For both test cases, MACHINE will be evaluated at the time
+# of the test case. The first test will run ssh root@box1
+# and the second will run ssh root@box2.
#### Mandatory Default Options ####
@@ -366,6 +455,10 @@
# (default 60)
#BISECT_SLEEP_TIME = 60
+# The time to sleep in between patch checks (in seconds)
+# (default 60)
+#PATCHCHECK_SLEEP_TIME = 60
+
# Reboot the target box on error (default 0)
#REBOOT_ON_ERROR = 0
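Like SLEEP_TIME and BISECT_SLEEP_TIME, this option can also be set per test. A partial hedged example (the value is made up, and the other options a patchcheck test requires are omitted):

# TEST_START
# TEST_TYPE = patchcheck
# PATCHCHECK_SLEEP_TIME = 120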