diff options
Diffstat (limited to 'arch')
50 files changed, 878 insertions, 595 deletions
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 295e0a8..b208573 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -176,6 +176,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) cpu_set(cpu, mm->cpu_vm_mask); cpu_switch_mm(mm->pgd, mm); enter_lazy_tlb(mm, current); + local_flush_tlb_all(); cpu_init(); diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index 5382a30..2036ff1 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -7,7 +7,7 @@ 1: ldrexb r2, [r1] \instr r2, r2, r3 strexb r0, r2, [r1] - cmpne r0, #0 + cmp r0, #0 bne 1b mov pc, lr .endm diff --git a/arch/arm/mach-integrator/platsmp.c b/arch/arm/mach-integrator/platsmp.c index aecf47b..ea10bd8 100644 --- a/arch/arm/mach-integrator/platsmp.c +++ b/arch/arm/mach-integrator/platsmp.c @@ -15,6 +15,7 @@ #include <linux/mm.h> #include <asm/atomic.h> +#include <asm/cacheflush.h> #include <asm/delay.h> #include <asm/mmu_context.h> #include <asm/procinfo.h> @@ -80,6 +81,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) * "cpu" is Linux's internal ID. */ pen_release = cpu; + flush_cache_all(); /* * XXX diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 31f65c8..ab6e061 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -1,5 +1,5 @@ /* - * (c) 2003, 2004 Advanced Micro Devices, Inc. + * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc. * Your use of this code is subject to the terms and conditions of the * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html @@ -44,7 +44,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.40.2" +#define VERSION "version 1.50.3" #include "powernow-k8.h" /* serialize freq changes */ @@ -231,7 +231,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) /* * Reduce the vid by the max of step or reqvid. * Decreasing vid codes represent increasing voltages: - * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of 0x1f is off. + * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. */ static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) { @@ -466,7 +466,7 @@ static int check_supported_cpu(unsigned int cpu) eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || ((eax & CPUID_XFAM) != CPUID_XFAM_K8) || - ((eax & CPUID_XMOD) > CPUID_XMOD_REV_E)) { + ((eax & CPUID_XMOD) > CPUID_XMOD_REV_F)) { printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); goto out; } @@ -695,6 +695,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; + data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; @@ -734,8 +735,16 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) } for (i = 0; i < data->acpi_data.state_count; i++) { - u32 fid = data->acpi_data.states[i].control & FID_MASK; - u32 vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; + u32 fid; + u32 vid; + + if (data->exttype) { + fid = data->acpi_data.states[i].status & FID_MASK; + vid = (data->acpi_data.states[i].status >> VID_SHIFT) & VID_MASK; + } else { + fid = data->acpi_data.states[i].control & FID_MASK; + vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; + } dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); @@ -752,7 +761,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) } /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ - if (vid == 0x1f) { + if (vid == VID_OFF) { dprintk("invalid vid %u, ignoring\n", vid); powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; continue; @@ -929,15 +938,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi down(&fidvid_sem); - for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { - /* make sure the sibling is initialized */ - if (!powernow_data[i]) { - ret = 0; - up(&fidvid_sem); - goto err_out; - } - } - powernow_k8_acpi_pst_values(data, newstate); if (transition_frequency(data, newstate)) { @@ -977,7 +977,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask = CPU_MASK_ALL; - int rc; + int rc, i; if (!check_supported_cpu(pol->cpu)) return -ENODEV; @@ -1063,7 +1063,9 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) printk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); - powernow_data[pol->cpu] = data; + for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { + powernow_data[i] = data; + } return 0; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index 9ed5bf2..b1e85bb 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -1,5 +1,5 @@ /* - * (c) 2003, 2004 Advanced Micro Devices, Inc. + * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc. * Your use of this code is subject to the terms and conditions of the * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html @@ -19,6 +19,7 @@ struct powernow_k8_data { u32 vidmvs; /* usable value calculated from mvs */ u32 vstable; /* voltage stabilization time, units 20 us */ u32 plllock; /* pll lock time, units 1 us */ + u32 exttype; /* extended interface = 1 */ /* keep track of the current fid / vid */ u32 currvid, currfid; @@ -41,7 +42,7 @@ struct powernow_k8_data { #define CPUID_XFAM 0x0ff00000 /* extended family */ #define CPUID_XFAM_K8 0 #define CPUID_XMOD 0x000f0000 /* extended model */ -#define CPUID_XMOD_REV_E 0x00020000 +#define CPUID_XMOD_REV_F 0x00040000 #define CPUID_USE_XFAM_XMOD 0x00000f00 #define CPUID_GET_MAX_CAPABILITIES 0x80000000 #define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 @@ -57,25 +58,26 @@ struct powernow_k8_data { /* Field definitions within the FID VID Low Control MSR : */ #define MSR_C_LO_INIT_FID_VID 0x00010000 -#define MSR_C_LO_NEW_VID 0x00001f00 -#define MSR_C_LO_NEW_FID 0x0000002f +#define MSR_C_LO_NEW_VID 0x00003f00 +#define MSR_C_LO_NEW_FID 0x0000003f #define MSR_C_LO_VID_SHIFT 8 /* Field definitions within the FID VID High Control MSR : */ -#define MSR_C_HI_STP_GNT_TO 0x000fffff +#define MSR_C_HI_STP_GNT_TO 0x000fffff /* Field definitions within the FID VID Low Status MSR : */ -#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ -#define MSR_S_LO_MAX_RAMP_VID 0x1f000000 +#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ +#define MSR_S_LO_MAX_RAMP_VID 0x3f000000 #define MSR_S_LO_MAX_FID 0x003f0000 #define MSR_S_LO_START_FID 0x00003f00 #define MSR_S_LO_CURRENT_FID 0x0000003f /* Field definitions within the FID VID High Status MSR : */ -#define MSR_S_HI_MAX_WORKING_VID 0x001f0000 -#define MSR_S_HI_START_VID 0x00001f00 -#define MSR_S_HI_CURRENT_VID 0x0000001f -#define MSR_C_HI_STP_GNT_BENIGN 0x00000001 +#define MSR_S_HI_MIN_WORKING_VID 0x3f000000 +#define MSR_S_HI_MAX_WORKING_VID 0x003f0000 +#define MSR_S_HI_START_VID 0x00003f00 +#define MSR_S_HI_CURRENT_VID 0x0000003f +#define MSR_C_HI_STP_GNT_BENIGN 0x00000001 /* * There are restrictions frequencies have to follow: @@ -99,13 +101,15 @@ struct powernow_k8_data { #define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */ #define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */ -#define LEAST_VID 0x1e /* Lowest (numerically highest) useful vid value */ +#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */ #define MIN_FREQ 800 /* Min and max freqs, per spec */ #define MAX_FREQ 5000 #define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */ -#define INVALID_VID_MASK 0xffffffe0 /* not a valid vid if these bits are set */ +#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ + +#define VID_OFF 0x3f #define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */ @@ -121,12 +125,14 @@ struct powernow_k8_data { #define IRT_SHIFT 30 #define RVO_SHIFT 28 +#define EXT_TYPE_SHIFT 27 #define PLL_L_SHIFT 20 #define MVS_SHIFT 18 #define VST_SHIFT 11 #define VID_SHIFT 6 #define IRT_MASK 3 #define RVO_MASK 3 +#define EXT_TYPE_MASK 1 #define PLL_L_MASK 0x7f #define MVS_MASK 3 #define VST_MASK 0x7f diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 1d768b2..6c55b50 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -128,7 +128,7 @@ static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le cpuid_count(4, index, &eax, &ebx, &ecx, &edx); cache_eax.full = eax; if (cache_eax.split.type == CACHE_TYPE_NULL) - return -1; + return -EIO; /* better error ? */ this_leaf->eax.full = eax; this_leaf->ebx.full = ebx; @@ -334,6 +334,7 @@ static int __devinit detect_cache_attributes(unsigned int cpu) struct _cpuid4_info *this_leaf; unsigned long j; int retval; + cpumask_t oldmask; if (num_cache_leaves == 0) return -ENOENT; @@ -345,19 +346,26 @@ static int __devinit detect_cache_attributes(unsigned int cpu) memset(cpuid4_info[cpu], 0, sizeof(struct _cpuid4_info) * num_cache_leaves); + oldmask = current->cpus_allowed; + retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (retval) + goto out; + /* Do cpuid and store the results */ + retval = 0; for (j = 0; j < num_cache_leaves; j++) { this_leaf = CPUID4_INFO_IDX(cpu, j); retval = cpuid4_cache_lookup(j, this_leaf); if (unlikely(retval < 0)) - goto err_out; + break; cache_shared_cpu_map_setup(cpu, j); } - return 0; + set_cpus_allowed(current, oldmask); -err_out: - free_cache_attributes(cpu); - return -ENOMEM; +out: + if (retval) + free_cache_attributes(cpu); + return retval; } #ifdef CONFIG_SYSFS diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index 52ed18d..cb699a2 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -16,6 +16,7 @@ #include <asm/io.h> #include <asm/apic.h> #include <asm/cpufeature.h> +#include <asm/desc.h> static inline unsigned long read_cr3(void) { @@ -90,33 +91,32 @@ static void identity_map_page(unsigned long address) } #endif - static void set_idt(void *newidt, __u16 limit) { - unsigned char curidt[6]; + struct Xgt_desc_struct curidt; /* ia32 supports unaliged loads & stores */ - (*(__u16 *)(curidt)) = limit; - (*(__u32 *)(curidt +2)) = (unsigned long)(newidt); + curidt.size = limit; + curidt.address = (unsigned long)newidt; __asm__ __volatile__ ( - "lidt %0\n" - : "=m" (curidt) + "lidtl %0\n" + : : "m" (curidt) ); }; static void set_gdt(void *newgdt, __u16 limit) { - unsigned char curgdt[6]; + struct Xgt_desc_struct curgdt; /* ia32 supports unaligned loads & stores */ - (*(__u16 *)(curgdt)) = limit; - (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt); + curgdt.size = limit; + curgdt.address = (unsigned long)newgdt; __asm__ __volatile__ ( - "lgdt %0\n" - : "=m" (curgdt) + "lgdtl %0\n" + : : "m" (curgdt) ); }; diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index af917f6..ce838ab 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -1116,7 +1116,15 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) */ int irq = gsi; if (gsi < MAX_GSI_NUM) { - gsi = pci_irq++; + if (gsi > 15) + gsi = pci_irq++; +#ifdef CONFIG_ACPI_BUS + /* + * Don't assign IRQ used by ACPI SCI + */ + if (gsi == acpi_fadt.sci_int) + gsi = pci_irq++; +#endif gsi_to_irq[irq] = gsi; } else { printk(KERN_ERR "GSI %u is too high\n", gsi); diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c index e51edf0..5f5b075 100644 --- a/arch/i386/kernel/numaq.c +++ b/arch/i386/kernel/numaq.c @@ -31,6 +31,7 @@ #include <linux/nodemask.h> #include <asm/numaq.h> #include <asm/topology.h> +#include <asm/processor.h> #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) @@ -77,3 +78,11 @@ int __init get_memcfg_numaq(void) smp_dump_qct(); return 1; } + +static int __init numaq_dsc_disable(void) +{ + printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); + tsc_disable = 1; + return 0; +} +core_initcall(numaq_dsc_disable); diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index b358f07..c369a8b 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -243,6 +243,14 @@ static unsigned long calculate_numa_remap_pages(void) /* now the roundup is correct, convert to PAGE_SIZE pages */ size = size * PTRS_PER_PTE; + if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { + /* + * Adjust size if node_end_pfn is not on a proper + * pmd boundary. remap_numa_kva will barf otherwise. + */ + size += node_end_pfn[nid] & (PTRS_PER_PTE-1); + } + /* * Validate the region we are allocating only contains valid * pages. diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 187350c..86348b6 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -551,6 +551,13 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { /* FIXME: We should move some of the quirk fixup stuff here */ + + if (router->device == PCI_DEVICE_ID_VIA_82C686 && + device == PCI_DEVICE_ID_VIA_82C586_0) { + /* Asus k7m bios wrongly reports 82C686A as 586-compatible */ + device = PCI_DEVICE_ID_VIA_82C686; + } + switch(device) { case PCI_DEVICE_ID_VIA_82C586_0: diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index c067435..c9f2f60 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -232,7 +232,11 @@ static int appldata_diag(char record_nr, u16 function, unsigned long buffer, ry = -1; asm volatile( "diag %1,%0,0xDC\n\t" - : "=d" (ry) : "d" (&(appldata_parameter_list)) : "cc"); + : "=d" (ry) + : "d" (&appldata_parameter_list), + "m" (appldata_parameter_list), + "m" (appldata_product_id) + : "cc"); return (int) ry; } /************************ timer, work, DIAG <END> ****************************/ diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 89850b2..0865251 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc3 -# Fri Apr 22 15:30:58 2005 +# Linux kernel version: 2.6.13-rc4 +# Fri Jul 29 14:49:30 2005 # CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y @@ -23,10 +23,11 @@ CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_LOCALVERSION="" CONFIG_SWAP=y CONFIG_SYSVIPC=y -# CONFIG_POSIX_MQUEUE is not set +CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y -# CONFIG_AUDIT is not set +CONFIG_AUDIT=y +# CONFIG_AUDITSYSCALL is not set CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y @@ -36,6 +37,8 @@ CONFIG_IKCONFIG_PROC=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -51,9 +54,10 @@ CONFIG_BASE_SMALL=0 # Loadable module support # CONFIG_MODULES=y -# CONFIG_MODULE_UNLOAD is not set +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set CONFIG_OBSOLETE_MODPARM=y -# CONFIG_MODVERSIONS is not set +CONFIG_MODVERSIONS=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_KMOD=y CONFIG_STOP_MACHINE=y @@ -81,8 +85,15 @@ CONFIG_MARCH_G5=y # CONFIG_MARCH_Z990 is not set CONFIG_PACK_STACK=y # CONFIG_SMALL_STACK is not set -# CONFIG_CHECK_STACK is not set +CONFIG_CHECK_STACK=y +CONFIG_STACK_GUARD=256 # CONFIG_WARN_STACK is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y # # I/O subsystem configuration @@ -95,7 +106,7 @@ CONFIG_QDIO=y # # Misc # -# CONFIG_PREEMPT is not set +CONFIG_PREEMPT=y CONFIG_IPL=y # CONFIG_IPL_TAPE is not set CONFIG_IPL_VM=y @@ -105,9 +116,110 @@ CONFIG_BINFMT_MISC=m CONFIG_PFAULT=y # CONFIG_SHARED_KERNEL is not set # CONFIG_CMM is not set -# CONFIG_VIRT_TIMER is not set +CONFIG_VIRT_TIMER=y +CONFIG_VIRT_CPU_ACCOUNTING=y +# CONFIG_APPLDATA_BASE is not set CONFIG_NO_IDLE_HZ=y CONFIG_NO_IDLE_HZ_INIT=y +# CONFIG_KEXEC is not set + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_IP_TCPDIAG=y +CONFIG_IP_TCPDIAG_IPV6=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_BIC=y +CONFIG_IPV6=y +# CONFIG_IPV6_PRIVACY is not set +# CONFIG_INET6_AH is not set +# CONFIG_INET6_ESP is not set +# CONFIG_INET6_IPCOMP is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_IPV6_TUNNEL is not set +# CONFIG_NETFILTER is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CLK_JIFFIES=y +# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set +# CONFIG_NET_SCH_CLK_CPU is not set +CONFIG_NET_SCH_CBQ=m +# CONFIG_NET_SCH_HTB is not set +# CONFIG_NET_SCH_HFSC is not set +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_INGRESS is not set +CONFIG_NET_QOS=y +CONFIG_NET_ESTIMATOR=y +CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +# CONFIG_CLS_U32_PERF is not set +# CONFIG_NET_CLS_IND is not set +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +# CONFIG_NET_EMATCH is not set +# CONFIG_NET_CLS_ACT is not set +CONFIG_NET_CLS_POLICE=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set # CONFIG_PCMCIA is not set # @@ -133,6 +245,7 @@ CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set # # Some SCSI devices (e.g. CD jukebox) support multiple LUNs @@ -205,7 +318,13 @@ CONFIG_MD_RAID5=m # CONFIG_MD_RAID6 is not set CONFIG_MD_MULTIPATH=m # CONFIG_MD_FAULTY is not set -# CONFIG_BLK_DEV_DM is not set +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_SNAPSHOT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_MULTIPATH=y +# CONFIG_DM_MULTIPATH_EMC is not set # # Character device drivers @@ -231,7 +350,8 @@ CONFIG_CCW_CONSOLE=y CONFIG_SCLP=y CONFIG_SCLP_TTY=y CONFIG_SCLP_CONSOLE=y -# CONFIG_SCLP_VT220_TTY is not set +CONFIG_SCLP_VT220_TTY=y +CONFIG_SCLP_VT220_CONSOLE=y CONFIG_SCLP_CPI=m CONFIG_S390_TAPE=m @@ -255,105 +375,8 @@ CONFIG_S390_TAPE_34XX=m CONFIG_Z90CRYPT=m # -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -CONFIG_NET_KEY=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_TUNNEL is not set -CONFIG_IP_TCPDIAG=y -CONFIG_IP_TCPDIAG_IPV6=y -CONFIG_IPV6=y -# CONFIG_IPV6_PRIVACY is not set -# CONFIG_INET6_AH is not set -# CONFIG_INET6_ESP is not set -# CONFIG_INET6_IPCOMP is not set -# CONFIG_INET6_TUNNEL is not set -# CONFIG_IPV6_TUNNEL is not set -# CONFIG_NETFILTER is not set -CONFIG_XFRM=y -# CONFIG_XFRM_USER is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CLK_JIFFIES=y -# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set -# CONFIG_NET_SCH_CLK_CPU is not set -CONFIG_NET_SCH_CBQ=m -# CONFIG_NET_SCH_HTB is not set -# CONFIG_NET_SCH_HFSC is not set -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -# CONFIG_NET_SCH_NETEM is not set -# CONFIG_NET_SCH_INGRESS is not set -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -# CONFIG_NET_CLS_BASIC is not set -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -# CONFIG_CLS_U32_PERF is not set -# CONFIG_NET_CLS_IND is not set -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -# CONFIG_NET_EMATCH is not set -# CONFIG_NET_CLS_ACT is not set -CONFIG_NET_CLS_POLICE=y - -# -# Network testing +# Network device support # -# CONFIG_NET_PKTGEN is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set CONFIG_NETDEVICES=y CONFIG_DUMMY=m CONFIG_BONDING=m @@ -411,12 +434,15 @@ CONFIG_CCWGROUP=y # CONFIG_SLIP is not set # CONFIG_SHAPER is not set # CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set # # File systems # CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set @@ -426,6 +452,7 @@ CONFIG_JBD=y CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set # # XFS support @@ -433,6 +460,7 @@ CONFIG_FS_MBCACHE=y # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y # CONFIG_QUOTA is not set CONFIG_DNOTIFY=y # CONFIG_AUTOFS_FS is not set @@ -457,7 +485,6 @@ CONFIG_DNOTIFY=y CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -486,15 +513,18 @@ CONFIG_RAMFS=y # CONFIG_NFS_FS=y CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set # CONFIG_NFS_DIRECTIO is not set CONFIG_NFSD=y CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set # CONFIG_NFSD_V4 is not set CONFIG_NFSD_TCP=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y CONFIG_EXPORTFS=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -544,11 +574,12 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_LOG_BUF_SHIFT=17 # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set +CONFIG_DEBUG_PREEMPT=y # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set -# CONFIG_DEBUG_FS is not set +CONFIG_DEBUG_FS=y # # Security options diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index d12cff1..2710e66 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -346,6 +346,13 @@ iplstart: la %r2,.Lreset lhi %r3,26 diag %r2,%r3,8 + la %r5,.Lirb + stsch 0(%r5) # check if irq is pending + tm 30(%r5),0x0f # by verifying if any of the + bnz .Lwaitforirq # activity or status control + tm 31(%r5),0xff # bits is set in the schib + bz .Lnoreset +.Lwaitforirq: mvc 0x78(8),.Lrdrnewpsw # set up IO interrupt psw .Lwaitrdrirq: lpsw .Lrdrwaitpsw diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 10bc592..9a8263a 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -345,6 +345,13 @@ iplstart: la %r2,.Lreset lhi %r3,26 diag %r2,%r3,8 + la %r5,.Lirb + stsch 0(%r5) # check if irq is pending + tm 30(%r5),0x0f # by verifying if any of the + bnz .Lwaitforirq # activity or status control + tm 31(%r5),0xff # bits is set in the schib + bz .Lnoreset +.Lwaitforirq: mvc 0x78(8),.Lrdrnewpsw # set up IO interrupt psw .Lwaitrdrirq: lpsw .Lrdrwaitpsw diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index d6c31a9..de17d4c 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -19,6 +19,8 @@ harddog-objs := harddog_kern.o harddog_user.o LDFLAGS_pcap.o := -r $(shell $(CC) $(CFLAGS) -print-file-name=libpcap.a) +targets := pcap_kern.o pcap_user.o + $(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o $(LD) -r -dp -o $@ $^ $(LDFLAGS) $(LDFLAGS_pcap.o) #XXX: The call below does not work because the flags are added before the @@ -26,7 +28,7 @@ $(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o #$(call if_changed,ld) # When the above is fixed, don't forget to add this too! -#targets := $(obj)/pcap.o +#targets += $(obj)/pcap.o obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o obj-$(CONFIG_SSL) += ssl.o diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 404de41..c190c24 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -557,7 +557,7 @@ static int create_proc_mconsole(void) ent = create_proc_entry("mconsole", S_IFREG | 0200, NULL); if(ent == NULL){ - printk("create_proc_mconsole : create_proc_entry failed\n"); + printk(KERN_INFO "create_proc_mconsole : create_proc_entry failed\n"); return(0); } diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index 0ea87f2..d21ebad 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -48,7 +48,7 @@ static int make_proc_exitcode(void) ent = create_proc_entry("exitcode", 0600, &proc_root); if(ent == NULL){ - printk("make_proc_exitcode : Failed to register " + printk(KERN_WARNING "make_proc_exitcode : Failed to register " "/proc/exitcode\n"); return(0); } diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 8b01a55..67acd92 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -131,7 +131,7 @@ int start_fork_tramp(void *thread_arg, unsigned long temp_stack, return(arg.pid); } -static int ptrace_child(void) +static int ptrace_child(void *arg) { int ret; int pid = os_getpid(), ppid = getppid(); @@ -160,16 +160,20 @@ static int ptrace_child(void) _exit(ret); } -static int start_ptraced_child(void) +static int start_ptraced_child(void **stack_out) { + void *stack; + unsigned long sp; int pid, n, status; - pid = fork(); - if(pid == 0) - ptrace_child(); - + stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if(stack == MAP_FAILED) + panic("check_ptrace : mmap failed, errno = %d", errno); + sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); + pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); if(pid < 0) - panic("check_ptrace : fork failed, errno = %d", errno); + panic("check_ptrace : clone failed, errno = %d", errno); CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); if(n < 0) panic("check_ptrace : wait failed, errno = %d", errno); @@ -177,6 +181,7 @@ static int start_ptraced_child(void) panic("check_ptrace : expected SIGSTOP, got status = %d", status); + *stack_out = stack; return(pid); } @@ -184,12 +189,12 @@ static int start_ptraced_child(void) * just avoid using sysemu, not panic, but only if SYSEMU features are broken. * So only for SYSEMU features we test mustpanic, while normal host features * must work anyway!*/ -static int stop_ptraced_child(int pid, int exitcode, int mustexit) +static int stop_ptraced_child(int pid, void *stack, int exitcode, int mustpanic) { int status, n, ret = 0; if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) - panic("stop_ptraced_child : ptrace failed, errno = %d", errno); + panic("check_ptrace : ptrace failed, errno = %d", errno); CATCH_EINTR(n = waitpid(pid, &status, 0)); if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { int exit_with = WEXITSTATUS(status); @@ -200,13 +205,15 @@ static int stop_ptraced_child(int pid, int exitcode, int mustexit) printk("check_ptrace : child exited with exitcode %d, while " "expecting %d; status 0x%x", exit_with, exitcode, status); - if (mustexit) + if (mustpanic) panic("\n"); else printk("\n"); ret = -1; } + if(munmap(stack, PAGE_SIZE) < 0) + panic("check_ptrace : munmap failed, errno = %d", errno); return ret; } @@ -242,11 +249,12 @@ __uml_setup("nosysemu", nosysemu_cmd_param, static void __init check_sysemu(void) { + void *stack; int pid, syscall, n, status, count=0; printk("Checking syscall emulation patch for ptrace..."); sysemu_supported = 0; - pid = start_ptraced_child(); + pid = start_ptraced_child(&stack); if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) goto fail; @@ -264,7 +272,7 @@ static void __init check_sysemu(void) panic("check_sysemu : failed to modify system " "call return, errno = %d", errno); - if (stop_ptraced_child(pid, 0, 0) < 0) + if (stop_ptraced_child(pid, stack, 0, 0) < 0) goto fail_stopped; sysemu_supported = 1; @@ -272,7 +280,7 @@ static void __init check_sysemu(void) set_using_sysemu(!force_sysemu_disabled); printk("Checking advanced syscall emulation patch for ptrace..."); - pid = start_ptraced_child(); + pid = start_ptraced_child(&stack); while(1){ count++; if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) @@ -297,7 +305,7 @@ static void __init check_sysemu(void) break; } } - if (stop_ptraced_child(pid, 0, 0) < 0) + if (stop_ptraced_child(pid, stack, 0, 0) < 0) goto fail_stopped; sysemu_supported = 2; @@ -308,17 +316,18 @@ static void __init check_sysemu(void) return; fail: - stop_ptraced_child(pid, 1, 0); + stop_ptraced_child(pid, stack, 1, 0); fail_stopped: printk("missing\n"); } void __init check_ptrace(void) { + void *stack; int pid, syscall, n, status; printk("Checking that ptrace can change system call numbers..."); - pid = start_ptraced_child(); + pid = start_ptraced_child(&stack); if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0) panic("check_ptrace: PTRACE_SETOPTIONS failed, errno = %d", errno); @@ -345,7 +354,7 @@ void __init check_ptrace(void) break; } } - stop_ptraced_child(pid, 0, 1); + stop_ptraced_child(pid, stack, 0, 1); printk("OK\n"); check_sysemu(); } @@ -380,10 +389,11 @@ extern void *__syscall_stub_start, __syscall_stub_end; static inline void check_skas3_ptrace_support(void) { struct ptrace_faultinfo fi; + void *stack; int pid, n; printf("Checking for the skas3 patch in the host..."); - pid = start_ptraced_child(); + pid = start_ptraced_child(&stack); n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); if (n < 0) { @@ -402,7 +412,7 @@ static inline void check_skas3_ptrace_support(void) } init_registers(pid); - stop_ptraced_child(pid, 1, 1); + stop_ptraced_child(pid, stack, 1, 1); } int can_do_skas(void) diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index d4036ed..c23d8a0 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -412,7 +412,7 @@ int __init make_proc_sysemu(void) if (ent == NULL) { - printk("Failed to register /proc/sysemu\n"); + printk(KERN_WARNING "Failed to register /proc/sysemu\n"); return(0); } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index ba671da..6dd9e5b 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -64,7 +64,7 @@ void wait_stub_done(int pid, int sig, char * fname) (WSTOPSIG(status) == SIGVTALRM)); if((n < 0) || !WIFSTOPPED(status) || - (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status != SIGTRAP))){ + (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status) != SIGTRAP)){ panic("%s : failed to wait for SIGUSR1/SIGTRAP, " "pid = %d, n = %d, errno = %d, status = 0x%x\n", fname, pid, n, errno, status); diff --git a/arch/um/kernel/skas/trap_user.c b/arch/um/kernel/skas/trap_user.c index 0dee1d9..9950a67 100644 --- a/arch/um/kernel/skas/trap_user.c +++ b/arch/um/kernel/skas/trap_user.c @@ -58,7 +58,6 @@ void user_signal(int sig, union uml_pt_regs *regs, int pid) int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGILL) || (sig == SIGTRAP)); - regs->skas.is_user = 1; if (segv) get_skas_faultinfo(pid, ®s->skas.faultinfo); info = &sig_info[sig]; diff --git a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c index a8b4ef6..4e08f75 100644 --- a/arch/um/kernel/time_kern.c +++ b/arch/um/kernel/time_kern.c @@ -137,7 +137,10 @@ long um_stime(int __user *tptr) void timer_handler(int sig, union uml_pt_regs *regs) { local_irq_disable(); - update_process_times(CHOOSE_MODE(user_context(UPT_SP(regs)), (regs)->skas.is_user)); + irq_enter(); + update_process_times(CHOOSE_MODE(user_context(UPT_SP(regs)), + (regs)->skas.is_user)); + irq_exit(); local_irq_enable(); if(current_thread->cpu == 0) timer_irq(regs); diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index 5423b1c..9416e1c 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -9,9 +9,10 @@ */ #include <elf.h> #include <stddef.h> +#include <asm/elf.h> #include "init.h" #include "elf_user.h" -#include <asm/elf.h> +#include "mem_user.h" #if ELF_CLASS == ELFCLASS32 typedef Elf32_auxv_t elf_auxv_t; @@ -41,6 +42,9 @@ __init void scan_elf_aux( char **envp) break; case AT_SYSINFO_EHDR: vsyscall_ehdr = auxv->a_un.a_val; + /* See if the page is under TASK_SIZE */ + if (vsyscall_ehdr < (unsigned long) envp) + vsyscall_ehdr = 0; break; case AT_HWCAP: elf_aux_hwcap = auxv->a_un.a_val; diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c index 75d7af9..56d3f87 100644 --- a/arch/um/os-Linux/user_syms.c +++ b/arch/um/os-Linux/user_syms.c @@ -83,6 +83,9 @@ EXPORT_SYMBOL_PROTO(statfs64); EXPORT_SYMBOL_PROTO(getuid); +EXPORT_SYMBOL_PROTO(fsync); +EXPORT_SYMBOL_PROTO(fdatasync); + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c index b251442..68aeabe 100644 --- a/arch/um/sys-i386/stub_segv.c +++ b/arch/um/sys-i386/stub_segv.c @@ -21,10 +21,10 @@ stub_segv_handler(int sig) __asm__("movl %0, %%eax ; int $0x80": : "g" (__NR_getpid)); __asm__("movl %%eax, %%ebx ; movl %0, %%eax ; movl %1, %%ecx ;" "int $0x80": : "g" (__NR_kill), "g" (SIGUSR1)); - /* Pop the frame pointer and return address since we need to leave + /* Load pointer to sigcontext into esp, since we need to leave * the stack in its original form when we do the sigreturn here, by * hand. */ - __asm__("popl %%eax ; popl %%eax ; popl %%eax ; movl %0, %%eax ; " - "int $0x80" : : "g" (__NR_sigreturn)); + __asm__("mov %0,%%esp ; movl %1, %%eax ; " + "int $0x80" : : "a" (sc), "g" (__NR_sigreturn)); } diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 4b83261..660a03a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -329,12 +329,15 @@ config HPET_EMULATE_RTC config GART_IOMMU bool "IOMMU support" + default y depends on PCI help - Support the K8 IOMMU. Needed to run systems with more than 4GB of memory + Support the IOMMU. Needed to run systems with more than 3GB of memory properly with 32-bit PCI devices that do not support DAC (Double Address Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter. Normally the kernel will take the right choice by itself. + This option includes a driver for the AMD Opteron/Athlon64 IOMMU + and a software emulation used on some other systems. If unsure, say Y. # need this always enabled with GART_IOMMU for the VIA workaround diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 4289156..4c6ed96 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -21,18 +21,6 @@ # # $Id: Makefile,v 1.31 2002/03/22 15:56:07 ak Exp $ -# -# early bootup linking needs 32bit. You can either use real 32bit tools -# here or 64bit tools in 32bit mode. -# -IA32_CC := $(CC) $(CPPFLAGS) -m32 -O2 -fomit-frame-pointer -IA32_LD := $(LD) -m elf_i386 -IA32_AS := $(CC) $(AFLAGS) -m32 -Wa,--32 -traditional -c -IA32_OBJCOPY := $(CROSS_COMPILE)objcopy -IA32_CPP := $(CROSS_COMPILE)gcc -m32 -E -export IA32_CC IA32_LD IA32_AS IA32_OBJCOPY IA32_CPP - - LDFLAGS := -m elf_x86_64 OBJCOPYFLAGS := -O binary -R .note -R .comment -S LDFLAGS_vmlinux := diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 569595b..776f3c8 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc4 -# Fri May 13 06:39:11 2005 +# Linux kernel version: 2.6.13-rc3 +# Fri Jul 22 16:47:31 2005 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -84,14 +84,27 @@ CONFIG_X86_IO_APIC=y CONFIG_X86_LOCAL_APIC=y CONFIG_MTRR=y CONFIG_SMP=y -# CONFIG_PREEMPT is not set CONFIG_SCHED_SMT=y +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_PREEMPT_BKL=y CONFIG_K8_NUMA=y # CONFIG_NUMA_EMU is not set -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_NUMA=y +CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_SELECT_MEMORY_MODEL=y +# CONFIG_FLATMEM_MANUAL is not set +CONFIG_DISCONTIGMEM_MANUAL=y +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_DISCONTIGMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_NEED_MULTIPLE_NODES=y +CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y CONFIG_HAVE_DEC_LOCK=y -CONFIG_NR_CPUS=8 +CONFIG_NR_CPUS=32 CONFIG_HPET_TIMER=y CONFIG_X86_PM_TIMER=y CONFIG_HPET_EMULATE_RTC=y @@ -99,7 +112,13 @@ CONFIG_GART_IOMMU=y CONFIG_SWIOTLB=y CONFIG_X86_MCE=y CONFIG_X86_MCE_INTEL=y +CONFIG_PHYSICAL_START=0x100000 +# CONFIG_KEXEC is not set CONFIG_SECCOMP=y +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_ISA_DMA_API=y @@ -118,12 +137,11 @@ CONFIG_PM_STD_PARTITION="" CONFIG_ACPI=y CONFIG_ACPI_BOOT=y CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_SLEEP_PROC_FS=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y # CONFIG_ACPI_VIDEO is not set +CONFIG_ACPI_HOTKEY=m CONFIG_ACPI_FAN=y CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_THERMAL=y @@ -154,6 +172,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set # # CPUFreq processor drivers @@ -204,6 +223,76 @@ CONFIG_SYSVIPC_COMPAT=y CONFIG_UID16=y # +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_IP_TCPDIAG=y +CONFIG_IP_TCPDIAG_IPV6=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_BIC=y +CONFIG_IPV6=y +# CONFIG_IPV6_PRIVACY is not set +# CONFIG_INET6_AH is not set +# CONFIG_INET6_ESP is not set +# CONFIG_INET6_IPCOMP is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_IPV6_TUNNEL is not set +# CONFIG_NETFILTER is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_SCHED is not set +# CONFIG_NET_CLS_ROUTE is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +CONFIG_NETPOLL=y +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set +CONFIG_NET_POLL_CONTROLLER=y +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set + +# # Device Drivers # @@ -308,6 +397,7 @@ CONFIG_BLK_DEV_AMD74XX=y # CONFIG_BLK_DEV_HPT366 is not set # CONFIG_BLK_DEV_SC1200 is not set CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_IT821X is not set # CONFIG_BLK_DEV_NS87415 is not set # CONFIG_BLK_DEV_PDC202XX_OLD is not set CONFIG_BLK_DEV_PDC202XX_NEW=y @@ -338,6 +428,7 @@ CONFIG_BLK_DEV_SD=y # CONFIG_CHR_DEV_OSST is not set # CONFIG_BLK_DEV_SR is not set # CONFIG_CHR_DEV_SG is not set +# CONFIG_CHR_DEV_SCH is not set # # Some SCSI devices (e.g. CD jukebox) support multiple LUNs @@ -372,7 +463,6 @@ CONFIG_AIC79XX_DEBUG_MASK=0 # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set CONFIG_SCSI_SATA=y -# CONFIG_SCSI_SATA_AHCI is not set # CONFIG_SCSI_SATA_SVW is not set CONFIG_SCSI_ATA_PIIX=y # CONFIG_SCSI_SATA_NV is not set @@ -410,14 +500,21 @@ CONFIG_SCSI_QLA2XXX=y # # Multi-device support (RAID and LVM) # -# CONFIG_MD is not set +CONFIG_MD=y +# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_DM=y +# CONFIG_DM_CRYPT is not set +# CONFIG_DM_SNAPSHOT is not set +# CONFIG_DM_MIRROR is not set +# CONFIG_DM_ZERO is not set +# CONFIG_DM_MULTIPATH is not set # # Fusion MPT device support # -CONFIG_FUSION=y -CONFIG_FUSION_MAX_SGE=40 -# CONFIG_FUSION_CTL is not set +# CONFIG_FUSION is not set +# CONFIG_FUSION_SPI is not set +# CONFIG_FUSION_FC is not set # # IEEE 1394 (FireWire) support @@ -430,75 +527,8 @@ CONFIG_FUSION_MAX_SGE=40 # CONFIG_I2O is not set # -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_TUNNEL is not set -CONFIG_IP_TCPDIAG=y -CONFIG_IP_TCPDIAG_IPV6=y -CONFIG_IPV6=y -# CONFIG_IPV6_PRIVACY is not set -# CONFIG_INET6_AH is not set -# CONFIG_INET6_ESP is not set -# CONFIG_INET6_IPCOMP is not set -# CONFIG_INET6_TUNNEL is not set -# CONFIG_IPV6_TUNNEL is not set -# CONFIG_NETFILTER is not set - -# -# SCTP Configuration (EXPERIMENTAL) +# Network device support # -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set -# CONFIG_NET_CLS_ROUTE is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -CONFIG_NETPOLL=y -# CONFIG_NETPOLL_RX is not set -# CONFIG_NETPOLL_TRAP is not set -CONFIG_NET_POLL_CONTROLLER=y -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set CONFIG_NETDEVICES=y # CONFIG_DUMMY is not set # CONFIG_BONDING is not set @@ -517,7 +547,9 @@ CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set -# CONFIG_NET_VENDOR_3COM is not set +CONFIG_NET_VENDOR_3COM=y +CONFIG_VORTEX=y +# CONFIG_TYPHOON is not set # # Tulip family network device support @@ -532,7 +564,7 @@ CONFIG_NET_PCI=y CONFIG_FORCEDETH=y # CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set -# CONFIG_E100 is not set +CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set @@ -553,14 +585,15 @@ CONFIG_8139TOO=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=y -# CONFIG_E1000_NAPI is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -647,7 +680,6 @@ CONFIG_SERIO_I8042=y CONFIG_SERIO_LIBPS2=y # CONFIG_SERIO_RAW is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -716,6 +748,7 @@ CONFIG_MAX_RAW_DEVS=256 # I2C support # # CONFIG_I2C is not set +# CONFIG_I2C_SENSOR is not set # # Dallas's 1-wire bus @@ -723,6 +756,12 @@ CONFIG_MAX_RAW_DEVS=256 # CONFIG_W1 is not set # +# Hardware Monitoring support +# +CONFIG_HWMON=y +# CONFIG_HWMON_DEBUG_CHIP is not set + +# # Misc devices # # CONFIG_IBM_ASM is not set @@ -808,6 +847,7 @@ CONFIG_USB_DEVICEFS=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_ISP116X_HCD is not set CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OHCI_BIG_ENDIAN is not set CONFIG_USB_OHCI_LITTLE_ENDIAN=y @@ -846,12 +886,15 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_HIDDEV is not set # CONFIG_USB_AIPTEK is not set # CONFIG_USB_WACOM is not set +# CONFIG_USB_ACECAD is not set # CONFIG_USB_KBTAB is not set # CONFIG_USB_POWERMATE is not set # CONFIG_USB_MTOUCH is not set +# CONFIG_USB_ITMTOUCH is not set # CONFIG_USB_EGALAX is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set +# CONFIG_USB_KEYSPAN_REMOTE is not set # # USB Imaging devices @@ -902,10 +945,11 @@ CONFIG_USB_MON=y # CONFIG_USB_PHIDGETSERVO is not set # CONFIG_USB_IDMOUSE is not set # CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set # CONFIG_USB_TEST is not set # -# USB ATM/DSL drivers +# USB DSL modem support # # @@ -924,6 +968,10 @@ CONFIG_USB_MON=y # CONFIG_INFINIBAND is not set # +# SN Devices +# + +# # Firmware Drivers # # CONFIG_EDD is not set @@ -935,6 +983,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y @@ -957,6 +1006,7 @@ CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y # CONFIG_QUOTA is not set CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=y @@ -986,7 +1036,6 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -1016,15 +1065,18 @@ CONFIG_RAMFS=y # CONFIG_NFS_FS=y CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set # CONFIG_NFS_DIRECTIO is not set CONFIG_NFSD=y CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set # CONFIG_NFSD_V4 is not set CONFIG_NFSD_TCP=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y CONFIG_EXPORTFS=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile index a12b19d..f76217d 100644 --- a/arch/x86_64/ia32/Makefile +++ b/arch/x86_64/ia32/Makefile @@ -4,14 +4,14 @@ obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o \ ia32_signal.o tls32.o \ - ia32_binfmt.o fpu32.o ptrace32.o syscall32.o + ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o sysv-$(CONFIG_SYSVIPC) := ipc32.o obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) obj-$(CONFIG_IA32_AOUT) += ia32_aout.o -$(obj)/syscall32.o: $(src)/syscall32.c \ +$(obj)/syscall32_syscall.o: \ $(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so) # Teach kbuild about targets diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c index 816a3b8..adbc5f8 100644 --- a/arch/x86_64/ia32/syscall32.c +++ b/arch/x86_64/ia32/syscall32.c @@ -14,16 +14,6 @@ #include <asm/tlbflush.h> #include <asm/ia32_unistd.h> -/* 32bit VDSOs mapped into user space. */ -asm(".section \".init.data\",\"aw\"\n" - "syscall32_syscall:\n" - ".incbin \"arch/x86_64/ia32/vsyscall-syscall.so\"\n" - "syscall32_syscall_end:\n" - "syscall32_sysenter:\n" - ".incbin \"arch/x86_64/ia32/vsyscall-sysenter.so\"\n" - "syscall32_sysenter_end:\n" - ".previous"); - extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; extern int sysctl_vsyscall32; diff --git a/arch/x86_64/ia32/syscall32_syscall.S b/arch/x86_64/ia32/syscall32_syscall.S new file mode 100644 index 0000000..8f8271b --- /dev/null +++ b/arch/x86_64/ia32/syscall32_syscall.S @@ -0,0 +1,17 @@ +/* 32bit VDSOs mapped into user space. */ + + .section ".init.data","aw" + + .globl syscall32_syscall + .globl syscall32_syscall_end + +syscall32_syscall: + .incbin "arch/x86_64/ia32/vsyscall-syscall.so" +syscall32_syscall_end: + + .globl syscall32_sysenter + .globl syscall32_sysenter_end + +syscall32_sysenter: + .incbin "arch/x86_64/ia32/vsyscall-sysenter.so" +syscall32_sysenter_end: diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 2881749..096d470 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -76,7 +76,7 @@ .macro FAKE_STACK_FRAME child_rip /* push in order ss, rsp, eflags, cs, rip */ - xorq %rax, %rax + xorl %eax, %eax pushq %rax /* ss */ CFI_ADJUST_CFA_OFFSET 8 pushq %rax /* rsp */ @@ -423,7 +423,7 @@ ENTRY(stub_rt_sigreturn) testl $3,CS(%rdi) je 1f swapgs -1: addl $1,%gs:pda_irqcount # RED-PEN should check preempt count +1: incl %gs:pda_irqcount # RED-PEN should check preempt count movq %gs:pda_irqstackptr,%rax cmoveq %rax,%rsp pushq %rdi # save old stack @@ -436,7 +436,7 @@ ENTRY(common_interrupt) ret_from_intr: popq %rdi cli - subl $1,%gs:pda_irqcount + decl %gs:pda_irqcount #ifdef CONFIG_DEBUG_INFO movq RBP(%rdi),%rbp #endif @@ -494,7 +494,7 @@ retint_signal: sti SAVE_REST movq $-1,ORIG_RAX(%rsp) - xorq %rsi,%rsi # oldset + xorl %esi,%esi # oldset movq %rsp,%rdi # &pt_regs call do_notify_resume RESTORE_REST @@ -752,7 +752,7 @@ child_rip: movq %rsi, %rdi call *%rax # exit - xorq %rdi, %rdi + xorl %edi, %edi call do_exit /* @@ -918,3 +918,15 @@ ENTRY(machine_check) ENTRY(call_debug) zeroentry do_call_debug +ENTRY(call_softirq) + movq %gs:pda_irqstackptr,%rax + pushq %r15 + movq %rsp,%r15 + incl %gs:pda_irqcount + cmove %rax,%rsp + call __do_softirq + movq %r15,%rsp + decl %gs:pda_irqcount + popq %r15 + ret + diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index 69b9c25..30c843a 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c @@ -31,6 +31,7 @@ u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; extern struct genapic apic_cluster; extern struct genapic apic_flat; +extern struct genapic apic_physflat; struct genapic *genapic = &apic_flat; @@ -44,12 +45,7 @@ void __init clustered_apic_check(void) u8 clusters, max_cluster; u8 id; u8 cluster_cnt[NUM_APIC_CLUSTERS]; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - /* AMD always uses flat mode right now */ - genapic = &apic_flat; - goto print; - } + int num_cpus = 0; #if defined(CONFIG_ACPI_BUS) /* @@ -64,15 +60,34 @@ void __init clustered_apic_check(void) #endif memset(cluster_cnt, 0, sizeof(cluster_cnt)); - for (i = 0; i < NR_CPUS; i++) { id = bios_cpu_apicid[i]; - if (id != BAD_APICID) - cluster_cnt[APIC_CLUSTERID(id)]++; + if (id == BAD_APICID) + continue; + num_cpus++; + cluster_cnt[APIC_CLUSTERID(id)]++; } + /* Don't use clustered mode on AMD platforms. */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + genapic = &apic_physflat; +#ifndef CONFIG_CPU_HOTPLUG + /* In the CPU hotplug case we cannot use broadcast mode + because that opens a race when a CPU is removed. + Stay at physflat mode in this case. + It is bad to do this unconditionally though. Once + we have ACPI platform support for CPU hotplug + we should detect hotplug capablity from ACPI tables and + only do this when really needed. -AK */ + if (num_cpus <= 8) + genapic = &apic_flat; +#endif + goto print; + } + clusters = 0; max_cluster = 0; + for (i = 0; i < NUM_APIC_CLUSTERS; i++) { if (cluster_cnt[i] > 0) { ++clusters; diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index 2828469..adc9628 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c @@ -2,13 +2,11 @@ * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 * - * Flat APIC subarch code. Maximum 8 CPUs, logical delivery. + * Flat APIC subarch code. * * Hacked for x86-64 by James Cleverdon from i386 architecture code by * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. - * Ashok Raj <ashok.raj@intel.com> - * Removed IPI broadcast shortcut to support CPU hotplug */ #include <linux/config.h> #include <linux/threads.h> @@ -20,47 +18,6 @@ #include <asm/smp.h> #include <asm/ipi.h> -/* - * The following permit choosing broadcast IPI shortcut v.s sending IPI only - * to online cpus via the send_IPI_mask varient. - * The mask version is my preferred option, since it eliminates a lot of - * other extra code that would need to be written to cleanup intrs sent - * to a CPU while offline. - * - * Sending broadcast introduces lots of trouble in CPU hotplug situations. - * These IPI's are delivered to cpu's irrespective of their offline status - * and could pickup stale intr data when these CPUS are turned online. - * - * Not using broadcast is a cleaner approach IMO, but Andi Kleen disagrees with - * the idea of not using broadcast IPI's anymore. Hence the run time check - * is introduced, on his request so we can choose an alternate mechanism. - * - * Initial wacky performance tests that collect cycle counts show - * no increase in using mask v.s broadcast version. In fact they seem - * identical in terms of cycle counts. - * - * if we need to use broadcast, we need to do the following. - * - * cli; - * hold call_lock; - * clear any pending IPI, just ack and clear all pending intr - * set cpu_online_map; - * release call_lock; - * sti; - * - * The complicated dummy irq processing shown above is not required if - * we didnt sent IPI's to wrong CPU's in the first place. - * - * - Ashok Raj <ashok.raj@intel.com> - */ -#ifdef CONFIG_HOTPLUG_CPU -#define DEFAULT_SEND_IPI (1) -#else -#define DEFAULT_SEND_IPI (0) -#endif - -static int no_broadcast=DEFAULT_SEND_IPI; - static cpumask_t flat_target_cpus(void) { return cpu_online_map; @@ -119,37 +76,15 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) local_irq_restore(flags); } -static inline void __local_flat_send_IPI_allbutself(int vector) -{ - if (no_broadcast) { - cpumask_t mask = cpu_online_map; - int this_cpu = get_cpu(); - - cpu_clear(this_cpu, mask); - flat_send_IPI_mask(mask, vector); - put_cpu(); - } - else - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL); -} - -static inline void __local_flat_send_IPI_all(int vector) -{ - if (no_broadcast) - flat_send_IPI_mask(cpu_online_map, vector); - else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); -} - static void flat_send_IPI_allbutself(int vector) { if (((num_online_cpus()) - 1) >= 1) - __local_flat_send_IPI_allbutself(vector); + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); } static void flat_send_IPI_all(int vector) { - __local_flat_send_IPI_all(vector); + __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } static int flat_apic_id_registered(void) @@ -170,16 +105,6 @@ static unsigned int phys_pkg_id(int index_msb) return ((ebx >> 24) & 0xFF) >> index_msb; } -static __init int no_ipi_broadcast(char *str) -{ - get_option(&str, &no_broadcast); - printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : - "IPI Broadcast"); - return 1; -} - -__setup("no_ipi_broadcast", no_ipi_broadcast); - struct genapic apic_flat = { .name = "flat", .int_delivery_mode = dest_LowestPrio, @@ -195,11 +120,62 @@ struct genapic apic_flat = { .phys_pkg_id = phys_pkg_id, }; -static int __init print_ipi_mode(void) +/* + * Physflat mode is used when there are more than 8 CPUs on a AMD system. + * We cannot use logical delivery in this case because the mask + * overflows, so use physical mode. + */ + +static cpumask_t physflat_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) +{ + send_IPI_mask_sequence(cpumask, vector); +} + +static void physflat_send_IPI_allbutself(int vector) +{ + cpumask_t allbutme = cpu_online_map; + int me = get_cpu(); + cpu_clear(me, allbutme); + physflat_send_IPI_mask(allbutme, vector); + put_cpu(); +} + +static void physflat_send_IPI_all(int vector) { - printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : - "Shortcut"); - return 0; + physflat_send_IPI_mask(cpu_online_map, vector); } -late_initcall(print_ipi_mode); +static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return x86_cpu_to_apicid[cpu]; + else + return BAD_APICID; +} + +struct genapic apic_physflat = { + .name = "physical flat", + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_LOWEST, + .target_cpus = physflat_target_cpus, + .apic_id_registered = flat_apic_id_registered, + .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ + .send_IPI_all = physflat_send_IPI_all, + .send_IPI_allbutself = physflat_send_IPI_allbutself, + .send_IPI_mask = physflat_send_IPI_mask, + .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, +}; diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 8d765aa..98ff5eb 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -137,14 +137,14 @@ startup_64: wrmsr /* Setup cr0 */ - xorq %rax, %rax - btsq $31, %rax /* Enable paging */ - btsq $0, %rax /* Enable protected mode */ - btsq $1, %rax /* Enable MP */ - btsq $4, %rax /* Enable ET */ - btsq $5, %rax /* Enable NE */ - btsq $16, %rax /* Enable WP */ - btsq $18, %rax /* Enable AM */ +#define CR0_PM 1 /* protected mode */ +#define CR0_MP (1<<1) +#define CR0_ET (1<<4) +#define CR0_NE (1<<5) +#define CR0_WP (1<<16) +#define CR0_AM (1<<18) +#define CR0_PAGING (1<<31) + movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax /* Make changes effective */ movq %rax, %cr0 diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index cc3fb85..849a20a 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -135,3 +135,22 @@ void fixup_irqs(cpumask_t map) local_irq_disable(); } #endif + +extern void call_softirq(void); + +asmlinkage void do_softirq(void) +{ + __u32 pending; + unsigned long flags; + + if (in_interrupt()) + return; + + local_irq_save(flags); + pending = local_softirq_pending(); + /* Switch to interrupt stack */ + if (pending) + call_softirq(); + local_irq_restore(flags); +} +EXPORT_SYMBOL(do_softirq); diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index 60d1eff..89fab51 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c @@ -8,43 +8,26 @@ #include <linux/mm.h> #include <linux/kexec.h> -#include <linux/delay.h> #include <linux/string.h> #include <linux/reboot.h> -#include <asm/pda.h> #include <asm/pgtable.h> -#include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/io.h> -#include <asm/apic.h> -#include <asm/cpufeature.h> -#include <asm/hw_irq.h> - -#define LEVEL0_SIZE (1UL << 12UL) -#define LEVEL1_SIZE (1UL << 21UL) -#define LEVEL2_SIZE (1UL << 30UL) -#define LEVEL3_SIZE (1UL << 39UL) -#define LEVEL4_SIZE (1UL << 48UL) - -#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE) -#define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -#define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) - -static void init_level2_page(u64 *level2p, unsigned long addr) + +static void init_level2_page(pmd_t *level2p, unsigned long addr) { unsigned long end_addr; addr &= PAGE_MASK; - end_addr = addr + LEVEL2_SIZE; + end_addr = addr + PUD_SIZE; while (addr < end_addr) { - *(level2p++) = addr | L1_ATTR; - addr += LEVEL1_SIZE; + set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); + addr += PMD_SIZE; } } -static int init_level3_page(struct kimage *image, u64 *level3p, +static int init_level3_page(struct kimage *image, pud_t *level3p, unsigned long addr, unsigned long last_addr) { unsigned long end_addr; @@ -52,32 +35,32 @@ static int init_level3_page(struct kimage *image, u64 *level3p, result = 0; addr &= PAGE_MASK; - end_addr = addr + LEVEL3_SIZE; + end_addr = addr + PGDIR_SIZE; while ((addr < last_addr) && (addr < end_addr)) { struct page *page; - u64 *level2p; + pmd_t *level2p; page = kimage_alloc_control_pages(image, 0); if (!page) { result = -ENOMEM; goto out; } - level2p = (u64 *)page_address(page); + level2p = (pmd_t *)page_address(page); init_level2_page(level2p, addr); - *(level3p++) = __pa(level2p) | L2_ATTR; - addr += LEVEL2_SIZE; + set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE)); + addr += PUD_SIZE; } /* clear the unused entries */ while (addr < end_addr) { - *(level3p++) = 0; - addr += LEVEL2_SIZE; + pud_clear(level3p++); + addr += PUD_SIZE; } out: return result; } -static int init_level4_page(struct kimage *image, u64 *level4p, +static int init_level4_page(struct kimage *image, pgd_t *level4p, unsigned long addr, unsigned long last_addr) { unsigned long end_addr; @@ -85,28 +68,28 @@ static int init_level4_page(struct kimage *image, u64 *level4p, result = 0; addr &= PAGE_MASK; - end_addr = addr + LEVEL4_SIZE; + end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE); while ((addr < last_addr) && (addr < end_addr)) { struct page *page; - u64 *level3p; + pud_t *level3p; page = kimage_alloc_control_pages(image, 0); if (!page) { result = -ENOMEM; goto out; } - level3p = (u64 *)page_address(page); + level3p = (pud_t *)page_address(page); result = init_level3_page(image, level3p, addr, last_addr); if (result) { goto out; } - *(level4p++) = __pa(level3p) | L3_ATTR; - addr += LEVEL3_SIZE; + set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); + addr += PGDIR_SIZE; } /* clear the unused entries */ while (addr < end_addr) { - *(level4p++) = 0; - addr += LEVEL3_SIZE; + pgd_clear(level4p++); + addr += PGDIR_SIZE; } out: return result; @@ -115,52 +98,50 @@ out: static int init_pgtable(struct kimage *image, unsigned long start_pgtable) { - u64 *level4p; - level4p = (u64 *)__va(start_pgtable); + pgd_t *level4p; + level4p = (pgd_t *)__va(start_pgtable); return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); } static void set_idt(void *newidt, u16 limit) { - unsigned char curidt[10]; + struct desc_ptr curidt; /* x86-64 supports unaliged loads & stores */ - (*(u16 *)(curidt)) = limit; - (*(u64 *)(curidt +2)) = (unsigned long)(newidt); + curidt.size = limit; + curidt.address = (unsigned long)newidt; __asm__ __volatile__ ( - "lidt %0\n" - : "=m" (curidt) + "lidtq %0\n" + : : "m" (curidt) ); }; static void set_gdt(void *newgdt, u16 limit) { - unsigned char curgdt[10]; + struct desc_ptr curgdt; /* x86-64 supports unaligned loads & stores */ - (*(u16 *)(curgdt)) = limit; - (*(u64 *)(curgdt +2)) = (unsigned long)(newgdt); + curgdt.size = limit; + curgdt.address = (unsigned long)newgdt; __asm__ __volatile__ ( - "lgdt %0\n" - : "=m" (curgdt) + "lgdtq %0\n" + : : "m" (curgdt) ); }; static void load_segments(void) { __asm__ __volatile__ ( - "\tmovl $"STR(__KERNEL_DS)",%eax\n" - "\tmovl %eax,%ds\n" - "\tmovl %eax,%es\n" - "\tmovl %eax,%ss\n" - "\tmovl %eax,%fs\n" - "\tmovl %eax,%gs\n" + "\tmovl %0,%%ds\n" + "\tmovl %0,%%es\n" + "\tmovl %0,%%ss\n" + "\tmovl %0,%%fs\n" + "\tmovl %0,%%gs\n" + : : "a" (__KERNEL_DS) ); -#undef STR -#undef __STR } typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page, @@ -178,7 +159,7 @@ int machine_kexec_prepare(struct kimage *image) /* Calculate the offsets */ start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; - control_code_buffer = start_pgtable + 4096UL; + control_code_buffer = start_pgtable + PAGE_SIZE; /* Setup the identity mapped 64bit page table */ result = init_pgtable(image, start_pgtable); @@ -214,7 +195,7 @@ NORET_TYPE void machine_kexec(struct kimage *image) /* Calculate the offsets */ page_list = image->head; start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; - control_code_buffer = start_pgtable + 4096UL; + control_code_buffer = start_pgtable + PAGE_SIZE; /* Set the low half of the page table to my identity mapped * page table for kexec. Leave the high half pointing at the diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 21e7062..3b267c9 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -15,6 +15,8 @@ #include <linux/sysdev.h> #include <linux/miscdevice.h> #include <linux/fs.h> +#include <linux/cpu.h> +#include <linux/percpu.h> #include <asm/processor.h> #include <asm/msr.h> #include <asm/mce.h> @@ -514,10 +516,7 @@ static struct sysdev_class mce_sysclass = { set_kset_name("machinecheck"), }; -static struct sys_device device_mce = { - .id = 0, - .cls = &mce_sysclass, -}; +static DEFINE_PER_CPU(struct sys_device, device_mce); /* Why are there no generic functions for this? */ #define ACCESSOR(name, var, start) \ @@ -542,27 +541,83 @@ ACCESSOR(bank4ctl,bank[4],mce_restart()) ACCESSOR(tolerant,tolerant,) ACCESSOR(check_interval,check_interval,mce_restart()) -static __cpuinit int mce_init_device(void) +/* Per cpu sysdev init. All of the cpus still share the same ctl bank */ +static __cpuinit int mce_create_device(unsigned int cpu) { int err; + if (!mce_available(&cpu_data[cpu])) + return -EIO; + + per_cpu(device_mce,cpu).id = cpu; + per_cpu(device_mce,cpu).cls = &mce_sysclass; + + err = sysdev_register(&per_cpu(device_mce,cpu)); + + if (!err) { + sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank0ctl); + sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank1ctl); + sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank2ctl); + sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank3ctl); + sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank4ctl); + sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant); + sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval); + } + return err; +} + +#ifdef CONFIG_HOTPLUG_CPU +static __cpuinit void mce_remove_device(unsigned int cpu) +{ + sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank0ctl); + sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank1ctl); + sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank2ctl); + sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank3ctl); + sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank4ctl); + sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); + sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); + sysdev_unregister(&per_cpu(device_mce,cpu)); +} +#endif + +/* Get notified when a cpu comes on/off. Be hotplug friendly. */ +static __cpuinit int +mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + switch (action) { + case CPU_ONLINE: + mce_create_device(cpu); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + mce_remove_device(cpu); + break; +#endif + } + return NOTIFY_OK; +} + +static struct notifier_block mce_cpu_notifier = { + .notifier_call = mce_cpu_callback, +}; + +static __init int mce_init_device(void) +{ + int err; + int i = 0; + if (!mce_available(&boot_cpu_data)) return -EIO; err = sysdev_class_register(&mce_sysclass); - if (!err) - err = sysdev_register(&device_mce); - if (!err) { - /* could create per CPU objects, but it is not worth it. */ - sysdev_create_file(&device_mce, &attr_bank0ctl); - sysdev_create_file(&device_mce, &attr_bank1ctl); - sysdev_create_file(&device_mce, &attr_bank2ctl); - sysdev_create_file(&device_mce, &attr_bank3ctl); - sysdev_create_file(&device_mce, &attr_bank4ctl); - sysdev_create_file(&device_mce, &attr_tolerant); - sysdev_create_file(&device_mce, &attr_check_interval); - } - + + for_each_online_cpu(i) { + mce_create_device(i); + } + + register_cpu_notifier(&mce_cpu_notifier); misc_register(&mce_log_device); return err; - } + device_initcall(mce_init_device); diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 9c5aa2a..08abf9f 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -109,7 +109,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) static void __init MP_processor_info (struct mpc_config_processor *m) { - int ver; + int ver, cpu; static int found_bsp=0; if (!(m->mpc_cpuflag & CPU_ENABLED)) @@ -131,7 +131,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m) return; } - num_processors++; + cpu = num_processors++; if (m->mpc_apicid > MAX_APICS) { printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", @@ -155,13 +155,18 @@ static void __init MP_processor_info (struct mpc_config_processor *m) * in same order as logical cpu numbers. Hence the first * entry is BSP, and so on. */ + cpu = 0; + bios_cpu_apicid[0] = m->mpc_apicid; x86_cpu_to_apicid[0] = m->mpc_apicid; found_bsp = 1; - } else { - bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid; - x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid; - } + } else + cpu = num_processors - found_bsp; + bios_cpu_apicid[cpu] = m->mpc_apicid; + x86_cpu_to_apicid[cpu] = m->mpc_apicid; + + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); } static void __init MP_bus_info (struct mpc_config_bus *m) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 5fd0322..0aa5262 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -765,8 +765,6 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) int cpu = smp_processor_id(); int node = 0; unsigned bits; - if (c->x86_num_cores == 1) - return; bits = 0; while ((1 << bits) < c->x86_num_cores) @@ -1081,8 +1079,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) else mtrr_ap_init(); #ifdef CONFIG_NUMA - if (c != &boot_cpu_data) - numa_add_cpu(c - cpu_data); + numa_add_cpu(smp_processor_id()); #endif } diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index f2b9c6b..34082c1 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/bootmem.h> #include <linux/bitops.h> +#include <linux/module.h> #include <asm/bootsetup.h> #include <asm/pda.h> #include <asm/pgtable.h> @@ -22,10 +23,8 @@ #include <asm/smp.h> #include <asm/i387.h> #include <asm/percpu.h> -#include <asm/mtrr.h> #include <asm/proto.h> -#include <asm/mman.h> -#include <asm/numa.h> +#include <asm/sections.h> char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; @@ -33,11 +32,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned; -extern struct task_struct init_task; - -extern unsigned char __per_cpu_start[], __per_cpu_end[]; - -extern struct desc_ptr cpu_gdt_descr[]; struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); @@ -101,7 +95,7 @@ void __init setup_per_cpu_areas(void) #endif for (i = 0; i < NR_CPUS; i++) { - unsigned char *ptr; + char *ptr; if (!NODE_DATA(cpu_to_node(i))) { printk("cpu with no node %d, num_online_nodes %d\n", @@ -190,11 +184,7 @@ void __cpuinit check_efer(void) */ void __cpuinit cpu_init (void) { -#ifdef CONFIG_SMP int cpu = stack_smp_processor_id(); -#else - int cpu = smp_processor_id(); -#endif struct tss_struct *t = &per_cpu(init_tss, cpu); unsigned long v; char *estacks = NULL; @@ -214,7 +204,7 @@ void __cpuinit cpu_init (void) printk("Initializing CPU#%d\n", cpu); - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); /* * Initialize the per-CPU GDT with the boot GDT, diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index ccae392..e595822 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -129,10 +129,9 @@ asmlinkage void smp_invalidate_interrupt (void) } else leave_mm(cpu); } +out: ack_APIC_irq(); cpu_clear(cpu, flush_cpumask); - -out: put_cpu_no_resched(); } @@ -294,6 +293,69 @@ void unlock_ipi_call_lock(void) } /* + * this function sends a 'generic call function' IPI to one other CPU + * in the system. + */ +static void __smp_call_function_single (int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + struct call_data_struct data; + int cpus = 1; + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + wmb(); + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (!wait) + return; + + while (atomic_read(&data.finished) != cpus) + cpu_relax(); +} + +/* + * smp_call_function_single - Run a function on another CPU + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: Currently unused. + * @wait: If true, wait until function has completed on other CPUs. + * + * Retrurns 0 on success, else a negative status code. + * + * Does not return until the remote CPU is nearly ready to execute <func> + * or is or has executed. + */ + +int smp_call_function_single (int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + /* prevent preemption and reschedule on another processor */ + int me = get_cpu(); + if (cpu == me) { + WARN_ON(1); + put_cpu(); + return -EBUSY; + } + spin_lock_bh(&call_lock); + __smp_call_function_single(cpu, func, info, nonatomic, wait); + spin_unlock_bh(&call_lock); + put_cpu(); + return 0; +} + +/* * this function sends a 'generic call function' IPI to all other CPUs * in the system. */ diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index e773a79..6e4807d 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -113,24 +113,6 @@ struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; #define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) /* - * cpu_possible_map should be static, it cannot change as cpu's - * are onlined, or offlined. The reason is per-cpu data-structures - * are allocated by some modules at init time, and dont expect to - * do this dynamically on cpu arrival/departure. - * cpu_present_map on the other hand can change dynamically. - * In case when cpu_hotplug is not compiled, then we resort to current - * behaviour, which is cpu_possible == cpu_present. - * If cpu-hotplug is supported, then we need to preallocate for all - * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. - * - Ashok Raj - */ -#ifdef CONFIG_HOTPLUG_CPU -#define fixup_cpu_possible_map(x) cpu_set((x), cpu_possible_map) -#else -#define fixup_cpu_possible_map(x) -#endif - -/* * Currently trivial. Write the real->protected mode * bootstrap into the page concerned. The caller * has made sure it's suitably aligned. @@ -229,9 +211,6 @@ static __cpuinit void sync_master(void *arg) { unsigned long flags, i; - if (smp_processor_id() != 0) - return; - go[MASTER] = 0; local_irq_save(flags); @@ -280,7 +259,7 @@ get_delta(long *rt, long *master) return tcenter - best_tm; } -static __cpuinit void sync_tsc(void) +static __cpuinit void sync_tsc(unsigned int master) { int i, done = 0; long delta, adj, adjust_latency = 0; @@ -294,9 +273,17 @@ static __cpuinit void sync_tsc(void) } t[NUM_ROUNDS] __cpuinitdata; #endif + printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", + smp_processor_id(), master); + go[MASTER] = 1; - smp_call_function(sync_master, NULL, 1, 0); + /* It is dangerous to broadcast IPI as cpus are coming up, + * as they may not be ready to accept them. So since + * we only need to send the ipi to the boot cpu direct + * the message, and avoid the race. + */ + smp_call_function_single(master, sync_master, NULL, 1, 0); while (go[MASTER]) /* wait for master to be ready */ no_cpu_relax(); @@ -340,16 +327,14 @@ static __cpuinit void sync_tsc(void) printk(KERN_INFO "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, " "maxerr %lu cycles)\n", - smp_processor_id(), boot_cpu_id, delta, rt); + smp_processor_id(), master, delta, rt); } static void __cpuinit tsc_sync_wait(void) { if (notscsync || !cpu_has_tsc) return; - printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(), - boot_cpu_id); - sync_tsc(); + sync_tsc(boot_cpu_id); } static __init int notscsync_setup(char *s) @@ -773,8 +758,9 @@ do_rest: initial_code = start_secondary; clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK); - printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid, - start_rip, init_rsp); + printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu, + cpus_weight(cpu_present_map), + apicid); /* * This grunge runs the startup process for @@ -924,6 +910,27 @@ static __init void enforce_max_cpus(unsigned max_cpus) } } +#ifdef CONFIG_HOTPLUG_CPU +/* + * cpu_possible_map should be static, it cannot change as cpu's + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and dont expect to + * do this dynamically on cpu arrival/departure. + * cpu_present_map on the other hand can change dynamically. + * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. + * If cpu-hotplug is supported, then we need to preallocate for all + * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. + * - Ashok Raj + */ +static void prefill_possible_map(void) +{ + int i; + for (i = 0; i < NR_CPUS; i++) + cpu_set(i, cpu_possible_map); +} +#endif + /* * Various sanity checks. */ @@ -987,25 +994,15 @@ static int __init smp_sanity_check(unsigned max_cpus) */ void __init smp_prepare_cpus(unsigned int max_cpus) { - int i; - nmi_watchdog_default(); current_cpu_data = boot_cpu_data; current_thread_info()->cpu = 0; /* needed? */ enforce_max_cpus(max_cpus); - /* - * Fill in cpu_present_mask - */ - for (i = 0; i < NR_CPUS; i++) { - int apicid = cpu_present_to_apicid(i); - if (physid_isset(apicid, phys_cpu_present_map)) { - cpu_set(i, cpu_present_map); - cpu_set(i, cpu_possible_map); - } - fixup_cpu_possible_map(i); - } +#ifdef CONFIG_HOTPLUG_CPU + prefill_possible_map(); +#endif if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); @@ -1189,8 +1186,7 @@ void __cpu_die(unsigned int cpu) printk ("CPU %d is now offline\n", cpu); return; } - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(HZ/10); + msleep(100); } printk(KERN_ERR "CPU %u didn't die...\n", cpu); } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 1027366..6ead433 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -594,9 +594,6 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) if (!cpu) reason = get_nmi_reason(); - if (!cpu_online(cpu)) - return; - if (!(reason & 0xc0)) { if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) == NOTIFY_STOP) diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c index 33a873a..841bd73 100644 --- a/arch/x86_64/lib/delay.c +++ b/arch/x86_64/lib/delay.c @@ -18,8 +18,6 @@ #include <asm/smp.h> #endif -int x86_udelay_tsc = 0; /* Delay via TSC */ - int read_current_timer(unsigned long *timer_value) { rdtscll(*timer_value); diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 2f18798..1379272 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -23,7 +23,6 @@ #include <linux/vt_kern.h> /* For unblank_screen() */ #include <linux/compiler.h> #include <linux/module.h> -#include <linux/kprobes.h> #include <asm/system.h> #include <asm/uaccess.h> diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index ac61c18..70cb290 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -36,34 +36,36 @@ int numa_off __initdata; int __init compute_hash_shift(struct node *nodes, int numnodes) { int i; - int shift = 24; - u64 addr; + int shift = 20; + unsigned long addr,maxend=0; - /* When in doubt use brute force. */ - while (shift < 48) { - memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); - for (i = 0; i < numnodes; i++) { - if (nodes[i].start == nodes[i].end) - continue; - for (addr = nodes[i].start; - addr < nodes[i].end; - addr += (1UL << shift)) { - if (memnodemap[addr >> shift] != 0xff && - memnodemap[addr >> shift] != i) { - printk(KERN_INFO - "node %d shift %d addr %Lx conflict %d\n", - i, shift, addr, memnodemap[addr>>shift]); - goto next; - } - memnodemap[addr >> shift] = i; + for (i = 0; i < numnodes; i++) + if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend)) + maxend = nodes[i].end; + + while ((1UL << shift) < (maxend / NODEMAPSIZE)) + shift++; + + printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n", + shift,maxend); + memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); + for (i = 0; i < numnodes; i++) { + if (nodes[i].start == nodes[i].end) + continue; + for (addr = nodes[i].start; + addr < nodes[i].end; + addr += (1UL << shift)) { + if (memnodemap[addr >> shift] != 0xff) { + printk(KERN_INFO + "Your memory is not aligned you need to rebuild your kernel " + "with a bigger NODEMAPSIZE shift=%d adder=%lu\n", + shift,addr); + return -1; } + memnodemap[addr >> shift] = i; } - return shift; - next: - shift++; } - memset(memnodemap,0,sizeof(*memnodemap) * NODEMAPSIZE); - return -1; + return shift; } #ifdef CONFIG_SPARSEMEM diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 5d01b31..8e3d097 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c @@ -20,6 +20,9 @@ static struct acpi_table_slit *acpi_slit; +/* Internal processor count */ +static unsigned int __initdata num_processors = 0; + static nodemask_t nodes_parsed __initdata; static nodemask_t nodes_found __initdata; static struct node nodes[MAX_NUMNODES] __initdata; @@ -101,16 +104,18 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) bad_srat(); return; } - if (pa->apic_id >= NR_CPUS) { - printk(KERN_ERR "SRAT: lapic %u too large.\n", - pa->apic_id); + if (num_processors >= NR_CPUS) { + printk(KERN_ERR "SRAT: Processor #%d (lapic %u) INVALID. (Max ID: %d).\n", + num_processors, pa->apic_id, NR_CPUS); bad_srat(); return; } - cpu_to_node[pa->apic_id] = node; + cpu_to_node[num_processors] = node; acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", - pxm, pa->apic_id, node); + printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> CPU %u -> Node %u\n", + pxm, pa->apic_id, num_processors, node); + + num_processors++; } /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ @@ -124,7 +129,6 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) if (srat_disabled() || ma->flags.enabled == 0) return; - /* hotplug bit is ignored for now */ pxm = ma->proximity_domain; node = setup_node(pxm); if (node < 0) { @@ -134,6 +138,10 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) } start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32); end = start + (ma->length_lo | ((u64)ma->length_hi << 32)); + /* It is fine to add this area to the nodes data it will be used later*/ + if (ma->flags.hot_pluggable == 1) + printk(KERN_INFO "SRAT: hot plug zone found %lx - %lx \n", + start, end); i = conflicting_nodes(start, end); if (i >= 0) { printk(KERN_ERR diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c index 7e7d0c2..c2c38b5 100644 --- a/arch/x86_64/pci/k8-bus.c +++ b/arch/x86_64/pci/k8-bus.c @@ -29,7 +29,7 @@ __init static int fill_mp_bus_to_cpumask(void) { struct pci_dev *nb_dev = NULL; - int i, j, printed; + int i, j; u32 ldtbus, nid; static int lbnr[3] = { LDT_BUS_NUMBER_REGISTER_0, |