From 7e36e2f5355ab87f8946041d044b34cda01e2077 Mon Sep 17 00:00:00 2001 From: Rod Whitby Date: Tue, 1 Apr 2008 10:53:23 +0100 Subject: [ARM] 4874/2: ixp4xx: Add support for the Freecom FSG-3 board The Freecom-FSG3 is a small network-attached-storage device with the following feature set: * Intel IXP422 * 4MB Flash (ixp4xx flash driver) * 64MB RAM * 4 USB 2.0 host ports (ehci and ohci drivers) * 1 WAN (eth1) and 3 LAN (eth0) ethernet ports * Supported by the open source ixp4xx ethernet driver * Via VT6421 disk controller (libata and sata-via drivers) * Internal hard disk (PATA supported, SATA not yet supported) * External SATA port (not yet supported) * ISL1208 RTC chip * Winbond 83782 temp sensor and fan controller * MiniPCI slot The ixp4xx_defconfig is also updated to support this device (the leds-fsg driver is to be submitted separately via the leds tree after this initial support is merged, as it depends on header gpio defines). Signed-off-by: Rod Whitby Signed-off-by: Russell King diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig index efa0485..fc14932 100644 --- a/arch/arm/configs/ixp4xx_defconfig +++ b/arch/arm/configs/ixp4xx_defconfig @@ -165,6 +165,7 @@ CONFIG_ARCH_PRPMC1100=y CONFIG_MACH_NAS100D=y CONFIG_MACH_DSMG600=y CONFIG_ARCH_IXDP4XX=y +CONFIG_MACH_FSG=y CONFIG_CPU_IXP46X=y CONFIG_CPU_IXP43X=y CONFIG_MACH_GTWX5715=y @@ -770,7 +771,7 @@ CONFIG_ATA=y # CONFIG_SATA_SIL24 is not set # CONFIG_SATA_SIS is not set # CONFIG_SATA_ULI is not set -# CONFIG_SATA_VIA is not set +CONFIG_SATA_VIA=y # CONFIG_SATA_VITESSE is not set # CONFIG_SATA_INIC162X is not set # CONFIG_PATA_ALI is not set @@ -1143,7 +1144,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_VIA686A is not set # CONFIG_SENSORS_VT1211 is not set # CONFIG_SENSORS_VT8231 is not set -# CONFIG_SENSORS_W83781D is not set +CONFIG_SENSORS_W83781D=y # CONFIG_SENSORS_W83791D is not set # CONFIG_SENSORS_W83792D is not set # CONFIG_SENSORS_W83793 is not set @@ -1334,8 +1335,8 @@ CONFIG_LEDS_CLASS=y # # LED drivers # -# CONFIG_LEDS_IXP4XX is not set CONFIG_LEDS_GPIO=y +CONFIG_LEDS_FSG=y # # LED Triggers @@ -1367,7 +1368,7 @@ CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_DRV_DS1672 is not set # CONFIG_RTC_DRV_MAX6900 is not set # CONFIG_RTC_DRV_RS5C372 is not set -# CONFIG_RTC_DRV_ISL1208 is not set +CONFIG_RTC_DRV_ISL1208=y CONFIG_RTC_DRV_X1205=y CONFIG_RTC_DRV_PCF8563=y # CONFIG_RTC_DRV_PCF8583 is not set diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index e774447..db8b5fe 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig @@ -125,6 +125,15 @@ config ARCH_IXDP4XX depends on ARCH_IXDP425 || MACH_IXDP465 || MACH_KIXRP435 default y +config MACH_FSG + bool + prompt "Freecom FSG-3" + select PCI + help + Say 'Y' here if you want your kernel to support Freecom's + FSG-3 device. For more information on this platform, + see http://www.nslu2-linux.org/wiki/FSG3/HomePage + # # Certain registers and IRQs are only enabled if supporting IXP465 CPUs # diff --git a/arch/arm/mach-ixp4xx/Makefile b/arch/arm/mach-ixp4xx/Makefile index c195688..2e6bbf9 100644 --- a/arch/arm/mach-ixp4xx/Makefile +++ b/arch/arm/mach-ixp4xx/Makefile @@ -15,6 +15,7 @@ obj-pci-$(CONFIG_MACH_NAS100D) += nas100d-pci.o obj-pci-$(CONFIG_MACH_DSMG600) += dsmg600-pci.o obj-pci-$(CONFIG_MACH_GATEWAY7001) += gateway7001-pci.o obj-pci-$(CONFIG_MACH_WG302V2) += wg302v2-pci.o +obj-pci-$(CONFIG_MACH_FSG) += fsg-pci.o obj-y += common.o @@ -28,6 +29,7 @@ obj-$(CONFIG_MACH_NAS100D) += nas100d-setup.o obj-$(CONFIG_MACH_DSMG600) += dsmg600-setup.o obj-$(CONFIG_MACH_GATEWAY7001) += gateway7001-setup.o obj-$(CONFIG_MACH_WG302V2) += wg302v2-setup.o +obj-$(CONFIG_MACH_FSG) += fsg-setup.o obj-$(CONFIG_PCI) += $(obj-pci-$(CONFIG_PCI)) common-pci.o obj-$(CONFIG_IXP4XX_QMGR) += ixp4xx_qmgr.o diff --git a/arch/arm/mach-ixp4xx/fsg-pci.c b/arch/arm/mach-ixp4xx/fsg-pci.c new file mode 100644 index 0000000..f19f3f6 --- /dev/null +++ b/arch/arm/mach-ixp4xx/fsg-pci.c @@ -0,0 +1,71 @@ +/* + * arch/arch/mach-ixp4xx/fsg-pci.c + * + * FSG board-level PCI initialization + * + * Author: Rod Whitby + * Maintainer: http://www.nslu2-linux.org/ + * + * based on ixdp425-pci.c: + * Copyright (C) 2002 Intel Corporation. + * Copyright (C) 2003-2004 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include + +#include +#include + +void __init fsg_pci_preinit(void) +{ + set_irq_type(IRQ_FSG_PCI_INTA, IRQT_LOW); + set_irq_type(IRQ_FSG_PCI_INTB, IRQT_LOW); + set_irq_type(IRQ_FSG_PCI_INTC, IRQT_LOW); + + ixp4xx_pci_preinit(); +} + +static int __init fsg_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +{ + static int pci_irq_table[FSG_PCI_IRQ_LINES] = { + IRQ_FSG_PCI_INTC, + IRQ_FSG_PCI_INTB, + IRQ_FSG_PCI_INTA, + }; + + int irq = -1; + slot = slot - 11; + + if (slot >= 1 && slot <= FSG_PCI_MAX_DEV && + pin >= 1 && pin <= FSG_PCI_IRQ_LINES) + irq = pci_irq_table[(slot - 1)]; + printk(KERN_INFO "%s: Mapped slot %d pin %d to IRQ %d\n", + __func__, slot, pin, irq); + + return irq; +} + +struct hw_pci fsg_pci __initdata = { + .nr_controllers = 1, + .preinit = fsg_pci_preinit, + .swizzle = pci_std_swizzle, + .setup = ixp4xx_setup, + .scan = ixp4xx_scan_bus, + .map_irq = fsg_map_irq, +}; + +int __init fsg_pci_init(void) +{ + if (machine_is_fsg()) + pci_common_init(&fsg_pci); + return 0; +} + +subsys_initcall(fsg_pci_init); diff --git a/arch/arm/mach-ixp4xx/fsg-setup.c b/arch/arm/mach-ixp4xx/fsg-setup.c new file mode 100644 index 0000000..0db3a90 --- /dev/null +++ b/arch/arm/mach-ixp4xx/fsg-setup.c @@ -0,0 +1,276 @@ +/* + * arch/arm/mach-ixp4xx/fsg-setup.c + * + * FSG board-setup + * + * Copyright (C) 2008 Rod Whitby + * + * based on ixdp425-setup.c: + * Copyright (C) 2003-2004 MontaVista Software, Inc. + * based on nslu2-power.c + * Copyright (C) 2005 Tower Technologies + * + * Author: Rod Whitby + * Maintainers: http://www.nslu2-linux.org/ + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static struct flash_platform_data fsg_flash_data = { + .map_name = "cfi_probe", + .width = 2, +}; + +static struct resource fsg_flash_resource = { + .flags = IORESOURCE_MEM, +}; + +static struct platform_device fsg_flash = { + .name = "IXP4XX-Flash", + .id = 0, + .dev = { + .platform_data = &fsg_flash_data, + }, + .num_resources = 1, + .resource = &fsg_flash_resource, +}; + +static struct i2c_gpio_platform_data fsg_i2c_gpio_data = { + .sda_pin = FSG_SDA_PIN, + .scl_pin = FSG_SCL_PIN, +}; + +static struct platform_device fsg_i2c_gpio = { + .name = "i2c-gpio", + .id = 0, + .dev = { + .platform_data = &fsg_i2c_gpio_data, + }, +}; + +static struct i2c_board_info __initdata fsg_i2c_board_info [] = { + { + I2C_BOARD_INFO("rtc-isl1208", 0x6f), + }, +}; + +static struct resource fsg_uart_resources[] = { + { + .start = IXP4XX_UART1_BASE_PHYS, + .end = IXP4XX_UART1_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + }, + { + .start = IXP4XX_UART2_BASE_PHYS, + .end = IXP4XX_UART2_BASE_PHYS + 0x0fff, + .flags = IORESOURCE_MEM, + } +}; + +static struct plat_serial8250_port fsg_uart_data[] = { + { + .mapbase = IXP4XX_UART1_BASE_PHYS, + .membase = (char *)IXP4XX_UART1_BASE_VIRT + REG_OFFSET, + .irq = IRQ_IXP4XX_UART1, + .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, + .iotype = UPIO_MEM, + .regshift = 2, + .uartclk = IXP4XX_UART_XTAL, + }, + { + .mapbase = IXP4XX_UART2_BASE_PHYS, + .membase = (char *)IXP4XX_UART2_BASE_VIRT + REG_OFFSET, + .irq = IRQ_IXP4XX_UART2, + .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, + .iotype = UPIO_MEM, + .regshift = 2, + .uartclk = IXP4XX_UART_XTAL, + }, + { } +}; + +static struct platform_device fsg_uart = { + .name = "serial8250", + .id = PLAT8250_DEV_PLATFORM, + .dev = { + .platform_data = fsg_uart_data, + }, + .num_resources = ARRAY_SIZE(fsg_uart_resources), + .resource = fsg_uart_resources, +}; + +static struct platform_device fsg_leds = { + .name = "fsg-led", + .id = -1, +}; + +/* Built-in 10/100 Ethernet MAC interfaces */ +static struct eth_plat_info fsg_plat_eth[] = { + { + .phy = 5, + .rxq = 3, + .txreadyq = 20, + }, { + .phy = 4, + .rxq = 4, + .txreadyq = 21, + } +}; + +static struct platform_device fsg_eth[] = { + { + .name = "ixp4xx_eth", + .id = IXP4XX_ETH_NPEB, + .dev = { + .platform_data = fsg_plat_eth, + }, + }, { + .name = "ixp4xx_eth", + .id = IXP4XX_ETH_NPEC, + .dev = { + .platform_data = fsg_plat_eth + 1, + }, + } +}; + +static struct platform_device *fsg_devices[] __initdata = { + &fsg_i2c_gpio, + &fsg_flash, + &fsg_leds, + &fsg_eth[0], + &fsg_eth[1], +}; + +static irqreturn_t fsg_power_handler(int irq, void *dev_id) +{ + /* Signal init to do the ctrlaltdel action, this will bypass init if + * it hasn't started and do a kernel_restart. + */ + ctrl_alt_del(); + + return IRQ_HANDLED; +} + +static irqreturn_t fsg_reset_handler(int irq, void *dev_id) +{ + /* This is the paper-clip reset which does an emergency reboot. */ + printk(KERN_INFO "Restarting system.\n"); + machine_restart(NULL); + + /* This should never be reached. */ + return IRQ_HANDLED; +} + +static void __init fsg_init(void) +{ + DECLARE_MAC_BUF(mac_buf); + uint8_t __iomem *f; + int i; + + ixp4xx_sys_init(); + + fsg_flash_resource.start = IXP4XX_EXP_BUS_BASE(0); + fsg_flash_resource.end = + IXP4XX_EXP_BUS_BASE(0) + ixp4xx_exp_bus_size - 1; + + *IXP4XX_EXP_CS0 |= IXP4XX_FLASH_WRITABLE; + *IXP4XX_EXP_CS1 = *IXP4XX_EXP_CS0; + + /* Configure CS2 for operation, 8bit and writable */ + *IXP4XX_EXP_CS2 = 0xbfff0002; + + i2c_register_board_info(0, fsg_i2c_board_info, + ARRAY_SIZE(fsg_i2c_board_info)); + + /* This is only useful on a modified machine, but it is valuable + * to have it first in order to see debug messages, and so that + * it does *not* get removed if platform_add_devices fails! + */ + (void)platform_device_register(&fsg_uart); + + platform_add_devices(fsg_devices, ARRAY_SIZE(fsg_devices)); + + if (request_irq(gpio_to_irq(FSG_RB_GPIO), &fsg_reset_handler, + IRQF_DISABLED | IRQF_TRIGGER_LOW, + "FSG reset button", NULL) < 0) { + + printk(KERN_DEBUG "Reset Button IRQ %d not available\n", + gpio_to_irq(FSG_RB_GPIO)); + } + + if (request_irq(gpio_to_irq(FSG_SB_GPIO), &fsg_power_handler, + IRQF_DISABLED | IRQF_TRIGGER_LOW, + "FSG power button", NULL) < 0) { + + printk(KERN_DEBUG "Power Button IRQ %d not available\n", + gpio_to_irq(FSG_SB_GPIO)); + } + + /* + * Map in a portion of the flash and read the MAC addresses. + * Since it is stored in BE in the flash itself, we need to + * byteswap it if we're in LE mode. + */ + f = ioremap(IXP4XX_EXP_BUS_BASE(0), 0x400000); + if (f) { +#ifdef __ARMEB__ + for (i = 0; i < 6; i++) { + fsg_plat_eth[0].hwaddr[i] = readb(f + 0x3C0422 + i); + fsg_plat_eth[1].hwaddr[i] = readb(f + 0x3C043B + i); + } +#else + + /* + Endian-swapped reads from unaligned addresses are + required to extract the two MACs from the big-endian + Redboot config area in flash. + */ + + fsg_plat_eth[0].hwaddr[0] = readb(f + 0x3C0421); + fsg_plat_eth[0].hwaddr[1] = readb(f + 0x3C0420); + fsg_plat_eth[0].hwaddr[2] = readb(f + 0x3C0427); + fsg_plat_eth[0].hwaddr[3] = readb(f + 0x3C0426); + fsg_plat_eth[0].hwaddr[4] = readb(f + 0x3C0425); + fsg_plat_eth[0].hwaddr[5] = readb(f + 0x3C0424); + + fsg_plat_eth[1].hwaddr[0] = readb(f + 0x3C0439); + fsg_plat_eth[1].hwaddr[1] = readb(f + 0x3C043F); + fsg_plat_eth[1].hwaddr[2] = readb(f + 0x3C043E); + fsg_plat_eth[1].hwaddr[3] = readb(f + 0x3C043D); + fsg_plat_eth[1].hwaddr[4] = readb(f + 0x3C043C); + fsg_plat_eth[1].hwaddr[5] = readb(f + 0x3C0443); +#endif + iounmap(f); + } + printk(KERN_INFO "FSG: Using MAC address %s for port 0\n", + print_mac(mac_buf, fsg_plat_eth[0].hwaddr)); + printk(KERN_INFO "FSG: Using MAC address %s for port 1\n", + print_mac(mac_buf, fsg_plat_eth[1].hwaddr)); + +} + +MACHINE_START(FSG, "Freecom FSG-3") + /* Maintainer: www.nslu2-linux.org */ + .phys_io = IXP4XX_PERIPHERAL_BASE_PHYS, + .io_pg_offst = ((IXP4XX_PERIPHERAL_BASE_VIRT) >> 18) & 0xfffc, + .map_io = ixp4xx_map_io, + .init_irq = ixp4xx_init_irq, + .timer = &ixp4xx_timer, + .boot_params = 0x0100, + .init_machine = fsg_init, +MACHINE_END + diff --git a/include/asm-arm/arch-ixp4xx/fsg.h b/include/asm-arm/arch-ixp4xx/fsg.h new file mode 100644 index 0000000..c0100cc --- /dev/null +++ b/include/asm-arm/arch-ixp4xx/fsg.h @@ -0,0 +1,50 @@ +/* + * include/asm-arm/arch-ixp4xx/fsg.h + * + * Freecom FSG-3 platform specific definitions + * + * Author: Rod Whitby + * Author: Tomasz Chmielewski + * Maintainers: http://www.nslu2-linux.org + * + * Based on coyote.h by + * Copyright 2004 (c) MontaVista, Software, Inc. + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __ASM_ARCH_HARDWARE_H__ +#error "Do not include this directly, instead #include " +#endif + +#define FSG_SDA_PIN 12 +#define FSG_SCL_PIN 13 + +/* + * FSG PCI IRQs + */ +#define FSG_PCI_MAX_DEV 3 +#define FSG_PCI_IRQ_LINES 3 + + +/* PCI controller GPIO to IRQ pin mappings */ +#define FSG_PCI_INTA_PIN 6 +#define FSG_PCI_INTB_PIN 7 +#define FSG_PCI_INTC_PIN 5 + +/* Buttons */ + +#define FSG_SB_GPIO 4 /* sync button */ +#define FSG_RB_GPIO 9 /* reset button */ +#define FSG_UB_GPIO 10 /* usb button */ + +/* LEDs */ + +#define FSG_LED_WLAN_BIT 0 +#define FSG_LED_WAN_BIT 1 +#define FSG_LED_SATA_BIT 2 +#define FSG_LED_USB_BIT 4 +#define FSG_LED_RING_BIT 5 +#define FSG_LED_SYNC_BIT 7 diff --git a/include/asm-arm/arch-ixp4xx/hardware.h b/include/asm-arm/arch-ixp4xx/hardware.h index 73e8dc3..fa723a6 100644 --- a/include/asm-arm/arch-ixp4xx/hardware.h +++ b/include/asm-arm/arch-ixp4xx/hardware.h @@ -45,5 +45,6 @@ #include "nslu2.h" #include "nas100d.h" #include "dsmg600.h" +#include "fsg.h" #endif /* _ASM_ARCH_HARDWARE_H */ diff --git a/include/asm-arm/arch-ixp4xx/irqs.h b/include/asm-arm/arch-ixp4xx/irqs.h index 1180160..674af4a 100644 --- a/include/asm-arm/arch-ixp4xx/irqs.h +++ b/include/asm-arm/arch-ixp4xx/irqs.h @@ -128,4 +128,11 @@ #define IRQ_DSMG600_PCI_INTE IRQ_IXP4XX_GPIO7 #define IRQ_DSMG600_PCI_INTF IRQ_IXP4XX_GPIO6 +/* + * Freecom FSG-3 Board IRQs + */ +#define IRQ_FSG_PCI_INTA IRQ_IXP4XX_GPIO6 +#define IRQ_FSG_PCI_INTB IRQ_IXP4XX_GPIO7 +#define IRQ_FSG_PCI_INTC IRQ_IXP4XX_GPIO5 + #endif -- cgit v0.10.2 From e055d5bff318845f99c0fbf93245767fab8dce88 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 22 Apr 2008 01:43:27 +0100 Subject: [ARM] 5015/1: arm: remove ARCH_CO285 Trying to compile a kerel for ARCH_CO285 fails with the following error: <-- snip --> ... CC arch/arm/mach-footbridge/dc21285.o /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c: In function 'dc21285_base_address': /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c:54: error: 'PCICFG0_BASE' undeclared (first use in this function) /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c:54: error: (Each undeclared identifier is reported only once /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c:54: error: for each function it appears in.) /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c:57: error: 'PCICFG1_BASE' undeclared (first use in this function) /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c: In function 'dc21285_scan_bus': /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/arm/mach-footbridge/dc21285.c:286: error: implicit declaration of function 'pci_scan_bus' ... make[2]: *** [arch/arm/mach-footbridge/dc21285.o] Error 1 <-- snip --> This does not seem to be a recent breakage. The ARCH_CO285 support is old - kernel 2.2.0 contains first traces of it, an it seems to have been pretty complete in later 2.2 kernels. Since it seems to be completely dead code now this patch therefore removes it. Signed-off-by: Adrian Bunk Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4039a13..ab78d5c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -232,14 +232,6 @@ config ARCH_CLPS711X help Support for Cirrus Logic 711x/721x based boards. -config ARCH_CO285 - bool "Co-EBSA285" - select FOOTBRIDGE - select FOOTBRIDGE_ADDIN - select HAVE_IDE - help - Support for Intel's EBSA285 companion chip. - config ARCH_EBSA110 bool "EBSA-110" select ISA @@ -784,7 +776,7 @@ source "mm/Kconfig" config LEDS bool "Timer and CPU usage LEDs" - depends on ARCH_CDB89712 || ARCH_CO285 || ARCH_EBSA110 || \ + depends on ARCH_CDB89712 || ARCH_EBSA110 || \ ARCH_EBSA285 || ARCH_IMX || ARCH_INTEGRATOR || \ ARCH_LUBBOCK || MACH_MAINSTONE || ARCH_NETWINDER || \ ARCH_OMAP || ARCH_P720T || ARCH_PXA_IDP || \ diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 1a46496..79b8ca3 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -100,8 +100,6 @@ textofs-y := 0x00008000 incdir-$(CONFIG_ARCH_CLPS7500) := cl7500 machine-$(CONFIG_FOOTBRIDGE) := footbridge incdir-$(CONFIG_FOOTBRIDGE) := ebsa285 - machine-$(CONFIG_ARCH_CO285) := footbridge - incdir-$(CONFIG_ARCH_CO285) := ebsa285 machine-$(CONFIG_ARCH_SHARK) := shark machine-$(CONFIG_ARCH_SA1100) := sa1100 ifeq ($(CONFIG_ARCH_SA1100),y) diff --git a/arch/arm/mach-footbridge/Makefile b/arch/arm/mach-footbridge/Makefile index 0694ad6..32f8609 100644 --- a/arch/arm/mach-footbridge/Makefile +++ b/arch/arm/mach-footbridge/Makefile @@ -14,12 +14,10 @@ pci-$(CONFIG_ARCH_EBSA285_HOST) += ebsa285-pci.o pci-$(CONFIG_ARCH_NETWINDER) += netwinder-pci.o pci-$(CONFIG_ARCH_PERSONAL_SERVER) += personal-pci.o -leds-$(CONFIG_ARCH_CO285) += ebsa285-leds.o leds-$(CONFIG_ARCH_EBSA285) += ebsa285-leds.o leds-$(CONFIG_ARCH_NETWINDER) += netwinder-leds.o obj-$(CONFIG_ARCH_CATS) += cats-hw.o isa-timer.o -obj-$(CONFIG_ARCH_CO285) += co285.o dc21285-timer.o obj-$(CONFIG_ARCH_EBSA285) += ebsa285.o dc21285-timer.o obj-$(CONFIG_ARCH_NETWINDER) += netwinder-hw.o isa-timer.o obj-$(CONFIG_ARCH_PERSONAL_SERVER) += personal.o dc21285-timer.o diff --git a/arch/arm/mach-footbridge/co285.c b/arch/arm/mach-footbridge/co285.c deleted file mode 100644 index 4545576..0000000 --- a/arch/arm/mach-footbridge/co285.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * linux/arch/arm/mach-footbridge/co285.c - * - * CO285 machine fixup - */ -#include - -#include -#include - -#include - -#include "common.h" - -static void __init -fixup_coebsa285(struct machine_desc *desc, struct tag *tags, - char **cmdline, struct meminfo *mi) -{ - extern unsigned long boot_memory_end; - extern char boot_command_line[]; - - mi->nr_banks = 1; - mi->bank[0].start = PHYS_OFFSET; - mi->bank[0].size = boot_memory_end; - mi->bank[0].node = 0; - - *cmdline = boot_command_line; -} - -MACHINE_START(CO285, "co-EBSA285") - /* Maintainer: Mark van Doesburg */ - .phys_io = DC21285_ARMCSR_BASE, - .io_pg_offst = ((0x7cf00000) >> 18) & 0xfffc, - .fixup = fixup_coebsa285, - .map_io = footbridge_map_io, - .init_irq = footbridge_init_irq, - .timer = &footbridge_timer, -MACHINE_END - diff --git a/arch/arm/mach-footbridge/common.c b/arch/arm/mach-footbridge/common.c index ef29fc3..b08ab50 100644 --- a/arch/arm/mach-footbridge/common.c +++ b/arch/arm/mach-footbridge/common.c @@ -177,25 +177,6 @@ static struct map_desc ebsa285_host_io_desc[] __initdata = { #endif }; -/* - * The CO-ebsa285 mapping. - */ -static struct map_desc co285_io_desc[] __initdata = { -#ifdef CONFIG_ARCH_CO285 - { - .virtual = PCIO_BASE, - .pfn = __phys_to_pfn(DC21285_PCI_IO), - .length = PCIO_SIZE, - .type = MT_DEVICE, - }, { - .virtual = PCIMEM_BASE, - .pfn = __phys_to_pfn(DC21285_PCI_MEM), - .length = PCIMEM_SIZE, - .type = MT_DEVICE, - }, -#endif -}; - void __init footbridge_map_io(void) { /* @@ -208,8 +189,6 @@ void __init footbridge_map_io(void) * Now, work out what we've got to map in addition on this * platform. */ - if (machine_is_co285()) - iotable_init(co285_io_desc, ARRAY_SIZE(co285_io_desc)); if (footbridge_cfn_mode()) iotable_init(ebsa285_host_io_desc, ARRAY_SIZE(ebsa285_host_io_desc)); } diff --git a/arch/arm/mach-footbridge/ebsa285-leds.c b/arch/arm/mach-footbridge/ebsa285-leds.c index a64e222..09c1fbc 100644 --- a/arch/arm/mach-footbridge/ebsa285-leds.c +++ b/arch/arm/mach-footbridge/ebsa285-leds.c @@ -128,7 +128,7 @@ static void ebsa285_leds_event(led_event_t evt) static int __init leds_init(void) { - if (machine_is_ebsa285() || machine_is_co285()) + if (machine_is_ebsa285()) leds_event = ebsa285_leds_event; leds_event(led_start); diff --git a/arch/arm/mach-footbridge/time.c b/arch/arm/mach-footbridge/time.c index 5d02e95..d5cfcda 100644 --- a/arch/arm/mach-footbridge/time.c +++ b/arch/arm/mach-footbridge/time.c @@ -115,8 +115,7 @@ static int set_isa_cmos_time(void) void __init isa_rtc_init(void) { - if (machine_is_co285() || - machine_is_personal_server()) + if (machine_is_personal_server()) /* * Add-in 21285s shouldn't access the RTC */ diff --git a/include/asm-arm/arch-ebsa285/hardware.h b/include/asm-arm/arch-ebsa285/hardware.h index daad8ee..74610c2 100644 --- a/include/asm-arm/arch-ebsa285/hardware.h +++ b/include/asm-arm/arch-ebsa285/hardware.h @@ -14,7 +14,6 @@ #include -#ifdef CONFIG_ARCH_FOOTBRIDGE /* Virtual Physical Size * 0xff800000 0x40000000 1MB X-Bus * 0xff000000 0x7c000000 1MB PCI I/O space @@ -50,31 +49,6 @@ #define PCIMEM_SIZE 0x01000000 #define PCIMEM_BASE 0xf0000000 -#elif defined(CONFIG_ARCH_CO285) -/* - * This is the COEBSA285 cut-down mapping - */ -#define PCIMEM_SIZE 0x80000000 -#define PCIMEM_BASE 0x80000000 - -#define WFLUSH_SIZE 0x01000000 -#define WFLUSH_BASE 0x7d000000 - -#define ARMCSR_SIZE 0x00100000 -#define ARMCSR_BASE 0x7cf00000 - -#define XBUS_SIZE 0x00020000 -#define XBUS_BASE 0x7cee0000 - -#define PCIO_SIZE 0x00010000 -#define PCIO_BASE 0x7ced0000 - -#else - -#error "Undefined footbridge architecture" - -#endif - #define XBUS_LEDS ((volatile unsigned char *)(XBUS_BASE + 0x12000)) #define XBUS_LED_AMBER (1 << 0) #define XBUS_LED_GREEN (1 << 1) diff --git a/include/asm-arm/arch-ebsa285/memory.h b/include/asm-arm/arch-ebsa285/memory.h index cbd7ae6..9019a3b 100644 --- a/include/asm-arm/arch-ebsa285/memory.h +++ b/include/asm-arm/arch-ebsa285/memory.h @@ -42,8 +42,6 @@ extern unsigned long __bus_to_virt(unsigned long); #endif -#if defined(CONFIG_ARCH_FOOTBRIDGE) - /* Task size and page offset at 3GB */ #define TASK_SIZE UL(0xbf000000) #define PAGE_OFFSET UL(0xc0000000) @@ -53,23 +51,6 @@ extern unsigned long __bus_to_virt(unsigned long); */ #define FLUSH_BASE 0xf9000000 -#elif defined(CONFIG_ARCH_CO285) - -/* Task size and page offset at 1.5GB */ -#define TASK_SIZE UL(0x5f000000) -#define PAGE_OFFSET UL(0x60000000) - -/* - * Cache flushing area. - */ -#define FLUSH_BASE 0x7e000000 - -#else - -#error "Undefined footbridge architecture" - -#endif - /* * Physical DRAM offset. */ diff --git a/include/asm-arm/arch-ebsa285/vmalloc.h b/include/asm-arm/arch-ebsa285/vmalloc.h index 0259820..e487d7e 100644 --- a/include/asm-arm/arch-ebsa285/vmalloc.h +++ b/include/asm-arm/arch-ebsa285/vmalloc.h @@ -7,8 +7,4 @@ */ -#ifdef CONFIG_ARCH_FOOTBRIDGE #define VMALLOC_END (PAGE_OFFSET + 0x30000000) -#else -#define VMALLOC_END (PAGE_OFFSET + 0x20000000) -#endif -- cgit v0.10.2 From 205bee6ad804d7034773b5978c74dde495df2301 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 13:57:26 +0100 Subject: [ARM] dyntick: Remove obsolete and unused ARM dyntick support dyntick is superseded by the clocksource/clockevent infrastructure, using the NO_HZ configuration option. No one implements dyntick on ARM anymore, so it's pointless keeping it around. Remove dyntick support. Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b786e68..167824e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -703,27 +703,6 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. -config NO_IDLE_HZ - bool "Dynamic tick timer" - depends on !GENERIC_CLOCKEVENTS - help - Select this option if you want to disable continuous timer ticks - and have them programmed to occur as required. This option saves - power as the system can remain in idle state for longer. - - By default dynamic tick is disabled during the boot, and can be - manually enabled with: - - echo 1 > /sys/devices/system/timer/timer0/dyn_tick - - Alternatively, if you want dynamic tick automatically enabled - during boot, pass "dyntick=enable" via the kernel command string. - - Please note that dynamic tick may affect the accuracy of - timekeeping on some platforms depending on the implementation. - Currently at least OMAP, PXA2xx and SA11x0 platforms are known - to have accurate timekeeping with dynamic tick. - config HZ int default 128 if ARCH_L7200 diff --git a/arch/arm/configs/at91cap9adk_defconfig b/arch/arm/configs/at91cap9adk_defconfig index e32e736..b8e73e9 100644 --- a/arch/arm/configs/at91cap9adk_defconfig +++ b/arch/arm/configs/at91cap9adk_defconfig @@ -213,7 +213,6 @@ CONFIG_CPU_CP15_MMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 CONFIG_AEABI=y CONFIG_OABI_COMPAT=y diff --git a/arch/arm/configs/at91rm9200dk_defconfig b/arch/arm/configs/at91rm9200dk_defconfig index 2dbbbc3..868fb7b 100644 --- a/arch/arm/configs/at91rm9200dk_defconfig +++ b/arch/arm/configs/at91rm9200dk_defconfig @@ -169,7 +169,6 @@ CONFIG_AT91_CF=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y diff --git a/arch/arm/configs/at91rm9200ek_defconfig b/arch/arm/configs/at91rm9200ek_defconfig index 6e994f7..de43fc6 100644 --- a/arch/arm/configs/at91rm9200ek_defconfig +++ b/arch/arm/configs/at91rm9200ek_defconfig @@ -160,7 +160,6 @@ CONFIG_ISA_DMA_API=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y diff --git a/arch/arm/configs/at91sam9260ek_defconfig b/arch/arm/configs/at91sam9260ek_defconfig index f659c93..2011adf 100644 --- a/arch/arm/configs/at91sam9260ek_defconfig +++ b/arch/arm/configs/at91sam9260ek_defconfig @@ -220,7 +220,6 @@ CONFIG_CPU_CP15_MMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/at91sam9261ek_defconfig b/arch/arm/configs/at91sam9261ek_defconfig index 3802e85..4049768 100644 --- a/arch/arm/configs/at91sam9261ek_defconfig +++ b/arch/arm/configs/at91sam9261ek_defconfig @@ -213,7 +213,6 @@ CONFIG_CPU_CP15_MMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/at91sam9263ek_defconfig b/arch/arm/configs/at91sam9263ek_defconfig index 32a0d74..fa1c5ae 100644 --- a/arch/arm/configs/at91sam9263ek_defconfig +++ b/arch/arm/configs/at91sam9263ek_defconfig @@ -213,7 +213,6 @@ CONFIG_CPU_CP15_MMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/at91sam9rlek_defconfig b/arch/arm/configs/at91sam9rlek_defconfig index 98e6746..d8ec5f9 100644 --- a/arch/arm/configs/at91sam9rlek_defconfig +++ b/arch/arm/configs/at91sam9rlek_defconfig @@ -211,7 +211,6 @@ CONFIG_CPU_CP15_MMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/ateb9200_defconfig b/arch/arm/configs/ateb9200_defconfig index d846a49..85c80f7 100644 --- a/arch/arm/configs/ateb9200_defconfig +++ b/arch/arm/configs/ateb9200_defconfig @@ -171,7 +171,6 @@ CONFIG_AT91_CF=m # Kernel Features # CONFIG_PREEMPT=y -CONFIG_NO_IDLE_HZ=y CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/cm_x270_defconfig b/arch/arm/configs/cm_x270_defconfig index 5cab083..33b201c 100644 --- a/arch/arm/configs/cm_x270_defconfig +++ b/arch/arm/configs/cm_x270_defconfig @@ -194,7 +194,6 @@ CONFIG_PCI_HOST_ITE8152=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig index 4264e27..f7622e6 100644 --- a/arch/arm/configs/collie_defconfig +++ b/arch/arm/configs/collie_defconfig @@ -166,7 +166,6 @@ CONFIG_PCMCIA_SA1100=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set CONFIG_ARCH_DISCONTIGMEM_ENABLE=y diff --git a/arch/arm/configs/corgi_defconfig b/arch/arm/configs/corgi_defconfig index e8980a9..9b8748a 100644 --- a/arch/arm/configs/corgi_defconfig +++ b/arch/arm/configs/corgi_defconfig @@ -165,7 +165,6 @@ CONFIG_PCMCIA_PXA2XX=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y diff --git a/arch/arm/configs/ecbat91_defconfig b/arch/arm/configs/ecbat91_defconfig index 90ed214..cfeb817 100644 --- a/arch/arm/configs/ecbat91_defconfig +++ b/arch/arm/configs/ecbat91_defconfig @@ -230,7 +230,6 @@ CONFIG_AT91_CF=y # # CONFIG_TICK_ONESHOT is not set CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/em_x270_defconfig b/arch/arm/configs/em_x270_defconfig index 6bea090..d3114c2 100644 --- a/arch/arm/configs/em_x270_defconfig +++ b/arch/arm/configs/em_x270_defconfig @@ -197,7 +197,6 @@ CONFIG_XSCALE_PMU=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 CONFIG_AEABI=y CONFIG_OABI_COMPAT=y diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig index 24a701a..21aa013 100644 --- a/arch/arm/configs/ep93xx_defconfig +++ b/arch/arm/configs/ep93xx_defconfig @@ -184,7 +184,6 @@ CONFIG_ARM_AMBA=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/eseries_pxa_defconfig b/arch/arm/configs/eseries_pxa_defconfig index ed487b9..493ecee 100644 --- a/arch/arm/configs/eseries_pxa_defconfig +++ b/arch/arm/configs/eseries_pxa_defconfig @@ -251,7 +251,6 @@ CONFIG_PCMCIA_PXA2XX=m # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 CONFIG_AEABI=y CONFIG_OABI_COMPAT=y diff --git a/arch/arm/configs/iop13xx_defconfig b/arch/arm/configs/iop13xx_defconfig index 988b4d1..482e570 100644 --- a/arch/arm/configs/iop13xx_defconfig +++ b/arch/arm/configs/iop13xx_defconfig @@ -197,7 +197,6 @@ CONFIG_PCI_LEGACY=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/iop32x_defconfig b/arch/arm/configs/iop32x_defconfig index 83f40d4..8612f58 100644 --- a/arch/arm/configs/iop32x_defconfig +++ b/arch/arm/configs/iop32x_defconfig @@ -201,7 +201,6 @@ CONFIG_PCI_LEGACY=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/iop33x_defconfig b/arch/arm/configs/iop33x_defconfig index 917afb5..8b0098d 100644 --- a/arch/arm/configs/iop33x_defconfig +++ b/arch/arm/configs/iop33x_defconfig @@ -197,7 +197,6 @@ CONFIG_PCI_LEGACY=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/ixp2000_defconfig b/arch/arm/configs/ixp2000_defconfig index f8f9793..84680db 100644 --- a/arch/arm/configs/ixp2000_defconfig +++ b/arch/arm/configs/ixp2000_defconfig @@ -184,7 +184,6 @@ CONFIG_PCI=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/ixp23xx_defconfig b/arch/arm/configs/ixp23xx_defconfig index 27cf022..4a2f7b2 100644 --- a/arch/arm/configs/ixp23xx_defconfig +++ b/arch/arm/configs/ixp23xx_defconfig @@ -180,7 +180,6 @@ CONFIG_PCI=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/kafa_defconfig b/arch/arm/configs/kafa_defconfig index ae51a40..6dd95a2 100644 --- a/arch/arm/configs/kafa_defconfig +++ b/arch/arm/configs/kafa_defconfig @@ -162,7 +162,6 @@ CONFIG_CPU_TLB_V4WBI=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/kb9202_defconfig b/arch/arm/configs/kb9202_defconfig index c16537d..8e74c66 100644 --- a/arch/arm/configs/kb9202_defconfig +++ b/arch/arm/configs/kb9202_defconfig @@ -126,7 +126,6 @@ CONFIG_ISA_DMA_API=y # # Kernel Features # -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y diff --git a/arch/arm/configs/ks8695_defconfig b/arch/arm/configs/ks8695_defconfig index 8ab21a0..6077f2c 100644 --- a/arch/arm/configs/ks8695_defconfig +++ b/arch/arm/configs/ks8695_defconfig @@ -174,7 +174,6 @@ CONFIG_PCCARD_NONSTATIC=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/lpd270_defconfig b/arch/arm/configs/lpd270_defconfig index a3bf583..1a38d8e 100644 --- a/arch/arm/configs/lpd270_defconfig +++ b/arch/arm/configs/lpd270_defconfig @@ -173,7 +173,6 @@ CONFIG_XSCALE_PMU=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/lpd7a404_defconfig b/arch/arm/configs/lpd7a404_defconfig index 46a0f7f..7a2e932 100644 --- a/arch/arm/configs/lpd7a404_defconfig +++ b/arch/arm/configs/lpd7a404_defconfig @@ -148,7 +148,6 @@ CONFIG_ARM_AMBA=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set # CONFIG_AEABI is not set CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_SELECT_MEMORY_MODEL=y diff --git a/arch/arm/configs/netx_defconfig b/arch/arm/configs/netx_defconfig index 57f32f3..0884f23 100644 --- a/arch/arm/configs/netx_defconfig +++ b/arch/arm/configs/netx_defconfig @@ -154,7 +154,6 @@ CONFIG_ARM_AMBA=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/onearm_defconfig b/arch/arm/configs/onearm_defconfig index 650a248..418ca2f 100644 --- a/arch/arm/configs/onearm_defconfig +++ b/arch/arm/configs/onearm_defconfig @@ -202,7 +202,6 @@ CONFIG_AT91_CF=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/picotux200_defconfig b/arch/arm/configs/picotux200_defconfig index 95a22f5..14826f0 100644 --- a/arch/arm/configs/picotux200_defconfig +++ b/arch/arm/configs/picotux200_defconfig @@ -201,7 +201,6 @@ CONFIG_ARM_THUMB=y # Kernel Features # # CONFIG_PREEMPT is not set -CONFIG_NO_IDLE_HZ=y CONFIG_HZ=100 CONFIG_AEABI=y CONFIG_OABI_COMPAT=y diff --git a/arch/arm/configs/pnx4008_defconfig b/arch/arm/configs/pnx4008_defconfig index b5e11aa..811b8f6 100644 --- a/arch/arm/configs/pnx4008_defconfig +++ b/arch/arm/configs/pnx4008_defconfig @@ -151,7 +151,6 @@ CONFIG_ARM_THUMB=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/realview-smp_defconfig b/arch/arm/configs/realview-smp_defconfig index fc39ba1..0c09b23 100644 --- a/arch/arm/configs/realview-smp_defconfig +++ b/arch/arm/configs/realview-smp_defconfig @@ -177,7 +177,6 @@ CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y CONFIG_LOCAL_TIMERS=y # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/realview_defconfig b/arch/arm/configs/realview_defconfig index accbf52..907e543 100644 --- a/arch/arm/configs/realview_defconfig +++ b/arch/arm/configs/realview_defconfig @@ -126,7 +126,6 @@ CONFIG_ISA_DMA_API=y # # Kernel Features # -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig index 5ddecb9..f62d181 100644 --- a/arch/arm/configs/rpc_defconfig +++ b/arch/arm/configs/rpc_defconfig @@ -190,7 +190,6 @@ CONFIG_ISA_DMA_API=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig index f8a1645..6d03763 100644 --- a/arch/arm/configs/s3c2410_defconfig +++ b/arch/arm/configs/s3c2410_defconfig @@ -246,7 +246,6 @@ CONFIG_ISA=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=200 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/sam9_l9260_defconfig b/arch/arm/configs/sam9_l9260_defconfig index 484dc97..8688362 100644 --- a/arch/arm/configs/sam9_l9260_defconfig +++ b/arch/arm/configs/sam9_l9260_defconfig @@ -211,7 +211,6 @@ CONFIG_ARM_THUMB=y # # CONFIG_TICK_ONESHOT is not set CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index aa7a011..7d59fb1 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -164,7 +164,6 @@ CONFIG_PCMCIA_PXA2XX=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y diff --git a/arch/arm/configs/tct_hammer_defconfig b/arch/arm/configs/tct_hammer_defconfig index 576b833..07dfb98 100644 --- a/arch/arm/configs/tct_hammer_defconfig +++ b/arch/arm/configs/tct_hammer_defconfig @@ -247,7 +247,6 @@ CONFIG_ARM_THUMB=y # # CONFIG_TICK_ONESHOT is not set # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=200 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/configs/trizeps4_defconfig b/arch/arm/configs/trizeps4_defconfig index 6db6392..8b7a431 100644 --- a/arch/arm/configs/trizeps4_defconfig +++ b/arch/arm/configs/trizeps4_defconfig @@ -195,7 +195,6 @@ CONFIG_PCMCIA_PXA2XX=y # Kernel Features # CONFIG_PREEMPT=y -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 CONFIG_AEABI=y CONFIG_OABI_COMPAT=y diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig index 48dca69..8355f88 100644 --- a/arch/arm/configs/versatile_defconfig +++ b/arch/arm/configs/versatile_defconfig @@ -151,7 +151,6 @@ CONFIG_ARM_AMBA=y # Kernel Features # # CONFIG_PREEMPT is not set -# CONFIG_NO_IDLE_HZ is not set CONFIG_HZ=100 # CONFIG_AEABI is not set # CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 46bf2ede..199b368 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -133,10 +133,8 @@ static void default_idle(void) cpu_relax(); else { local_irq_disable(); - if (!need_resched()) { - timer_dyn_reprogram(); + if (!need_resched()) arch_idle(); - } local_irq_enable(); } } diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index b5867ec..cc5145b 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -365,108 +365,6 @@ static struct sysdev_class timer_sysclass = { .resume = timer_resume, }; -#ifdef CONFIG_NO_IDLE_HZ -static int timer_dyn_tick_enable(void) -{ - struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; - unsigned long flags; - int ret = -ENODEV; - - if (dyn_tick) { - spin_lock_irqsave(&dyn_tick->lock, flags); - ret = 0; - if (!(dyn_tick->state & DYN_TICK_ENABLED)) { - ret = dyn_tick->enable(); - - if (ret == 0) - dyn_tick->state |= DYN_TICK_ENABLED; - } - spin_unlock_irqrestore(&dyn_tick->lock, flags); - } - - return ret; -} - -static int timer_dyn_tick_disable(void) -{ - struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; - unsigned long flags; - int ret = -ENODEV; - - if (dyn_tick) { - spin_lock_irqsave(&dyn_tick->lock, flags); - ret = 0; - if (dyn_tick->state & DYN_TICK_ENABLED) { - ret = dyn_tick->disable(); - - if (ret == 0) - dyn_tick->state &= ~DYN_TICK_ENABLED; - } - spin_unlock_irqrestore(&dyn_tick->lock, flags); - } - - return ret; -} - -/* - * Reprogram the system timer for at least the calculated time interval. - * This function should be called from the idle thread with IRQs disabled, - * immediately before sleeping. - */ -void timer_dyn_reprogram(void) -{ - struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; - unsigned long next, seq, flags; - - if (!dyn_tick) - return; - - spin_lock_irqsave(&dyn_tick->lock, flags); - if (dyn_tick->state & DYN_TICK_ENABLED) { - next = next_timer_interrupt(); - do { - seq = read_seqbegin(&xtime_lock); - dyn_tick->reprogram(next - jiffies); - } while (read_seqretry(&xtime_lock, seq)); - } - spin_unlock_irqrestore(&dyn_tick->lock, flags); -} - -static ssize_t timer_show_dyn_tick(struct sys_device *dev, char *buf) -{ - return sprintf(buf, "%i\n", - (system_timer->dyn_tick->state & DYN_TICK_ENABLED) >> 1); -} - -static ssize_t timer_set_dyn_tick(struct sys_device *dev, const char *buf, - size_t count) -{ - unsigned int enable = simple_strtoul(buf, NULL, 2); - - if (enable) - timer_dyn_tick_enable(); - else - timer_dyn_tick_disable(); - - return count; -} -static SYSDEV_ATTR(dyn_tick, 0644, timer_show_dyn_tick, timer_set_dyn_tick); - -/* - * dyntick=enable|disable - */ -static char dyntick_str[4] __initdata = ""; - -static int __init dyntick_setup(char *str) -{ - if (str) - strlcpy(dyntick_str, str, sizeof(dyntick_str)); - return 1; -} - -__setup("dyntick=", dyntick_setup); -#endif - static int __init timer_init_sysfs(void) { int ret = sysdev_class_register(&timer_sysclass); @@ -475,19 +373,6 @@ static int __init timer_init_sysfs(void) ret = sysdev_register(&system_timer->dev); } -#ifdef CONFIG_NO_IDLE_HZ - if (ret == 0 && system_timer->dyn_tick) { - ret = sysdev_create_file(&system_timer->dev, &attr_dyn_tick); - - /* - * Turn on dynamic tick after calibrate delay - * for correct bogomips - */ - if (ret == 0 && dyntick_str[0] == 'e') - ret = timer_dyn_tick_enable(); - } -#endif - return ret; } @@ -500,10 +385,5 @@ void __init time_init(void) system_timer->offset = dummy_gettimeoffset; #endif system_timer->init(); - -#ifdef CONFIG_NO_IDLE_HZ - if (system_timer->dyn_tick) - spin_lock_init(&system_timer->dyn_tick->lock); -#endif } diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index e6c64e1..742f79e 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -116,13 +116,6 @@ void omap_pm_idle(void) return; } - /* - * Since an interrupt may set up a timer, we don't want to - * reprogram the hardware timer with interrupts enabled. - * Re-enable interrupts only after returning from idle. - */ - timer_dyn_reprogram(); - #ifdef CONFIG_OMAP_MPU_TIMER #warning Enable 32kHz OS timer in order to allow sleep states in idle use_idlect1 = use_idlect1 & ~(1 << 9); diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index aad781d..d6c9de8 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -57,13 +57,6 @@ void omap2_pm_idle(void) return; } - /* - * Since an interrupt may set up a timer, we don't want to - * reprogram the hardware timer with interrupts enabled. - * Re-enable interrupts only after returning from idle. - */ - timer_dyn_reprogram(); - omap2_sram_idle(); local_fiq_enable(); local_irq_enable(); diff --git a/include/asm-arm/dyntick.h b/include/asm-arm/dyntick.h deleted file mode 100644 index 19fab2d..0000000 --- a/include/asm-arm/dyntick.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASMARM_DYNTICK_H -#define _ASMARM_DYNTICK_H - -#include - -#endif /* _ASMARM_DYNTICK_H */ diff --git a/include/asm-arm/hw_irq.h b/include/asm-arm/hw_irq.h index 98d594a..f1a08a5 100644 --- a/include/asm-arm/hw_irq.h +++ b/include/asm-arm/hw_irq.h @@ -6,15 +6,4 @@ #include -#if defined(CONFIG_NO_IDLE_HZ) -# include -# define handle_dynamic_tick(action) \ - if (!(action->flags & IRQF_TIMER) && system_timer->dyn_tick) { \ - write_seqlock(&xtime_lock); \ - if (system_timer->dyn_tick->state & DYN_TICK_ENABLED) \ - system_timer->dyn_tick->handler(irq, NULL); \ - write_sequnlock(&xtime_lock); \ - } -#endif - #endif diff --git a/include/asm-arm/mach/time.h b/include/asm-arm/mach/time.h index 5dc3570..2fd36ea 100644 --- a/include/asm-arm/mach/time.h +++ b/include/asm-arm/mach/time.h @@ -41,30 +41,8 @@ struct sys_timer { #ifndef CONFIG_GENERIC_TIME unsigned long (*offset)(void); #endif - -#ifdef CONFIG_NO_IDLE_HZ - struct dyn_tick_timer *dyn_tick; -#endif -}; - -#ifdef CONFIG_NO_IDLE_HZ - -#define DYN_TICK_ENABLED (1 << 1) - -struct dyn_tick_timer { - spinlock_t lock; - unsigned int state; /* Current state */ - int (*enable)(void); /* Enables dynamic tick */ - int (*disable)(void); /* Disables dynamic tick */ - void (*reprogram)(unsigned long); /* Reprograms the timer */ - int (*handler)(int, void *); }; -void timer_dyn_reprogram(void); -#else -#define timer_dyn_reprogram() do { } while (0) -#endif - extern struct sys_timer *system_timer; extern void timer_tick(void); -- cgit v0.10.2 From ee9c578527a93c66becb526c4a122c5358a959c5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 13:59:33 +0100 Subject: dyntick: Remove last reminants of dyntick support Remove the last reminants of dyntick support from the generic kernel. Signed-off-by: Russell King diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 421be5f..543d9ca 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1078,7 +1078,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } EXPORT_SYMBOL_GPL(hrtimer_get_remaining); -#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ) +#ifdef CONFIG_NO_HZ /** * hrtimer_get_next_event - get the time until next expiry event * diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d7ffdc5..76426d9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -131,8 +131,6 @@ extern int sysctl_userprocess_debug; extern int spin_retry; #endif -extern int sysctl_hz_timer; - #ifdef CONFIG_BSD_PROCESS_ACCT extern int acct_parm[]; #endif @@ -562,16 +560,6 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef CONFIG_NO_IDLE_HZ - { - .ctl_name = KERN_HZ_TIMER, - .procname = "hz_timer", - .data = &sysctl_hz_timer, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif { .ctl_name = KERN_S390_USER_DEBUG_LOGGING, .procname = "userprocess_debug", diff --git a/kernel/timer.c b/kernel/timer.c index ceacc66..ef3fa69 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -812,7 +812,7 @@ static inline void __run_timers(struct tvec_base *base) spin_unlock_irq(&base->lock); } -#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ) +#ifdef CONFIG_NO_HZ /* * Find out when the next timer event is due to happen. This * is used on S/390 to stop all activity when a cpus is idle. @@ -947,14 +947,6 @@ unsigned long get_next_timer_interrupt(unsigned long now) return cmp_next_hrtimer_event(now, expires); } - -#ifdef CONFIG_NO_IDLE_HZ -unsigned long next_timer_interrupt(void) -{ - return get_next_timer_interrupt(jiffies); -} -#endif - #endif #ifndef CONFIG_VIRT_CPU_ACCOUNTING -- cgit v0.10.2 From 150c9173031d43746d70582a17931350f3da8932 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 12 May 2008 17:37:21 +0100 Subject: dyntick: remove deferences from SH SH does not have a configuration option for NO_IDLE_HZ their Kconfig files, yet their defconfig files lists them. Remove those references. Signed-off-by: Russell King diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index f52db12..38f934a 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -226,7 +226,6 @@ CONFIG_CPU_HAS_PTEA=y # CONFIG_SH_TMU=y CONFIG_SH_TIMER_IRQ=16 -# CONFIG_NO_IDLE_HZ is not set CONFIG_SH_PCLK_FREQ=33333333 # diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 9fa66d9..b68b6cd 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -231,7 +231,6 @@ CONFIG_CPU_HAS_PTEA=y # CONFIG_SH_TMU=y CONFIG_SH_TIMER_IRQ=16 -# CONFIG_NO_IDLE_HZ is not set CONFIG_SH_PCLK_FREQ=40000000 # diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig index 84717d8..490dcbc 100644 --- a/arch/sh/configs/se7705_defconfig +++ b/arch/sh/configs/se7705_defconfig @@ -239,7 +239,6 @@ CONFIG_CPU_HAS_SR_RB=y # CONFIG_SH_TMU=y CONFIG_SH_TIMER_IRQ=16 -# CONFIG_NO_IDLE_HZ is not set CONFIG_SH_PCLK_FREQ=33333333 # diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 240a1ce..2dd83af 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -236,7 +236,6 @@ CONFIG_CPU_HAS_SR_RB=y # CONFIG_SH_TMU=y CONFIG_SH_TIMER_IRQ=16 -# CONFIG_NO_IDLE_HZ is not set CONFIG_SH_PCLK_FREQ=66666666 # diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig index c60b6fd..167786f 100644 --- a/arch/sh/configs/se7750_defconfig +++ b/arch/sh/configs/se7750_defconfig @@ -235,7 +235,6 @@ CONFIG_CPU_HAS_PTEA=y # CONFIG_SH_TMU=y CONFIG_SH_TIMER_IRQ=16 -# CONFIG_NO_IDLE_HZ is not set CONFIG_SH_PCLK_FREQ=33333333 # -- cgit v0.10.2 From 6fd592daae2182adc47f405e20d07f34f52d07dd Mon Sep 17 00:00:00 2001 From: "Carlos R. Mafra" Date: Mon, 5 May 2008 20:11:22 -0300 Subject: x86: clean up computation of HPET .mult variables While reading through the HPET code I realized that the computation of .mult variables could be done with less lines of code, resulting in a 1.6% text size saving for hpet.o So I propose the following patch, which applies against today's Linus -git tree. >From 0c6507e400e9ca5f7f14331e18f8c12baf75a9d3 Mon Sep 17 00:00:00 2001 From: Carlos R. Mafra Date: Mon, 5 May 2008 19:38:53 -0300 The computation of clocksource_hpet.mult tmp = (u64)hpet_period << HPET_SHIFT; do_div(tmp, FSEC_PER_NSEC); clocksource_hpet.mult = (u32)tmp; can be streamlined if we note that it is equal to clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT); Furthermore, the computation of hpet_clockevent.mult uint64_t hpet_freq; hpet_freq = 1000000000000000ULL; do_div(hpet_freq, hpet_period); hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, NSEC_PER_SEC, hpet_clockevent.shift); can also be streamlined with the observation that hpet_period and hpet_freq are inverse to each other (in proper units). So instead of computing hpet_freq and using (schematically) div_sc(hpet_freq, 10^9, shift) we use the trick of calling with the arguments in reverse order, div_sc(10^6, hpet_period, shift). The different power of ten is due to frequency being in Hertz (1/sec) and the period being in units of femtosecond. Explicitly, mult = (hpet_freq * 2^shift)/10^9 (before) mult = (10^6 * 2^shift)/hpet_period (after) because hpet_freq = 10^15/hpet_period. The comments in the code are also updated to reflect the changes. As a result, text data bss dec hex filename 2957 425 92 3474 d92 arch/x86/kernel/hpet.o 3006 425 92 3523 dc3 arch/x86/kernel/hpet.o.old a 1.6% reduction in text size. Signed-off-by: Carlos R. Mafra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 9b5cfcd..ea230ec 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -17,7 +17,7 @@ /* FSEC = 10^-15 NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000 +#define FSEC_PER_NSEC 1000000L /* * HPET address is set in acpi/boot.c, when an ACPI entry exists @@ -206,20 +206,19 @@ static void hpet_enable_legacy_int(void) static void hpet_legacy_clockevent_register(void) { - uint64_t hpet_freq; - /* Start HPET legacy interrupts */ hpet_enable_legacy_int(); /* - * The period is a femto seconds value. We need to calculate the - * scaled math multiplication factor for nanosecond to hpet tick - * conversion. + * The mult factor is defined as (include/linux/clockchips.h) + * mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h) + * hpet_period is in units of femtoseconds (per cycle), so + * mult/2^shift = cyc/ns = 10^6/hpet_period + * mult = (10^6 * 2^shift)/hpet_period + * mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period */ - hpet_freq = 1000000000000000ULL; - do_div(hpet_freq, hpet_period); - hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, - NSEC_PER_SEC, hpet_clockevent.shift); + hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC, + hpet_period, hpet_clockevent.shift); /* Calculate the min / max delta */ hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &hpet_clockevent); @@ -324,7 +323,7 @@ static struct clocksource clocksource_hpet = { static int hpet_clocksource_register(void) { - u64 tmp, start, now; + u64 start, now; cycle_t t1; /* Start the counter */ @@ -351,21 +350,15 @@ static int hpet_clocksource_register(void) return -ENODEV; } - /* Initialize and register HPET clocksource - * - * hpet period is in femto seconds per cycle - * so we need to convert this to ns/cyc units - * approximated by mult/2^shift - * - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult - * (fsec/cyc << shift)/1000000 = mult - * (hpet_period << shift)/FSEC_PER_NSEC = mult + /* + * The definition of mult is (include/linux/clocksource.h) + * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc + * so we first need to convert hpet_period to ns/cyc units: + * mult/2^shift = ns/cyc = hpet_period/10^6 + * mult = (hpet_period * 2^shift)/10^6 + * mult = (hpet_period << shift)/FSEC_PER_NSEC */ - tmp = (u64)hpet_period << HPET_SHIFT; - do_div(tmp, FSEC_PER_NSEC); - clocksource_hpet.mult = (u32)tmp; + clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT); clocksource_register(&clocksource_hpet); -- cgit v0.10.2 From e8aa4667baf74dfd85fbaab86861465acb811085 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 9 May 2008 11:49:11 +0200 Subject: x86: enable hpet=force for AMD SB400 Add quirk to allow forced usage of HPET on ATI SB400. I stumbled over machines where HPET is enabled but not reported by BIOS. This patch configures the HPET base address and makes it known to the OS. Signed-off-by: Andreas Herrmann Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d89a648..5fe6bd5 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -65,6 +65,7 @@ static enum { ICH_FORCE_HPET_RESUME, VT8237_FORCE_HPET_RESUME, NVIDIA_FORCE_HPET_RESUME, + ATI_FORCE_HPET_RESUME, } force_hpet_resume_type; static void __iomem *rcba_base; @@ -330,6 +331,31 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, vt8237_force_enable_hpet); +static void ati_force_hpet_resume(void) +{ + pci_write_config_dword(cached_dev, 0x14, 0xfed00000); + printk(KERN_DEBUG "Force enabled HPET at resume\n"); +} + +static void ati_force_enable_hpet(struct pci_dev *dev) +{ + u32 uninitialized_var(val); + + if (!hpet_force_user || hpet_address || force_hpet_address) + return; + + pci_write_config_dword(dev, 0x14, 0xfed00000); + pci_read_config_dword(dev, 0x14, &val); + force_hpet_address = val; + force_hpet_resume_type = ATI_FORCE_HPET_RESUME; + dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", + force_hpet_address); + cached_dev = dev; + return; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, + ati_force_enable_hpet); + /* * Undocumented chipset feature taken from LinuxBIOS. */ @@ -397,6 +423,9 @@ void force_hpet_resume(void) case NVIDIA_FORCE_HPET_RESUME: nvidia_force_hpet_resume(); return; + case ATI_FORCE_HPET_RESUME: + ati_force_hpet_resume(); + return; default: break; } -- cgit v0.10.2 From 7c4728f4a865067d96fb84f1d9c65e0ccd1f355d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 10 May 2008 21:42:14 +0200 Subject: x86: print info about available HPET quirk We have a lot of HPET quirks available which might force enable HPET even when the BIOS does not enable it. Some of those quirks depend on the command line option "hpet=force". Andrew pointed out that hoping that the user will find out about this boot option is not really helpful. Emit a kernel info which informs the user about the "hpet=force" boot option when we enter a quirk which depends on this option and the user did not provide it. Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 5fe6bd5..ddbf34d 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -175,6 +175,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, static struct pci_dev *cached_dev; +static void hpet_print_force_info(void) +{ + printk(KERN_INFO "HPET not enabled in BIOS. " + "You might try hpet=force boot option\n"); +} + static void old_ich_force_hpet_resume(void) { u32 val; @@ -254,6 +260,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev) { if (hpet_force_user) old_ich_force_enable_hpet(dev); + else + hpet_print_force_info(); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, @@ -291,8 +299,13 @@ static void vt8237_force_enable_hpet(struct pci_dev *dev) { u32 uninitialized_var(val); - if (!hpet_force_user || hpet_address || force_hpet_address) + if (hpet_address || force_hpet_address) + return; + + if (!hpet_force_user) { + hpet_print_force_info(); return; + } pci_read_config_dword(dev, 0x68, &val); /* @@ -341,8 +354,13 @@ static void ati_force_enable_hpet(struct pci_dev *dev) { u32 uninitialized_var(val); - if (!hpet_force_user || hpet_address || force_hpet_address) + if (hpet_address || force_hpet_address) + return; + + if (!hpet_force_user) { + hpet_print_force_info(); return; + } pci_write_config_dword(dev, 0x14, 0xfed00000); pci_read_config_dword(dev, 0x14, &val); @@ -369,8 +387,13 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev) { u32 uninitialized_var(val); - if (!hpet_force_user || hpet_address || force_hpet_address) + if (hpet_address || force_hpet_address) + return; + + if (!hpet_force_user) { + hpet_print_force_info(); return; + } pci_write_config_dword(dev, 0x44, 0xfed00001); pci_read_config_dword(dev, 0x44, &val); -- cgit v0.10.2 From 8edc5cc5ec880c96de8e6686fb0d7a5231e91c05 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Mon, 12 May 2008 15:43:34 +0200 Subject: x86: remove 6 bank limitation in 64 bit MCE reporting code Eliminate the 6 bank restriction in 64 bit mce reporting code. This restriction is artificial (due to static creation of sysfs files) and 32 bit code does not have any such restriction. This change helps in reporting the details of machine checks on a machine check exception with errors in bank 6 and above on CPUs that support those banks. Without the patch, machine check errors in those banks are not reported. We still have 128 (MCE_EXTENDED_BANK) bank restriction instead of max 256 supported in hardware. That is not changed in the patch below as it will have some user level mcelog utility dependency, with bank 128 being used for thermal reporting currently. The patch below does not create sysfs control (bankNctl) for banks higher than 6 as well. That needs some pre-cleanup in /sysfs mce layout, removal of per cpu /sysfs entries for bankctl as they are really global system level control today. That change will follow. This basic change is critical to report the detailed errors on banks higher than 6. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index e07e8c0..f1f3f5e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -31,7 +31,7 @@ #include #define MISC_MCELOG_MINOR 227 -#define NR_BANKS 6 +#define NR_SYSFS_BANKS 6 atomic_t mce_entry; @@ -46,7 +46,7 @@ static int mce_dont_init; */ static int tolerant = 1; static int banks; -static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; +static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; static unsigned long notify_user; static int rip_msr; static int mce_bootlog = -1; @@ -209,7 +209,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) barrier(); for (i = 0; i < banks; i++) { - if (!bank[i]) + if (i < NR_SYSFS_BANKS && !bank[i]) continue; m.misc = 0; @@ -444,9 +444,10 @@ static void mce_init(void *dummy) rdmsrl(MSR_IA32_MCG_CAP, cap); banks = cap & 0xff; - if (banks > NR_BANKS) { - printk(KERN_INFO "MCE: warning: using only %d banks\n", banks); - banks = NR_BANKS; + if (banks > MCE_EXTENDED_BANK) { + printk(KERN_INFO "MCE: warning: using only %d banks\n", + MCE_EXTENDED_BANK); + banks = MCE_EXTENDED_BANK; } /* Use accurate RIP reporting if available. */ if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) @@ -462,7 +463,7 @@ static void mce_init(void *dummy) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); for (i = 0; i < banks; i++) { - wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); } } @@ -766,7 +767,10 @@ DEFINE_PER_CPU(struct sys_device, device_mce); } \ static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); -/* TBD should generate these dynamically based on number of available banks */ +/* + * TBD should generate these dynamically based on number of available banks. + * Have only 6 contol banks in /sysfs until then. + */ ACCESSOR(bank0ctl,bank[0],mce_restart()) ACCESSOR(bank1ctl,bank[1],mce_restart()) ACCESSOR(bank2ctl,bank[2],mce_restart()) -- cgit v0.10.2 From 1c7d06d419dbe82c76fbb4d3e1fa61b2da2dc00b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Apr 2008 22:12:05 +0200 Subject: revert: thread_info.h change temporarily revert parts of "signals: x86 TS_RESTORE_SIGMASK". Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h index b633882..5318599 100644 --- a/include/asm-x86/thread_info_32.h +++ b/include/asm-x86/thread_info_32.h @@ -131,6 +131,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_EMU 5 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ #define TIF_SECCOMP 7 /* secure computing */ +#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ #define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */ #define TIF_MEMDIE 16 #define TIF_DEBUG 17 /* uses debug registers */ @@ -150,6 +151,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) #define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) @@ -186,20 +188,9 @@ static inline struct thread_info *current_thread_info(void) this quantum (SMP) */ #define TS_POLLING 0x0002 /* True if in idle loop and not sleeping */ -#define TS_RESTORE_SIGMASK 0x0004 /* restore signal mask in do_signal() */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - set_bit(TIF_SIGPENDING, &ti->flags); -} -#endif /* !__ASSEMBLY__ */ - #endif /* __KERNEL__ */ #endif /* _ASM_THREAD_INFO_H */ diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h index cb69f70..ed664e8 100644 --- a/include/asm-x86/thread_info_64.h +++ b/include/asm-x86/thread_info_64.h @@ -109,6 +109,7 @@ static inline struct thread_info *stack_thread_info(void) #define TIF_IRET 5 /* force IRET */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ /* 16 free */ @@ -132,6 +133,7 @@ static inline struct thread_info *stack_thread_info(void) #define _TIF_IRET (1 << TIF_IRET) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) #define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) #define _TIF_IA32 (1 << TIF_IA32) @@ -176,20 +178,9 @@ static inline struct thread_info *stack_thread_info(void) #define TS_COMPAT 0x0002 /* 32bit syscall active */ #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ -#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - set_bit(TIF_SIGPENDING, &ti->flags); -} -#endif /* !__ASSEMBLY__ */ - #endif /* __KERNEL__ */ #endif /* _ASM_THREAD_INFO_H */ -- cgit v0.10.2 From 2052e8d40ad58b1d364f900e70edfda62caa0874 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 12 May 2008 15:43:41 +0200 Subject: x86: merge thread_info.h Simple merge of both thread_info_32.h and thread_info_64.h into thread_info.h. Comments for #ifndef __ASM_THREAD_INFO_H and #ifdef __KERNEL__ are the same. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 77244f1..d59384a 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -1,10 +1,372 @@ +/* thread_info.h: low-level thread information + * + * Copyright (C) 2002 David Howells (dhowells@redhat.com) + * - Incorporating suggestions made by Linus Torvalds and Dave Miller + */ + #ifndef _ASM_X86_THREAD_INFO_H +#define _ASM_X86_THREAD_INFO_H + #ifdef CONFIG_X86_32 -# include "thread_info_32.h" +#include +#include + +#ifndef __ASSEMBLY__ +#include +#endif + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + * - if the contents of this structure are changed, + * the assembly constants must also be changed + */ +#ifndef __ASSEMBLY__ + +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + unsigned long status; /* thread-synchronous flags */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, + <0 => BUG */ + mm_segment_t addr_limit; /* thread address space: + 0-0xBFFFFFFF user-thread + 0-0xFFFFFFFF kernel-thread + */ + void *sysenter_return; + struct restart_block restart_block; + unsigned long previous_esp; /* ESP of the previous stack in + case of nested (IRQ) stacks + */ + __u8 supervisor_stack[0]; +}; + +#else /* !__ASSEMBLY__ */ + +#include + +#endif + +#define PREEMPT_ACTIVE 0x10000000 +#ifdef CONFIG_4KSTACKS +#define THREAD_SIZE (4096) #else -# include "thread_info_64.h" +#define THREAD_SIZE (8192) #endif +#define STACK_WARN (THREAD_SIZE/8) +/* + * macros/functions for gaining access to the thread information structure + * + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ +#ifndef __ASSEMBLY__ + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + + +/* how to get the current stack pointer from C */ +register unsigned long current_stack_pointer asm("esp") __used; + +/* how to get the thread information struct from C */ +static inline struct thread_info *current_thread_info(void) +{ + return (struct thread_info *) + (current_stack_pointer & ~(THREAD_SIZE - 1)); +} + +/* thread information allocation */ +#ifdef CONFIG_DEBUG_STACK_USAGE +#define alloc_thread_info(tsk) ((struct thread_info *) \ + __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(THREAD_SIZE))) +#else +#define alloc_thread_info(tsk) ((struct thread_info *) \ + __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) +#endif + +#else /* !__ASSEMBLY__ */ + +/* how to get the thread information struct from ASM */ +#define GET_THREAD_INFO(reg) \ + movl $-THREAD_SIZE, reg; \ + andl %esp, reg + +/* use this one if reg already contains %esp */ +#define GET_THREAD_INFO_WITH_ESP(reg) \ + andl $-THREAD_SIZE, reg + +#endif + +/* + * thread information flags + * - these are process state flags that various + * assembly files may need to access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 1 /* signal pending */ +#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_SINGLESTEP 3 /* restore singlestep on return to + user mode */ +#define TIF_IRET 4 /* return with iret */ +#define TIF_SYSCALL_EMU 5 /* syscall emulation active */ +#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ +#define TIF_SECCOMP 7 /* secure computing */ +#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ +#define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */ +#define TIF_MEMDIE 16 +#define TIF_DEBUG 17 /* uses debug registers */ +#define TIF_IO_BITMAP 18 /* uses I/O bitmap */ +#define TIF_FREEZE 19 /* is freezing for suspend */ +#define TIF_NOTSC 20 /* TSC is not accessible in userland */ +#define TIF_FORCED_TF 21 /* true if TF in eflags artificially */ +#define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */ +#define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */ +#define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_IRET (1 << TIF_IRET) +#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) +#define _TIF_DEBUG (1 << TIF_DEBUG) +#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) +#define _TIF_FREEZE (1 << TIF_FREEZE) +#define _TIF_NOTSC (1 << TIF_NOTSC) +#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) +#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) + +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK \ + (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP | _TIF_SYSCALL_EMU)) +/* work to do on any return to u-space */ +#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) + +/* flags to check in __switch_to() */ +#define _TIF_WORK_CTXSW \ + (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR | \ + _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS) +#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG) + + +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_USEDFPU 0x0001 /* FPU was used by this task + this quantum (SMP) */ +#define TS_POLLING 0x0002 /* True if in idle loop + and not sleeping */ + +#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) + +#else /* X86_32 */ + +#include +#include +#include + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + */ +#ifndef __ASSEMBLY__ +struct task_struct; +struct exec_domain; +#include + +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + __u32 flags; /* low level flags */ + __u32 status; /* thread synchronous flags */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, + <0 => BUG */ + mm_segment_t addr_limit; + struct restart_block restart_block; +#ifdef CONFIG_IA32_EMULATION + void __user *sysenter_return; +#endif +}; +#endif + +/* + * macros/functions for gaining access to the thread information structure + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ +#ifndef __ASSEMBLY__ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +static inline struct thread_info *current_thread_info(void) +{ + struct thread_info *ti; + ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); + return ti; +} + +/* do not use in interrupt context */ +static inline struct thread_info *stack_thread_info(void) +{ + struct thread_info *ti; + asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); + return ti; +} + +/* thread information allocation */ +#ifdef CONFIG_DEBUG_STACK_USAGE +#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) +#else +#define THREAD_FLAGS GFP_KERNEL +#endif + +#define alloc_thread_info(tsk) \ + ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) + +#else /* !__ASSEMBLY__ */ + +/* how to get the thread information struct from ASM */ +#define GET_THREAD_INFO(reg) \ + movq %gs:pda_kernelstack,reg ; \ + subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + +#endif + +/* + * thread information flags + * - these are process state flags that various assembly files + * may need to access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + * Warning: layout of LSW is hardcoded in entry.S + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_IRET 5 /* force IRET */ +#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ +#define TIF_SECCOMP 8 /* secure computing */ +#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ +#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ +/* 16 free */ +#define TIF_IA32 17 /* 32bit process */ +#define TIF_FORK 18 /* ret_from_fork */ +#define TIF_ABI_PENDING 19 +#define TIF_MEMDIE 20 +#define TIF_DEBUG 21 /* uses debug registers */ +#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +#define TIF_FREEZE 23 /* is freezing for suspend */ +#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ +#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ +#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ +#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ +#define TIF_NOTSC 28 /* TSC is not accessible in userland */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_IRET (1 << TIF_IRET) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) +#define _TIF_IA32 (1 << TIF_IA32) +#define _TIF_FORK (1 << TIF_FORK) +#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) +#define _TIF_DEBUG (1 << TIF_DEBUG) +#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) +#define _TIF_FREEZE (1 << TIF_FREEZE) +#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) +#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) +#define _TIF_NOTSC (1 << TIF_NOTSC) + +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK \ + (0x0000FFFF & \ + ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP)) +/* work to do on any return to user space */ +#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) + +#define _TIF_DO_NOTIFY_MASK \ + (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) + +/* flags to check in __switch_to() */ +#define _TIF_WORK_CTXSW \ + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC) +#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) + +#define PREEMPT_ACTIVE 0x10000000 + +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_USEDFPU 0x0001 /* FPU was used by this task + this quantum (SMP) */ +#define TS_COMPAT 0x0002 /* 32bit syscall active */ +#define TS_POLLING 0x0004 /* true if in idle loop + and not sleeping */ + +#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) + +#endif /* !X86_32 */ + + #ifndef __ASSEMBLY__ extern void arch_task_cache_init(void); extern void free_thread_info(struct thread_info *ti); diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h deleted file mode 100644 index 5318599..0000000 --- a/include/asm-x86/thread_info_32.h +++ /dev/null @@ -1,196 +0,0 @@ -/* thread_info.h: i386 low-level thread information - * - * Copyright (C) 2002 David Howells (dhowells@redhat.com) - * - Incorporating suggestions made by Linus Torvalds and Dave Miller - */ - -#ifndef _ASM_THREAD_INFO_H -#define _ASM_THREAD_INFO_H - -#ifdef __KERNEL__ - -#include -#include - -#ifndef __ASSEMBLY__ -#include -#endif - -/* - * low level task data that entry.S needs immediate access to - * - this struct should fit entirely inside of one cache line - * - this struct shares the supervisor stack pages - * - if the contents of this structure are changed, - * the assembly constants must also be changed - */ -#ifndef __ASSEMBLY__ - -struct thread_info { - struct task_struct *task; /* main task structure */ - struct exec_domain *exec_domain; /* execution domain */ - unsigned long flags; /* low level flags */ - unsigned long status; /* thread-synchronous flags */ - __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ - mm_segment_t addr_limit; /* thread address space: - 0-0xBFFFFFFF user-thread - 0-0xFFFFFFFF kernel-thread - */ - void *sysenter_return; - struct restart_block restart_block; - unsigned long previous_esp; /* ESP of the previous stack in - case of nested (IRQ) stacks - */ - __u8 supervisor_stack[0]; -}; - -#else /* !__ASSEMBLY__ */ - -#include - -#endif - -#define PREEMPT_ACTIVE 0x10000000 -#ifdef CONFIG_4KSTACKS -#define THREAD_SIZE (4096) -#else -#define THREAD_SIZE (8192) -#endif - -#define STACK_WARN (THREAD_SIZE/8) -/* - * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. - */ -#ifndef __ASSEMBLY__ - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - - -/* how to get the current stack pointer from C */ -register unsigned long current_stack_pointer asm("esp") __used; - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); -} - -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(THREAD_SIZE))) -#else -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) -#endif - -#else /* !__ASSEMBLY__ */ - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg) \ - movl $-THREAD_SIZE, reg; \ - andl %esp, reg - -/* use this one if reg already contains %esp */ -#define GET_THREAD_INFO_WITH_ESP(reg) \ - andl $-THREAD_SIZE, reg - -#endif - -/* - * thread information flags - * - these are process state flags that various - * assembly files may need to access - * - pending work-to-be-done flags are in LSW - * - other flags in MSW - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SINGLESTEP 3 /* restore singlestep on return to - user mode */ -#define TIF_IRET 4 /* return with iret */ -#define TIF_SYSCALL_EMU 5 /* syscall emulation active */ -#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ -#define TIF_SECCOMP 7 /* secure computing */ -#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ -#define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */ -#define TIF_MEMDIE 16 -#define TIF_DEBUG 17 /* uses debug registers */ -#define TIF_IO_BITMAP 18 /* uses I/O bitmap */ -#define TIF_FREEZE 19 /* is freezing for suspend */ -#define TIF_NOTSC 20 /* TSC is not accessible in userland */ -#define TIF_FORCED_TF 21 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_IRET (1 << TIF_IRET) -#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) -#define _TIF_DEBUG (1 << TIF_DEBUG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_NOTSC (1 << TIF_NOTSC) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) - -/* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK \ - (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_EMU)) -/* work to do on any return to u-space */ -#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) - -/* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR | \ - _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG) - - -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_USEDFPU 0x0001 /* FPU was used by this task - this quantum (SMP) */ -#define TS_POLLING 0x0002 /* True if in idle loop - and not sleeping */ - -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - -#endif /* __KERNEL__ */ - -#endif /* _ASM_THREAD_INFO_H */ diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h deleted file mode 100644 index ed664e8..0000000 --- a/include/asm-x86/thread_info_64.h +++ /dev/null @@ -1,186 +0,0 @@ -/* thread_info.h: x86_64 low-level thread information - * - * Copyright (C) 2002 David Howells (dhowells@redhat.com) - * - Incorporating suggestions made by Linus Torvalds and Dave Miller - */ - -#ifndef _ASM_THREAD_INFO_H -#define _ASM_THREAD_INFO_H - -#ifdef __KERNEL__ - -#include -#include -#include - -/* - * low level task data that entry.S needs immediate access to - * - this struct should fit entirely inside of one cache line - * - this struct shares the supervisor stack pages - */ -#ifndef __ASSEMBLY__ -struct task_struct; -struct exec_domain; -#include - -struct thread_info { - struct task_struct *task; /* main task structure */ - struct exec_domain *exec_domain; /* execution domain */ - __u32 flags; /* low level flags */ - __u32 status; /* thread synchronous flags */ - __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ - mm_segment_t addr_limit; - struct restart_block restart_block; -#ifdef CONFIG_IA32_EMULATION - void __user *sysenter_return; -#endif -}; -#endif - -/* - * macros/functions for gaining access to the thread information structure - * preempt_count needs to be 1 initially, until the scheduler is functional. - */ -#ifndef __ASSEMBLY__ -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); - return ti; -} - -/* do not use in interrupt context */ -static inline struct thread_info *stack_thread_info(void) -{ - struct thread_info *ti; - asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); - return ti; -} - -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) -#else -#define THREAD_FLAGS GFP_KERNEL -#endif - -#define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) - -#else /* !__ASSEMBLY__ */ - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg) \ - movq %gs:pda_kernelstack,reg ; \ - subq $(THREAD_SIZE-PDA_STACKOFFSET),reg - -#endif - -/* - * thread information flags - * - these are process state flags that various assembly files - * may need to access - * - pending work-to-be-done flags are in LSW - * - other flags in MSW - * Warning: layout of LSW is hardcoded in entry.S - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_IRET 5 /* force IRET */ -#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ -#define TIF_SECCOMP 8 /* secure computing */ -#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ -#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ -/* 16 free */ -#define TIF_IA32 17 /* 32bit process */ -#define TIF_FORK 18 /* ret_from_fork */ -#define TIF_ABI_PENDING 19 -#define TIF_MEMDIE 20 -#define TIF_DEBUG 21 /* uses debug registers */ -#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ -#define TIF_FREEZE 23 /* is freezing for suspend */ -#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ -#define TIF_NOTSC 28 /* TSC is not accessible in userland */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_IRET (1 << TIF_IRET) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) -#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) -#define _TIF_IA32 (1 << TIF_IA32) -#define _TIF_FORK (1 << TIF_FORK) -#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) -#define _TIF_DEBUG (1 << TIF_DEBUG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) -#define _TIF_NOTSC (1 << TIF_NOTSC) - -/* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK \ - (0x0000FFFF & \ - ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP)) -/* work to do on any return to user space */ -#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) - -#define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) - -/* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) - -#define PREEMPT_ACTIVE 0x10000000 - -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_USEDFPU 0x0001 /* FPU was used by this task - this quantum (SMP) */ -#define TS_COMPAT 0x0002 /* 32bit syscall active */ -#define TS_POLLING 0x0004 /* true if in idle loop - and not sleeping */ - -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - -#endif /* __KERNEL__ */ - -#endif /* _ASM_THREAD_INFO_H */ -- cgit v0.10.2 From 12a638e13c68bbe187782518dab375f4fa800fc4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:33 -0700 Subject: x86: threadinfo: common include files Move shared includes to a common area in thread_info.h Adds asm/types.h for x86_64 and linux/compiler.h for x86_32. Not needed but we can avoid some ifdeffing and it simplifies later joining. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index d59384a..d2dc1a3 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -7,9 +7,11 @@ #ifndef _ASM_X86_THREAD_INFO_H #define _ASM_X86_THREAD_INFO_H -#ifdef CONFIG_X86_32 #include #include +#include + +#ifdef CONFIG_X86_32 #ifndef __ASSEMBLY__ #include @@ -192,8 +194,6 @@ static inline struct thread_info *current_thread_info(void) #else /* X86_32 */ -#include -#include #include /* -- cgit v0.10.2 From f2ea3b1d4d7ab66d86da57899993282f3deb1f74 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:34 -0700 Subject: x86: threadinfo: merge thread sync state definitions Merge both. x86_64 has an additional TS_COMPAT that is harmless for 32 bit. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index d2dc1a3..4b91f59 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -177,21 +177,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG) - -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_USEDFPU 0x0001 /* FPU was used by this task - this quantum (SMP) */ -#define TS_POLLING 0x0002 /* True if in idle loop - and not sleeping */ - -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - #else /* X86_32 */ #include @@ -349,6 +334,8 @@ static inline struct thread_info *stack_thread_info(void) #define PREEMPT_ACTIVE 0x10000000 +#endif /* !X86_32 */ + /* * Thread-synchronous status. * @@ -358,15 +345,12 @@ static inline struct thread_info *stack_thread_info(void) */ #define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */ -#define TS_COMPAT 0x0002 /* 32bit syscall active */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) -#endif /* !X86_32 */ - - #ifndef __ASSEMBLY__ extern void arch_task_cache_init(void); extern void free_thread_info(struct thread_info *ti); -- cgit v0.10.2 From 006c484bb3d9547e82a33a09668c9b54b912c8fb Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:35 -0700 Subject: x86: common thread_info definitions Merge the thread_info definition into one structure definition for both arches. The __u32 is equal to unsigned long for 32 bit. sysenter_return is used both for the IA32 emulation for 64 and x86_32. Avoid complicated #ifdef by simply always including it. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 4b91f59..71b0880 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -11,47 +11,42 @@ #include #include -#ifdef CONFIG_X86_32 - -#ifndef __ASSEMBLY__ -#include -#endif - /* * low level task data that entry.S needs immediate access to * - this struct should fit entirely inside of one cache line * - this struct shares the supervisor stack pages - * - if the contents of this structure are changed, - * the assembly constants must also be changed */ #ifndef __ASSEMBLY__ +struct task_struct; +struct exec_domain; +#include struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ - unsigned long flags; /* low level flags */ - unsigned long status; /* thread-synchronous flags */ + __u32 flags; /* low level flags */ + __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, + int preempt_count; /* 0 => preemptable, <0 => BUG */ - mm_segment_t addr_limit; /* thread address space: - 0-0xBFFFFFFF user-thread - 0-0xFFFFFFFF kernel-thread - */ - void *sysenter_return; + mm_segment_t addr_limit; struct restart_block restart_block; + void __user *sysenter_return; +#ifdef CONFIG_X86_32 unsigned long previous_esp; /* ESP of the previous stack in case of nested (IRQ) stacks */ __u8 supervisor_stack[0]; +#endif }; - #else /* !__ASSEMBLY__ */ #include #endif +#ifdef CONFIG_X86_32 + #define PREEMPT_ACTIVE 0x10000000 #ifdef CONFIG_4KSTACKS #define THREAD_SIZE (4096) @@ -182,32 +177,6 @@ static inline struct thread_info *current_thread_info(void) #include /* - * low level task data that entry.S needs immediate access to - * - this struct should fit entirely inside of one cache line - * - this struct shares the supervisor stack pages - */ -#ifndef __ASSEMBLY__ -struct task_struct; -struct exec_domain; -#include - -struct thread_info { - struct task_struct *task; /* main task structure */ - struct exec_domain *exec_domain; /* execution domain */ - __u32 flags; /* low level flags */ - __u32 status; /* thread synchronous flags */ - __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ - mm_segment_t addr_limit; - struct restart_block restart_block; -#ifdef CONFIG_IA32_EMULATION - void __user *sysenter_return; -#endif -}; -#endif - -/* * macros/functions for gaining access to the thread information structure * preempt_count needs to be 1 initially, until the scheduler is functional. */ -- cgit v0.10.2 From 3351cc03c0762353225a79507e38db4c1e656d52 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:36 -0700 Subject: x86: threadinfo: merge INIT_THREAD_INFO Both definitions are the same. So move to common x86 area. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 71b0880..8cd52d4 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -39,6 +39,23 @@ struct thread_info { __u8 supervisor_stack[0]; #endif }; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + #else /* !__ASSEMBLY__ */ #include @@ -62,22 +79,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the current stack pointer from C */ register unsigned long current_stack_pointer asm("esp") __used; @@ -181,22 +182,6 @@ static inline struct thread_info *current_thread_info(void) * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ -} - -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; -- cgit v0.10.2 From 24e2de6e28a453cd114b06215df2f9931cd0c342 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:37 -0700 Subject: x86: thread_info: PREEMPT_ACTIVE Same for both 32 and 64 bit so simply put it in to the common area. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 8cd52d4..f8d5cf5 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -62,9 +62,10 @@ struct thread_info { #endif +#define PREEMPT_ACTIVE 0x10000000 + #ifdef CONFIG_X86_32 -#define PREEMPT_ACTIVE 0x10000000 #ifdef CONFIG_4KSTACKS #define THREAD_SIZE (4096) #else @@ -286,8 +287,6 @@ static inline struct thread_info *stack_thread_info(void) #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) -#define PREEMPT_ACTIVE 0x10000000 - #endif /* !X86_32 */ /* -- cgit v0.10.2 From e57549b017552f7a493b366f5ccd4781801083e4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:38 -0700 Subject: x86: thread_info: merge TIF_ flags. Both TIF lists are essentially the same. x86_32 also has TIF_SYSCALL_EMU which must be undefined for the 64 bit case. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index f8d5cf5..983743a 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -62,6 +62,67 @@ struct thread_info { #endif +/* + * thread information flags + * - these are process state flags that various assembly files + * may need to access + * - pending work-to-be-done flags are in LSW + * - other flags in MSW + * Warning: layout of LSW is hardcoded in entry.S + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_IRET 5 /* force IRET */ +#ifdef CONFIG_X86_32 +#define TIF_SYSCALL_EMU 6 /* syscall emulation active */ +#endif +#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ +#define TIF_SECCOMP 8 /* secure computing */ +#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ +#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ +#define TIF_NOTSC 16 /* TSC is not accessible in userland */ +#define TIF_IA32 17 /* 32bit process */ +#define TIF_FORK 18 /* ret_from_fork */ +#define TIF_ABI_PENDING 19 +#define TIF_MEMDIE 20 +#define TIF_DEBUG 21 /* uses debug registers */ +#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +#define TIF_FREEZE 23 /* is freezing for suspend */ +#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ +#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ +#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ +#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_IRET (1 << TIF_IRET) +#ifdef CONFIG_X86_32 +#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) +#else +#define _TIF_SYSCALL_EMU 0 +#endif +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) +#define _TIF_NOTSC (1 << TIF_NOTSC) +#define _TIF_IA32 (1 << TIF_IA32) +#define _TIF_FORK (1 << TIF_FORK) +#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) +#define _TIF_DEBUG (1 << TIF_DEBUG) +#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) +#define _TIF_FREEZE (1 << TIF_FREEZE) +#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) +#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) + #define PREEMPT_ACTIVE 0x10000000 #ifdef CONFIG_X86_32 @@ -113,53 +174,6 @@ static inline struct thread_info *current_thread_info(void) #endif -/* - * thread information flags - * - these are process state flags that various - * assembly files may need to access - * - pending work-to-be-done flags are in LSW - * - other flags in MSW - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SINGLESTEP 3 /* restore singlestep on return to - user mode */ -#define TIF_IRET 4 /* return with iret */ -#define TIF_SYSCALL_EMU 5 /* syscall emulation active */ -#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ -#define TIF_SECCOMP 7 /* secure computing */ -#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ -#define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */ -#define TIF_MEMDIE 16 -#define TIF_DEBUG 17 /* uses debug registers */ -#define TIF_IO_BITMAP 18 /* uses I/O bitmap */ -#define TIF_FREEZE 19 /* is freezing for suspend */ -#define TIF_NOTSC 20 /* TSC is not accessible in userland */ -#define TIF_FORCED_TF 21 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_IRET (1 << TIF_IRET) -#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) -#define _TIF_DEBUG (1 << TIF_DEBUG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_NOTSC (1 << TIF_NOTSC) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) - /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ @@ -217,60 +231,6 @@ static inline struct thread_info *stack_thread_info(void) #endif -/* - * thread information flags - * - these are process state flags that various assembly files - * may need to access - * - pending work-to-be-done flags are in LSW - * - other flags in MSW - * Warning: layout of LSW is hardcoded in entry.S - */ -#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_IRET 5 /* force IRET */ -#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ -#define TIF_SECCOMP 8 /* secure computing */ -#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ -#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ -/* 16 free */ -#define TIF_IA32 17 /* 32bit process */ -#define TIF_FORK 18 /* ret_from_fork */ -#define TIF_ABI_PENDING 19 -#define TIF_MEMDIE 20 -#define TIF_DEBUG 21 /* uses debug registers */ -#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ -#define TIF_FREEZE 23 /* is freezing for suspend */ -#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ -#define TIF_NOTSC 28 /* TSC is not accessible in userland */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_IRET (1 << TIF_IRET) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) -#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) -#define _TIF_IA32 (1 << TIF_IA32) -#define _TIF_FORK (1 << TIF_FORK) -#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) -#define _TIF_DEBUG (1 << TIF_DEBUG) -#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) -#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) -#define _TIF_NOTSC (1 << TIF_NOTSC) - /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ (0x0000FFFF & \ -- cgit v0.10.2 From 00c1bb133cf351fa3904b00a48a9cf535d018de6 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:39 -0700 Subject: x86: thread_info: merge tif masks The TIF masks are basically the same. x86_32 also has _TIF_SYSCALL_EMU which is zero for the 64 bit case. The tif masks become the same. x86_64 has an additional _TIF_DONOTIFY_MASK. Does not hurt for the 32 bit case since it is only used in x86_64 arch code. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 983743a..b7cd413 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -123,6 +123,27 @@ struct thread_info { #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) +/* work to do on interrupt/exception return */ +#define _TIF_WORK_MASK \ + (0x0000FFFF & \ + ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP| \ + _TIF_SECCOMP|_TIF_SYSCALL_EMU)) + +/* work to do on any return to user space */ +#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) + +#define _TIF_DO_NOTIFY_MASK \ + (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) + +/* flags to check in __switch_to() */ +#define _TIF_WORK_CTXSW \ + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ + _TIF_NOTSC) + +#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) + + #define PREEMPT_ACTIVE 0x10000000 #ifdef CONFIG_X86_32 @@ -174,20 +195,6 @@ static inline struct thread_info *current_thread_info(void) #endif -/* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK \ - (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_EMU)) -/* work to do on any return to u-space */ -#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) - -/* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR | \ - _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG) - #else /* X86_32 */ #include @@ -231,22 +238,6 @@ static inline struct thread_info *stack_thread_info(void) #endif -/* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK \ - (0x0000FFFF & \ - ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP)) -/* work to do on any return to user space */ -#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) - -#define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) - -/* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW -#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) - #endif /* !X86_32 */ /* -- cgit v0.10.2 From b84200b3a0fafa167185201319940d8df62a8c7b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Apr 2008 18:52:40 -0700 Subject: x86: thread_info: merge thread_info allocation Make them similar so that both use THREAD_ORDER and THREAD_FLAGS and have a THREAD_SIZE definition that is setup in asm/page_xx.h Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index 424e82f..50b33eb 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -13,6 +13,14 @@ */ #define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) +#ifdef CONFIG_4KSTACKS +#define THREAD_ORDER 0 +#else +#define THREAD_ORDER 1 +#endif +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) + + #ifdef CONFIG_X86_PAE #define __PHYSICAL_MASK_SHIFT 36 #define __VIRTUAL_MASK_SHIFT 32 diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index b7cd413..348f0e0 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -132,6 +132,7 @@ struct thread_info { /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) +/* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) @@ -143,18 +144,21 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) - #define PREEMPT_ACTIVE 0x10000000 -#ifdef CONFIG_X86_32 - -#ifdef CONFIG_4KSTACKS -#define THREAD_SIZE (4096) +/* thread information allocation */ +#ifdef CONFIG_DEBUG_STACK_USAGE +#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) #else -#define THREAD_SIZE (8192) +#define THREAD_FLAGS GFP_KERNEL #endif -#define STACK_WARN (THREAD_SIZE/8) +#define alloc_thread_info(tsk) \ + ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) + +#ifdef CONFIG_X86_32 + +#define STACK_WARN (THREAD_SIZE/8) /* * macros/functions for gaining access to the thread information structure * @@ -173,15 +177,6 @@ static inline struct thread_info *current_thread_info(void) (current_stack_pointer & ~(THREAD_SIZE - 1)); } -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(THREAD_SIZE))) -#else -#define alloc_thread_info(tsk) ((struct thread_info *) \ - __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) -#endif - #else /* !__ASSEMBLY__ */ /* how to get the thread information struct from ASM */ @@ -219,16 +214,6 @@ static inline struct thread_info *stack_thread_info(void) return ti; } -/* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) -#else -#define THREAD_FLAGS GFP_KERNEL -#endif - -#define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) - #else /* !__ASSEMBLY__ */ /* how to get the thread information struct from ASM */ -- cgit v0.10.2 From 8a6c160a2a13d82c75a50af7282b906cce948df5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Apr 2008 22:13:44 +0200 Subject: x86: redo thread_info.h change redo Roland's "signals: x86 TS_RESTORE_SIGMASK" ontop of the unified thread_info.h file. Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 348f0e0..74481b7 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -80,7 +80,6 @@ struct thread_info { #endif #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ -#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ @@ -108,7 +107,6 @@ struct thread_info { #endif #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) #define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) #define _TIF_NOTSC (1 << TIF_NOTSC) @@ -237,10 +235,21 @@ static inline struct thread_info *stack_thread_info(void) #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ +#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) #ifndef __ASSEMBLY__ +#define HAVE_SET_RESTORE_SIGMASK 1 +static inline void set_restore_sigmask(void) +{ + struct thread_info *ti = current_thread_info(); + ti->status |= TS_RESTORE_SIGMASK; + set_bit(TIF_SIGPENDING, &ti->flags); +} +#endif /* !__ASSEMBLY__ */ + +#ifndef __ASSEMBLY__ extern void arch_task_cache_init(void); extern void free_thread_info(struct thread_info *ti); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -- cgit v0.10.2 From 41b3eae669fb1ef6ae4acaa937b4e4617a1aa078 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 22 Apr 2008 16:29:26 +0100 Subject: x86: minor polishing to top-level arch Makefile Use build target when creating compatibility link, and use $(boot) where possible. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 3cff3c8..5df0d1e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -210,12 +210,12 @@ all: bzImage # KBUILD_IMAGE specify target image being built KBUILD_IMAGE := $(boot)/bzImage -zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage +zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage zImage bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot - $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/bzImage + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ compressed: zImage -- cgit v0.10.2 From 6859a8402945cf1d74af75a2e1aa4e327a506ab4 Mon Sep 17 00:00:00 2001 From: Alan Mayer Date: Wed, 26 Mar 2008 16:11:31 -0500 Subject: x86: resize NR_IRQS for large machines On machines with very large numbers of cpus, tables that are dimensioned by NR_IRQS get very large, especially the irq_desc table. They are also very sparsely used. When the cpu count is > MAX_IO_APICS, use MAX_IO_APICS to set NR_IRQS, otherwise use NR_CPUS. Signed-off-by: Alan Mayer Reviewed-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h index 083d35a..7608176 100644 --- a/include/asm-x86/irq_64.h +++ b/include/asm-x86/irq_64.h @@ -10,6 +10,8 @@ * */ +#include + #define TIMER_IRQ 0 /* @@ -31,7 +33,11 @@ #define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#if NR_CPUS < MAX_IO_APICS #define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) +#else +#define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +#endif #define NR_IRQ_VECTORS NR_IRQS static inline int irq_canonicalize(int irq) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index e8ffce8..cf9f40a 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -1,11 +1,11 @@ #ifndef _LINUX_KERNEL_STAT_H #define _LINUX_KERNEL_STAT_H -#include #include #include #include #include +#include #include /* -- cgit v0.10.2 From 2e0884362d1fe36ef2d673d763d6ce35e2044e66 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 19:00:30 +0200 Subject: x86: move common declarations to hw_irq.h Move the common declarations from hw_irq_32/64 into hw_irq.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index bf02539..38af08e 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -1,5 +1,86 @@ +#ifndef _ASM_HW_IRQ_H +#define _ASM_HW_IRQ_H + +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * moved some of the old arch/i386/kernel/irq.h to here. VY + * + * IRQ/IPI changes taken from work by Thomas Radke + * + * + * hacked by Andi Kleen for x86-64. + * unified by tglx + */ + +#define NMI_VECTOR 0x02 + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +#include +#include +#include + +#define platform_legacy_irq(irq) ((irq) < 16) + +/* Interrupt handlers registered during init_IRQ */ +extern void apic_timer_interrupt(void); +extern void error_interrupt(void); +extern void spurious_interrupt(void); +extern void thermal_interrupt(void); +extern void reschedule_interrupt(void); + +extern void invalidate_interrupt(void); +extern void invalidate_interrupt0(void); +extern void invalidate_interrupt1(void); +extern void invalidate_interrupt2(void); +extern void invalidate_interrupt3(void); +extern void invalidate_interrupt4(void); +extern void invalidate_interrupt5(void); +extern void invalidate_interrupt6(void); +extern void invalidate_interrupt7(void); + +extern void irq_move_cleanup_interrupt(void); +extern void threshold_interrupt(void); + +extern void call_function_interrupt(void); + +/* PIC specific functions */ +extern void disable_8259A_irq(unsigned int irq); +extern void enable_8259A_irq(unsigned int irq); +extern int i8259A_irq_pending(unsigned int irq); +extern void make_8259A_irq(unsigned int irq); +extern void init_8259A(int aeoi); + +/* IOAPIC */ +#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) +extern unsigned long io_apic_irqs; + +extern void init_VISWS_APIC_irqs(void); +extern void setup_IO_APIC(void); +extern void disable_IO_APIC(void); +extern void print_IO_APIC(void); +extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); +extern void setup_ioapic_dest(void); + +/* IPI functions */ +extern void send_IPI_self(int vector); +extern void send_IPI(int dest, int vector); + +/* Statistics */ +extern atomic_t irq_err_count; +extern atomic_t irq_mis_count; + +#endif /* !ASSEMBLY_ */ + #ifdef CONFIG_X86_32 # include "hw_irq_32.h" #else # include "hw_irq_64.h" #endif + +#endif diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h index ea88054..89fca5a 100644 --- a/include/asm-x86/hw_irq_32.h +++ b/include/asm-x86/hw_irq_32.h @@ -1,66 +1,5 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H - -/* - * linux/include/asm/hw_irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * moved some of the old arch/i386/kernel/irq.h to here. VY - * - * IRQ/IPI changes taken from work by Thomas Radke - * - */ - -#include -#include -#include -#include - -#define NMI_VECTOR 0x02 - -/* - * Various low-level irq details needed by irq.c, process.c, - * time.c, io_apic.c and smp.c - * - * Interrupt entry/exit code at both C and assembly level - */ extern void (*const interrupt[NR_IRQS])(void); -#ifdef CONFIG_SMP -void reschedule_interrupt(void); -void invalidate_interrupt(void); -void call_function_interrupt(void); -#endif - -#ifdef CONFIG_X86_LOCAL_APIC -void apic_timer_interrupt(void); -void error_interrupt(void); -void spurious_interrupt(void); -void thermal_interrupt(void); -#define platform_legacy_irq(irq) ((irq) < 16) -#endif - -void disable_8259A_irq(unsigned int irq); -void enable_8259A_irq(unsigned int irq); -int i8259A_irq_pending(unsigned int irq); -void make_8259A_irq(unsigned int irq); -void init_8259A(int aeoi); -void send_IPI_self(int vector); -void init_VISWS_APIC_irqs(void); -void setup_IO_APIC(void); -void disable_IO_APIC(void); -void print_IO_APIC(void); -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); -void send_IPI(int dest, int vector); -void setup_ioapic_dest(void); - -extern unsigned long io_apic_irqs; - -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; -#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) -#endif /* _ASM_HW_IRQ_H */ diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h index 0062ef3..2867457 100644 --- a/include/asm-x86/hw_irq_64.h +++ b/include/asm-x86/hw_irq_64.h @@ -1,28 +1,3 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H - -/* - * linux/include/asm/hw_irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * moved some of the old arch/i386/kernel/irq.h to here. VY - * - * IRQ/IPI changes taken from work by Thomas Radke - * - * - * hacked by Andi Kleen for x86-64. - */ - -#ifndef __ASSEMBLY__ -#include -#include -#include -#include -#include -#endif - -#define NMI_VECTOR 0x02 /* * IDT vectors usable for external interrupt sources start * at 0x20: @@ -96,25 +71,6 @@ #ifndef __ASSEMBLY__ -/* Interrupt handlers registered during init_IRQ */ -void apic_timer_interrupt(void); -void spurious_interrupt(void); -void error_interrupt(void); -void reschedule_interrupt(void); -void call_function_interrupt(void); -void irq_move_cleanup_interrupt(void); -void invalidate_interrupt0(void); -void invalidate_interrupt1(void); -void invalidate_interrupt2(void); -void invalidate_interrupt3(void); -void invalidate_interrupt4(void); -void invalidate_interrupt5(void); -void invalidate_interrupt6(void); -void invalidate_interrupt7(void); -void thermal_interrupt(void); -void threshold_interrupt(void); -void i8254_timer_resume(void); - typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); extern void __setup_vector_irq(int cpu); @@ -127,29 +83,9 @@ extern spinlock_t vector_lock; * Interrupt entry/exit code at both C and assembly level */ -extern void disable_8259A_irq(unsigned int irq); -extern void enable_8259A_irq(unsigned int irq); -extern int i8259A_irq_pending(unsigned int irq); -extern void make_8259A_irq(unsigned int irq); -extern void init_8259A(int aeoi); -extern void send_IPI_self(int vector); -extern void init_VISWS_APIC_irqs(void); -extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); -extern void disable_IO_APIC(void); -extern void print_IO_APIC(void); -extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); -extern void send_IPI(int dest, int vector); -extern void setup_ioapic_dest(void); extern void native_init_IRQ(void); -extern unsigned long io_apic_irqs; - -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; - -#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) - #include #define IRQ_NAME2(nr) nr##_interrupt(void) @@ -166,8 +102,4 @@ extern atomic_t irq_mis_count; "push $~(" #nr ") ; " \ "jmp common_interrupt"); -#define platform_legacy_irq(irq) ((irq) < 16) - #endif - -#endif /* _ASM_HW_IRQ_H */ -- cgit v0.10.2 From 9b7dc567d03d74a1fbae84e88949b6a60d922d82 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 20:10:09 +0200 Subject: x86: unify interrupt vector defines The interrupt vector defines are copied 4 times around with minimal differences. Move them all into asm-x86/irq_vectors.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2a609dc..0c7f64b 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -51,7 +51,7 @@ #include #include #include -#include "irq_vectors.h" +#include /* * We use macros for low-level operations which need to be overridden diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index a2b0307..ba7d19e 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -33,8 +33,7 @@ #include #include #include - -#include +#include #define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c index cef9cb1..d8b2cfd 100644 --- a/arch/x86/mach-visws/visws_apic.c +++ b/arch/x86/mach-visws/visws_apic.c @@ -21,10 +21,9 @@ #include #include #include +#include #include "cobalt.h" -#include "irq_vectors.h" - static DEFINE_SPINLOCK(cobalt_lock); diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 38af08e..a8c5e8b 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -13,7 +13,7 @@ * unified by tglx */ -#define NMI_VECTOR 0x02 +#include #ifndef __ASSEMBLY__ @@ -75,6 +75,16 @@ extern void send_IPI(int dest, int vector); extern atomic_t irq_err_count; extern atomic_t irq_mis_count; +/* Voyager functions */ +extern asmlinkage void vic_cpi_interrupt(void); +extern asmlinkage void vic_sys_interrupt(void); +extern asmlinkage void vic_cmn_interrupt(void); +extern asmlinkage void qic_timer_interrupt(void); +extern asmlinkage void qic_invalidate_interrupt(void); +extern asmlinkage void qic_reschedule_interrupt(void); +extern asmlinkage void qic_enable_irq_interrupt(void); +extern asmlinkage void qic_call_function_interrupt(void); + #endif /* !ASSEMBLY_ */ #ifdef CONFIG_X86_32 diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h index 2867457..98c9d49 100644 --- a/include/asm-x86/hw_irq_64.h +++ b/include/asm-x86/hw_irq_64.h @@ -1,74 +1,3 @@ -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define IA32_SYSCALL_VECTOR 0x80 - - -/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering - * cleanup after irq migration. - */ -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR - -/* - * Vectors 0x30-0x3f are used for ISA interrupts. - */ -#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) -#define IRQ1_VECTOR (IRQ0_VECTOR + 1) -#define IRQ2_VECTOR (IRQ0_VECTOR + 2) -#define IRQ3_VECTOR (IRQ0_VECTOR + 3) -#define IRQ4_VECTOR (IRQ0_VECTOR + 4) -#define IRQ5_VECTOR (IRQ0_VECTOR + 5) -#define IRQ6_VECTOR (IRQ0_VECTOR + 6) -#define IRQ7_VECTOR (IRQ0_VECTOR + 7) -#define IRQ8_VECTOR (IRQ0_VECTOR + 8) -#define IRQ9_VECTOR (IRQ0_VECTOR + 9) -#define IRQ10_VECTOR (IRQ0_VECTOR + 10) -#define IRQ11_VECTOR (IRQ0_VECTOR + 11) -#define IRQ12_VECTOR (IRQ0_VECTOR + 12) -#define IRQ13_VECTOR (IRQ0_VECTOR + 13) -#define IRQ14_VECTOR (IRQ0_VECTOR + 14) -#define IRQ15_VECTOR (IRQ0_VECTOR + 15) - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define RESCHEDULE_VECTOR 0xfd -#define CALL_FUNCTION_VECTOR 0xfc -/* fb free - please don't readd KDB here because it's useless - (hint - think what a NMI bit does to a vector) */ -#define THERMAL_APIC_VECTOR 0xfa -#define THRESHOLD_APIC_VECTOR 0xf9 -/* f8 free */ -#define INVALIDATE_TLB_VECTOR_END 0xf7 -#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ - -#define NUM_INVALIDATE_TLB_VECTORS 8 - -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - -/* - * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x41 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ - - #ifndef __ASSEMBLY__ typedef int vector_irq_t[NR_VECTORS]; diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h index 0b79f31..c5c7542 100644 --- a/include/asm-x86/irq_32.h +++ b/include/asm-x86/irq_32.h @@ -12,7 +12,7 @@ #include /* include comes from machine specific directory */ -#include "irq_vectors.h" +#include #include static inline int irq_canonicalize(int irq) diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h index 7608176..3037ec6 100644 --- a/include/asm-x86/irq_64.h +++ b/include/asm-x86/irq_64.h @@ -11,34 +11,7 @@ */ #include - -#define TIMER_IRQ 0 - -/* - * 16 8259A IRQ's, 208 potential APIC interrupt sources. - * Right now the APIC is mostly only used for SMP. - * 256 vectors is an architectural limit. (we can have - * more than 256 devices theoretically, but they will - * have to use shared interrupts) - * Since vectors 0x00-0x1f are used/reserved for the CPU, - * the usable vector space is 0x20-0xff (224 vectors) - */ - -/* - * The maximum number of vectors supported by x86_64 processors - * is limited to 256. For processors other than x86_64, NR_VECTORS - * should be changed accordingly. - */ -#define NR_VECTORS 256 - -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ - -#if NR_CPUS < MAX_IO_APICS -#define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) -#else -#define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -#endif -#define NR_IRQ_VECTORS NR_IRQS +#include static inline int irq_canonicalize(int irq) { diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h new file mode 100644 index 0000000..daceaaf --- /dev/null +++ b/include/asm-x86/irq_vectors.h @@ -0,0 +1,168 @@ +#ifndef _ASM_IRQ_VECTORS_H +#define _ASM_IRQ_VECTORS_H + +#include + +#define NMI_VECTOR 0x02 + +/* + * IDT vectors usable for external interrupt sources start + * at 0x20: + */ +#define FIRST_EXTERNAL_VECTOR 0x20 + +#ifdef CONFIG_X86_32 +# define SYSCALL_VECTOR 0x80 +#else +# define IA32_SYSCALL_VECTOR 0x80 +#endif + +/* + * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit. + * + * Reserve the lowest usable priority level 0x20 - 0x2f for triggering + * cleanup after irq migration on 64 bit. + */ +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR + +/* + * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit + */ +#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) +#define IRQ1_VECTOR (IRQ0_VECTOR + 1) +#define IRQ2_VECTOR (IRQ0_VECTOR + 2) +#define IRQ3_VECTOR (IRQ0_VECTOR + 3) +#define IRQ4_VECTOR (IRQ0_VECTOR + 4) +#define IRQ5_VECTOR (IRQ0_VECTOR + 5) +#define IRQ6_VECTOR (IRQ0_VECTOR + 6) +#define IRQ7_VECTOR (IRQ0_VECTOR + 7) +#define IRQ8_VECTOR (IRQ0_VECTOR + 8) +#define IRQ9_VECTOR (IRQ0_VECTOR + 9) +#define IRQ10_VECTOR (IRQ0_VECTOR + 10) +#define IRQ11_VECTOR (IRQ0_VECTOR + 11) +#define IRQ12_VECTOR (IRQ0_VECTOR + 12) +#define IRQ13_VECTOR (IRQ0_VECTOR + 13) +#define IRQ14_VECTOR (IRQ0_VECTOR + 14) +#define IRQ15_VECTOR (IRQ0_VECTOR + 15) + +/* + * Special IRQ vectors used by the SMP architecture, 0xf0-0xff + * + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. + * + * Vectors 0xf0-0xfa are free (reserved for future Linux use). + */ +#ifdef CONFIG_X86_32 + +# define SPURIOUS_APIC_VECTOR 0xff +# define ERROR_APIC_VECTOR 0xfe +# define INVALIDATE_TLB_VECTOR 0xfd +# define RESCHEDULE_VECTOR 0xfc +# define CALL_FUNCTION_VECTOR 0xfb +# define THERMAL_APIC_VECTOR 0xf0 + +#else + +#define SPURIOUS_APIC_VECTOR 0xff +#define ERROR_APIC_VECTOR 0xfe +#define RESCHEDULE_VECTOR 0xfd +#define CALL_FUNCTION_VECTOR 0xfc +#define THERMAL_APIC_VECTOR 0xfa +#define THRESHOLD_APIC_VECTOR 0xf9 +#define INVALIDATE_TLB_VECTOR_END 0xf7 +#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ + +#define NUM_INVALIDATE_TLB_VECTORS 8 + +#endif + +/* + * Local APIC timer IRQ vector is on a different priority level, + * to work around the 'lost local interrupt if more than 2 IRQ + * sources per level' errata. + */ +#define LOCAL_TIMER_VECTOR 0xef + +/* + * First APIC vector available to drivers: (vectors 0x30-0xee) we + * start at 0x31(0x41) to spread out vectors evenly between priority + * levels. (0x80 is the syscall vector) + */ +#ifdef CONFIG_X86_32 +# define FIRST_DEVICE_VECTOR 0x31 +#else +# define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) +#endif + +#define FIRST_SYSTEM_VECTOR 0xef + +#define NR_VECTORS 256 + +#define FPU_IRQ 13 + +#define FIRST_VM86_IRQ 3 +#define LAST_VM86_IRQ 15 +#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) + +#if !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER) + +# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) + +# define NR_IRQS 224 + +# if (224 >= 32 * NR_CPUS) +# define NR_IRQ_VECTORS NR_IRQS +# else +# define NR_IRQ_VECTORS (32 * NR_CPUS) +# endif + +# else /* IO_APIC || PARAVIRT */ + +# define NR_IRQS 16 +# define NR_IRQ_VECTORS NR_IRQS + +# endif + +#else /* !VISWS && !VOYAGER */ + +# define NR_IRQS 224 +# define NR_IRQ_VECTORS NR_IRQS + +#endif /* VISWS */ + +/* Voyager specific defines */ +/* These define the CPIs we use in linux */ +#define VIC_CPI_LEVEL0 0 +#define VIC_CPI_LEVEL1 1 +/* now the fake CPIs */ +#define VIC_TIMER_CPI 2 +#define VIC_INVALIDATE_CPI 3 +#define VIC_RESCHEDULE_CPI 4 +#define VIC_ENABLE_IRQ_CPI 5 +#define VIC_CALL_FUNCTION_CPI 6 + +/* Now the QIC CPIs: Since we don't need the two initial levels, + * these are 2 less than the VIC CPIs */ +#define QIC_CPI_OFFSET 1 +#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) +#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) +#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) +#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) +#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) + +#define VIC_START_FAKE_CPI VIC_TIMER_CPI +#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_CPI + +/* this is the SYS_INT CPI. */ +#define VIC_SYS_INT 8 +#define VIC_CMN_INT 15 + +/* This is the boot CPI for alternate processors. It gets overwritten + * by the above once the system has activated all available processors */ +#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 +#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) + + +#endif /* _ASM_IRQ_VECTORS_H */ diff --git a/include/asm-x86/mach-default/irq_vectors.h b/include/asm-x86/mach-default/irq_vectors.h deleted file mode 100644 index 881c63c..0000000 --- a/include/asm-x86/mach-default/irq_vectors.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * This file should contain #defines for all of the interrupt vector - * numbers used by this architecture. - * - * In addition, there are some standard defines: - * - * FIRST_EXTERNAL_VECTOR: - * The first free place for external interrupts - * - * SYSCALL_VECTOR: - * The IRQ vector a syscall makes the user to kernel transition - * under. - * - * TIMER_IRQ: - * The IRQ number the timer interrupt comes in at. - * - * NR_IRQS: - * The total number of interrupt vectors (including all the - * architecture specific interrupts) needed. - * - */ -#ifndef _ASM_IRQ_VECTORS_H -#define _ASM_IRQ_VECTORS_H - -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define SYSCALL_VECTOR 0x80 - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. - */ - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). - */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define INVALIDATE_TLB_VECTOR 0xfd -#define RESCHEDULE_VECTOR 0xfc -#define CALL_FUNCTION_VECTOR 0xfb - -#define THERMAL_APIC_VECTOR 0xf0 -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - -/* - * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef - -#define TIMER_IRQ 0 - -/* - * 16 8259A IRQ's, 208 potential APIC interrupt sources. - * Right now the APIC is mostly only used for SMP. - * 256 vectors is an architectural limit. (we can have - * more than 256 devices theoretically, but they will - * have to use shared interrupts) - * Since vectors 0x00-0x1f are used/reserved for the CPU, - * the usable vector space is 0x20-0xff (224 vectors) - */ - -/* - * The maximum number of vectors supported by i386 processors - * is limited to 256. For processors other than i386, NR_VECTORS - * should be changed accordingly. - */ -#define NR_VECTORS 256 - -#include "irq_vectors_limits.h" - -#define FPU_IRQ 13 - -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) - - -#endif /* _ASM_IRQ_VECTORS_H */ diff --git a/include/asm-x86/mach-default/irq_vectors_limits.h b/include/asm-x86/mach-default/irq_vectors_limits.h deleted file mode 100644 index a90c7a6..0000000 --- a/include/asm-x86/mach-default/irq_vectors_limits.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _ASM_IRQ_VECTORS_LIMITS_H -#define _ASM_IRQ_VECTORS_LIMITS_H - -#if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) -#define NR_IRQS 224 -# if (224 >= 32 * NR_CPUS) -# define NR_IRQ_VECTORS NR_IRQS -# else -# define NR_IRQ_VECTORS (32 * NR_CPUS) -# endif -#else -#define NR_IRQS 16 -#define NR_IRQ_VECTORS NR_IRQS -#endif - -#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ diff --git a/include/asm-x86/mach-visws/irq_vectors.h b/include/asm-x86/mach-visws/irq_vectors.h deleted file mode 100644 index cb572d8..0000000 --- a/include/asm-x86/mach-visws/irq_vectors.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef _ASM_IRQ_VECTORS_H -#define _ASM_IRQ_VECTORS_H - -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define SYSCALL_VECTOR 0x80 - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. - */ - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). - */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define INVALIDATE_TLB_VECTOR 0xfd -#define RESCHEDULE_VECTOR 0xfc -#define CALL_FUNCTION_VECTOR 0xfb - -#define THERMAL_APIC_VECTOR 0xf0 -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef - -/* - * First APIC vector available to drivers: (vectors 0x30-0xee) - * we start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef - -#define TIMER_IRQ 0 - -/* - * IRQ definitions - */ -#define NR_VECTORS 256 -#define NR_IRQS 224 -#define NR_IRQ_VECTORS NR_IRQS - -#define FPU_IRQ 13 - -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) - -#endif /* _ASM_IRQ_VECTORS_H */ diff --git a/include/asm-x86/mach-voyager/irq_vectors.h b/include/asm-x86/mach-voyager/irq_vectors.h deleted file mode 100644 index 165421f..0000000 --- a/include/asm-x86/mach-voyager/irq_vectors.h +++ /dev/null @@ -1,79 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 2002 - * - * Author: James.Bottomley@HansenPartnership.com - * - * linux/arch/i386/voyager/irq_vectors.h - * - * This file provides definitions for the VIC and QIC CPIs - */ - -#ifndef _ASM_IRQ_VECTORS_H -#define _ASM_IRQ_VECTORS_H - -/* - * IDT vectors usable for external interrupt sources start - * at 0x20: - */ -#define FIRST_EXTERNAL_VECTOR 0x20 - -#define SYSCALL_VECTOR 0x80 - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. - */ - -/* These define the CPIs we use in linux */ -#define VIC_CPI_LEVEL0 0 -#define VIC_CPI_LEVEL1 1 -/* now the fake CPIs */ -#define VIC_TIMER_CPI 2 -#define VIC_INVALIDATE_CPI 3 -#define VIC_RESCHEDULE_CPI 4 -#define VIC_ENABLE_IRQ_CPI 5 -#define VIC_CALL_FUNCTION_CPI 6 - -/* Now the QIC CPIs: Since we don't need the two initial levels, - * these are 2 less than the VIC CPIs */ -#define QIC_CPI_OFFSET 1 -#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) -#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) -#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) -#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) - -#define VIC_START_FAKE_CPI VIC_TIMER_CPI -#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_CPI - -/* this is the SYS_INT CPI. */ -#define VIC_SYS_INT 8 -#define VIC_CMN_INT 15 - -/* This is the boot CPI for alternate processors. It gets overwritten - * by the above once the system has activated all available processors */ -#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 -#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) - -#define NR_VECTORS 256 -#define NR_IRQS 224 -#define NR_IRQ_VECTORS NR_IRQS - -#define FPU_IRQ 13 - -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) - -#ifndef __ASSEMBLY__ -extern asmlinkage void vic_cpi_interrupt(void); -extern asmlinkage void vic_sys_interrupt(void); -extern asmlinkage void vic_cmn_interrupt(void); -extern asmlinkage void qic_timer_interrupt(void); -extern asmlinkage void qic_invalidate_interrupt(void); -extern asmlinkage void qic_reschedule_interrupt(void); -extern asmlinkage void qic_enable_irq_interrupt(void); -extern asmlinkage void qic_call_function_interrupt(void); -#endif /* !__ASSEMBLY__ */ - -#endif /* _ASM_IRQ_VECTORS_H */ -- cgit v0.10.2 From 0bc471d93051a19545257909bc2ed2ad3b389b54 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 21:55:12 +0200 Subject: x86: move BUILD_IRQ macro magic to i8259_64.c i8259_64.c is the only place which uses those macros. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index fa57a15..c4ae476 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -34,6 +34,20 @@ * interrupt-controller happy. */ +#define IRQ_NAME2(nr) nr##_interrupt(void) +#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) + +/* + * SMP has a few special interrupts for IPI messages + */ + +#define BUILD_IRQ(nr) \ + asmlinkage void IRQ_NAME(nr); \ + asm("\n.p2align\n" \ + "IRQ" #nr "_interrupt:\n\t" \ + "push $~(" #nr ") ; " \ + "jmp common_interrupt"); + #define BI(x,y) \ BUILD_IRQ(x##y) diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h index 98c9d49..9305f74 100644 --- a/include/asm-x86/hw_irq_64.h +++ b/include/asm-x86/hw_irq_64.h @@ -17,18 +17,4 @@ extern void native_init_IRQ(void); #include -#define IRQ_NAME2(nr) nr##_interrupt(void) -#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - -/* - * SMP has a few special interrupts for IPI messages - */ - -#define BUILD_IRQ(nr) \ - asmlinkage void IRQ_NAME(nr); \ - asm("\n.p2align\n" \ - "IRQ" #nr "_interrupt:\n\t" \ - "push $~(" #nr ") ; " \ - "jmp common_interrupt"); - #endif -- cgit v0.10.2 From 97e7b6f54c0d66586a658e985630cd63040311fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 22:02:25 +0200 Subject: x86: unify apic interrupt function declarations Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index a8c5e8b..cdb09d7 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -67,6 +67,10 @@ extern void print_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); extern void setup_ioapic_dest(void); +#ifdef CONFIG_X86_64 +extern void enable_IO_APIC(void); +#endif + /* IPI functions */ extern void send_IPI_self(int vector); extern void send_IPI(int dest, int vector); diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h index 9305f74..428785b 100644 --- a/include/asm-x86/hw_irq_64.h +++ b/include/asm-x86/hw_irq_64.h @@ -12,9 +12,6 @@ extern spinlock_t vector_lock; * Interrupt entry/exit code at both C and assembly level */ -extern void enable_IO_APIC(void); -extern void native_init_IRQ(void); - #include #endif diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h index 3037ec6..1ef233d 100644 --- a/include/asm-x86/irq_64.h +++ b/include/asm-x86/irq_64.h @@ -27,4 +27,6 @@ extern void fixup_irqs(cpumask_t map); #define __ARCH_HAS_DO_SOFTIRQ 1 +extern void native_init_IRQ(void); + #endif /* _ASM_IRQ_H */ -- cgit v0.10.2 From 22dc12d1f694b9af88e616ab758ff90c69d0fc83 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 22:10:39 +0200 Subject: x86: unify hwirq.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index cdb09d7..1db2dff 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -89,12 +89,15 @@ extern asmlinkage void qic_reschedule_interrupt(void); extern asmlinkage void qic_enable_irq_interrupt(void); extern asmlinkage void qic_call_function_interrupt(void); -#endif /* !ASSEMBLY_ */ - #ifdef CONFIG_X86_32 -# include "hw_irq_32.h" +extern void (*const interrupt[NR_IRQS])(void); #else -# include "hw_irq_64.h" +typedef int vector_irq_t[NR_VECTORS]; +DECLARE_PER_CPU(vector_irq_t, vector_irq); +extern void __setup_vector_irq(int cpu); +extern spinlock_t vector_lock; #endif +#endif /* !ASSEMBLY_ */ + #endif diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h deleted file mode 100644 index 89fca5a..0000000 --- a/include/asm-x86/hw_irq_32.h +++ /dev/null @@ -1,5 +0,0 @@ - -extern void (*const interrupt[NR_IRQS])(void); - - - diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h deleted file mode 100644 index 428785b..0000000 --- a/include/asm-x86/hw_irq_64.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef __ASSEMBLY__ - -typedef int vector_irq_t[NR_VECTORS]; -DECLARE_PER_CPU(vector_irq_t, vector_irq); -extern void __setup_vector_irq(int cpu); -extern spinlock_t vector_lock; - -/* - * Various low-level irq details needed by irq.c, process.c, - * time.c, io_apic.c and smp.c - * - * Interrupt entry/exit code at both C and assembly level - */ - -#include - -#endif -- cgit v0.10.2 From 22067d4501bfb47c8ca34144f68fad734178914d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 22:14:44 +0200 Subject: x86: unify irq.h Not much difference in those files. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h index 7ba9054..1a29257 100644 --- a/include/asm-x86/irq.h +++ b/include/asm-x86/irq.h @@ -1,5 +1,50 @@ -#ifdef CONFIG_X86_32 -# include "irq_32.h" +#ifndef _ASM_IRQ_H +#define _ASM_IRQ_H +/* + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * IRQ/IPI changes taken from work by Thomas Radke + * + */ + +#include +#include + +static inline int irq_canonicalize(int irq) +{ + return ((irq == 2) ? 9 : irq); +} + +#ifdef CONFIG_X86_LOCAL_APIC +# define ARCH_HAS_NMI_WATCHDOG +#endif + +#ifdef CONFIG_4KSTACKS + extern void irq_ctx_init(int cpu); + extern void irq_ctx_exit(int cpu); +# define __ARCH_HAS_DO_SOFTIRQ #else -# include "irq_64.h" +# define irq_ctx_init(cpu) do { } while (0) +# define irq_ctx_exit(cpu) do { } while (0) +# ifdef CONFIG_X86_64 +# define __ARCH_HAS_DO_SOFTIRQ +# endif +#endif + +#ifdef CONFIG_IRQBALANCE +extern int irqbalance_disable(char *str); +#endif + +#ifdef CONFIG_HOTPLUG_CPU +#include +extern void fixup_irqs(cpumask_t map); #endif + +extern unsigned int do_IRQ(struct pt_regs *regs); +extern void init_IRQ(void); +extern void native_init_IRQ(void); + +/* Interrupt vector management */ +extern DECLARE_BITMAP(used_vectors, NR_VECTORS); + +#endif /* _ASM_IRQ_H */ diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h deleted file mode 100644 index c5c7542..0000000 --- a/include/asm-x86/irq_32.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef _ASM_IRQ_H -#define _ASM_IRQ_H - -/* - * linux/include/asm/irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * IRQ/IPI changes taken from work by Thomas Radke - * - */ - -#include -/* include comes from machine specific directory */ -#include -#include - -static inline int irq_canonicalize(int irq) -{ - return ((irq == 2) ? 9 : irq); -} - -#ifdef CONFIG_X86_LOCAL_APIC -# define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ -#endif - -#ifdef CONFIG_4KSTACKS - extern void irq_ctx_init(int cpu); - extern void irq_ctx_exit(int cpu); -# define __ARCH_HAS_DO_SOFTIRQ -#else -# define irq_ctx_init(cpu) do { } while (0) -# define irq_ctx_exit(cpu) do { } while (0) -#endif - -#ifdef CONFIG_IRQBALANCE -extern int irqbalance_disable(char *str); -#endif - -#ifdef CONFIG_HOTPLUG_CPU -extern void fixup_irqs(cpumask_t map); -#endif - -unsigned int do_IRQ(struct pt_regs *regs); -void init_IRQ(void); -void __init native_init_IRQ(void); - -/* Interrupt vector management */ -extern DECLARE_BITMAP(used_vectors, NR_VECTORS); - -#endif /* _ASM_IRQ_H */ diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h deleted file mode 100644 index 1ef233d..0000000 --- a/include/asm-x86/irq_64.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _ASM_IRQ_H -#define _ASM_IRQ_H - -/* - * linux/include/asm/irq.h - * - * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar - * - * IRQ/IPI changes taken from work by Thomas Radke - * - */ - -#include -#include - -static inline int irq_canonicalize(int irq) -{ - return ((irq == 2) ? 9 : irq); -} - -#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ - -#ifdef CONFIG_HOTPLUG_CPU -#include -extern void fixup_irqs(cpumask_t map); -#endif - -#define __ARCH_HAS_DO_SOFTIRQ 1 - -extern void native_init_IRQ(void); - -#endif /* _ASM_IRQ_H */ -- cgit v0.10.2 From 88a83350bc1955fa9d8fca059e19bc360fcef2ba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 23:19:26 +0200 Subject: x86: declare setup_apic_routing Global functions need a prototype. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 1de931b..0f85046 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -44,4 +44,6 @@ DECLARE_PER_CPU(int, x2apic_extra_bits); extern void uv_cpu_init(void); extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); +extern void setup_apic_routing(void); + #endif -- cgit v0.10.2 From 1a331957efd214fc3a84f70956dfaec65e70c031 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 3 May 2008 00:30:50 +0200 Subject: x86: move eisa_set_level_irq declaration to header Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c49ebcc..671591f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -507,8 +507,6 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity) * Make sure all (legacy) PCI IRQs are set as level-triggered. */ if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { - extern void eisa_set_level_irq(unsigned int irq); - if (triggering == ACPI_LEVEL_SENSITIVE) eisa_set_level_irq(gsi); } diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 1db2dff..1428b41 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -79,6 +79,9 @@ extern void send_IPI(int dest, int vector); extern atomic_t irq_err_count; extern atomic_t irq_mis_count; +/* EISA */ +extern void eisa_set_level_irq(unsigned int irq); + /* Voyager functions */ extern asmlinkage void vic_cpi_interrupt(void); extern asmlinkage void vic_sys_interrupt(void); -- cgit v0.10.2 From 305b92a2323eeaa4b481f409d54f778dd7e21a46 Mon Sep 17 00:00:00 2001 From: Alan Mayer Date: Tue, 15 Apr 2008 15:36:56 -0500 Subject: x86: change FIRST_SYSTEM_VECTOR to a variable The SGI UV system needs several more system vectors than a vanilla x86_64 system. Rather than burden the other archs with extra system vectors that they don't use, change FIRST_SYSTEM_VECTOR to a variable, so that it can be dynamic. Signed-off-by: Alan Mayer Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 4b99b1b..807158e 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1351,13 +1351,13 @@ void __init smp_intr_init(void) * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. */ - set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); /* IPI for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); /* IPI for generic function call */ - set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); } #endif @@ -1370,15 +1370,15 @@ void __init apic_intr_init(void) smp_intr_init(); #endif /* self generated IPI for local APIC timer */ - set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); /* IPI vectors for APIC spurious and error interrupts */ - set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); /* thermal monitor LVT interrupt */ #ifdef CONFIG_X86_MCE_P4THERMAL - set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif } diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index c4ae476..1870e0e 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -493,33 +493,33 @@ void __init native_init_IRQ(void) * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. */ - set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); /* IPIs for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); /* IPI for generic function call */ - set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); #endif - set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); - set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); /* self generated IPI for local APIC timer */ - set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); /* IPI vectors for APIC spurious and error interrupts */ - set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); if (!acpi_ioapic) setup_irq(2, &irq2); diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index a40d54f..0ae4a9d 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -83,6 +83,10 @@ int mp_irq_entries; static int disable_timer_pin_1 __initdata; +int first_system_vector = 0xfe; + +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. @@ -1176,7 +1180,7 @@ static int __assign_irq_vector(int irq) offset = current_offset; next: vector += 8; - if (vector >= FIRST_SYSTEM_VECTOR) { + if (vector >= first_system_vector) { offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; } @@ -2269,7 +2273,7 @@ void __init setup_IO_APIC(void) int i; /* Reserve all the system vectors. */ - for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++) + for (i = first_system_vector; i < NR_VECTORS; i++) set_bit(i, used_vectors); enable_IO_APIC(); diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index ef1a8df..f1e1ae3 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -82,6 +82,10 @@ struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { static int assign_irq_vector(int irq, cpumask_t mask); +int first_system_vector = 0xfe; + +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + #define __apicdebuginit __init int sis_apic_bug; /* not actually supported, dummy for compile */ @@ -730,7 +734,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) offset = current_offset; next: vector += 8; - if (vector >= FIRST_SYSTEM_VECTOR) { + if (vector >= first_system_vector) { /* If we run out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index 268a012..b3875d4 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h @@ -311,6 +311,28 @@ static inline void set_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); } +#define SYS_VECTOR_FREE 0 +#define SYS_VECTOR_ALLOCED 1 + +extern int first_system_vector; +extern char system_vectors[]; + +static inline void alloc_system_vector(int vector) +{ + if (system_vectors[vector] == SYS_VECTOR_FREE) { + system_vectors[vector] = SYS_VECTOR_ALLOCED; + if (first_system_vector > vector) + first_system_vector = vector; + } else + BUG(); +} + +static inline void alloc_intr_gate(unsigned int n, void *addr) +{ + alloc_system_vector(n); + set_intr_gate(n, addr); +} + /* * This routine sets up an interrupt gate at directory privilege level 3. */ diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h index daceaaf..3cb6d8c 100644 --- a/include/asm-x86/irq_vectors.h +++ b/include/asm-x86/irq_vectors.h @@ -96,8 +96,6 @@ # define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) #endif -#define FIRST_SYSTEM_VECTOR 0xef - #define NR_VECTORS 256 #define FPU_IRQ 13 -- cgit v0.10.2 From ce17833183bf0a08ce3d174a2088eff0a06f2080 Mon Sep 17 00:00:00 2001 From: Alan Mayer Date: Wed, 16 Apr 2008 15:17:20 -0500 Subject: x86: change FIRST_SYSTEM_VECTOR to a variable, fix Fixes the build error introduced by my FIRST_SYSTEM_VECTOR patch Signed-off-by: Alan Mayer Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 807158e..d5767cb 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -71,6 +71,10 @@ int local_apic_timer_disabled; int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); +int first_system_vector = 0xfe; + +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + /* * Debug level, exported for io_apic.c */ diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 0ae4a9d..76769cc 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -83,10 +83,6 @@ int mp_irq_entries; static int disable_timer_pin_1 __initdata; -int first_system_vector = 0xfe; - -char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; - /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. -- cgit v0.10.2 From 04b361abfdc522239e3a071f3afdebf5787d9f03 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 5 May 2008 12:36:38 +0200 Subject: i386: Execute stack overflow warning on interrupt stack v2 Previously the reporting printk would run on the process stack, which risks overflow an already low stack. Instead execute it on the interrupt stack. This makes it more likely for the printk to make it actually out. It adds one not taken test/branch more to the interrupt path when stack overflow checking is enabled. We could avoid that by duplicating more code, but that seemed not worth it. Based on an observation by Eric Sandeen. v2: Fix warnings in some configs Signed-off-by: Andi Kleen Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 147352d..3f76561 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -61,6 +61,26 @@ static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; #endif +static void stack_overflow(void) +{ + printk("low stack detected by irq handler\n"); + dump_stack(); +} + +static inline void call_on_stack2(void *func, void *stack, + unsigned long arg1, unsigned long arg2) +{ + unsigned long bx; + asm volatile( + " xchgl %%ebx,%%esp \n" + " call *%%edi \n" + " movl %%ebx,%%esp \n" + : "=a" (arg1), "=d" (arg2), "=b" (bx) + : "0" (arg1), "1" (arg2), "2" (stack), + "D" (func) + : "memory", "cc", "ecx"); +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -76,6 +96,7 @@ unsigned int do_IRQ(struct pt_regs *regs) union irq_ctx *curctx, *irqctx; u32 *isp; #endif + int overflow = 0; if (unlikely((unsigned)irq >= NR_IRQS)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", @@ -92,11 +113,8 @@ unsigned int do_IRQ(struct pt_regs *regs) __asm__ __volatile__("andl %%esp,%0" : "=r" (sp) : "0" (THREAD_SIZE - 1)); - if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { - printk("do_IRQ: stack overflow: %ld\n", - sp - sizeof(struct thread_info)); - dump_stack(); - } + if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) + overflow = 1; } #endif @@ -112,8 +130,6 @@ unsigned int do_IRQ(struct pt_regs *regs) * current stack (which is the irq stack already after all) */ if (curctx != irqctx) { - int arg1, arg2, bx; - /* build the stack frame on the IRQ stack */ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); irqctx->tinfo.task = curctx->tinfo.task; @@ -127,18 +143,19 @@ unsigned int do_IRQ(struct pt_regs *regs) (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | (curctx->tinfo.preempt_count & SOFTIRQ_MASK); - asm volatile( - " xchgl %%ebx,%%esp \n" - " call *%%edi \n" - " movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (bx) - : "0" (irq), "1" (desc), "2" (isp), - "D" (desc->handle_irq) - : "memory", "cc", "ecx" - ); + /* Execute warning on interrupt stack */ + if (unlikely(overflow)) + call_on_stack2(stack_overflow, isp, 0, 0); + + call_on_stack2(desc->handle_irq, isp, irq, (unsigned long)desc); } else #endif + { + /* AK: Slightly bogus here */ + if (overflow) + stack_overflow(); desc->handle_irq(irq, desc); + } irq_exit(); set_irq_regs(old_regs); -- cgit v0.10.2 From de9b10af1287bf25b9c0433de53a2e95ef611aa7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 5 May 2008 15:58:15 +0200 Subject: x86: janitor stack overflow warning patch Add KERN_WARNING to the printk as this could not be done in the original patch, which allegedly only moves code around. Un#ifdef do_IRQ. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 3f76561..1c470d2 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -48,6 +48,29 @@ void ack_bad_irq(unsigned int irq) #endif } +#ifdef CONFIG_DEBUG_STACKOVERFLOW +/* Debugging check for stack overflow: is there less than 1KB free? */ +static int check_stack_overflow(void) +{ + long sp; + + __asm__ __volatile__("andl %%esp,%0" : + "=r" (sp) : "0" (THREAD_SIZE - 1)); + + return sp < (sizeof(struct thread_info) + STACK_WARN); +} + +static void print_stack_overflow(void) +{ + printk(KERN_WARNING "low stack detected by irq handler\n"); + dump_stack(); +} + +#else +static inline int check_stack_overflow(void) { return 0; } +static inline void print_stack_overflow(void) { } +#endif + #ifdef CONFIG_4KSTACKS /* * per-CPU IRQ handling contexts (thread information and stack) @@ -59,18 +82,12 @@ union irq_ctx { static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; -#endif - -static void stack_overflow(void) -{ - printk("low stack detected by irq handler\n"); - dump_stack(); -} -static inline void call_on_stack2(void *func, void *stack, - unsigned long arg1, unsigned long arg2) +static inline void call_on_stack(void *func, void *stack, + unsigned long arg1, void *arg2) { unsigned long bx; + asm volatile( " xchgl %%ebx,%%esp \n" " call *%%edi \n" @@ -81,22 +98,61 @@ static inline void call_on_stack2(void *func, void *stack, : "memory", "cc", "ecx"); } +static inline int +execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) +{ + union irq_ctx *curctx, *irqctx; + u32 *isp; + + curctx = (union irq_ctx *) current_thread_info(); + irqctx = hardirq_ctx[smp_processor_id()]; + + /* + * this is where we switch to the IRQ stack. However, if we are + * already using the IRQ stack (because we interrupted a hardirq + * handler) we can't do that and just have to keep using the + * current stack (which is the irq stack already after all) + */ + if (unlikely(curctx == irqctx)) + return 0; + + /* build the stack frame on the IRQ stack */ + isp = (u32 *) ((char*)irqctx + sizeof(*irqctx)); + irqctx->tinfo.task = curctx->tinfo.task; + irqctx->tinfo.previous_esp = current_stack_pointer; + + /* + * Copy the softirq bits in preempt_count so that the + * softirq checks work in the hardirq context. + */ + irqctx->tinfo.preempt_count = + (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | + (curctx->tinfo.preempt_count & SOFTIRQ_MASK); + + if (unlikely(overflow)) + call_on_stack(print_stack_overflow, isp, 0, NULL); + + call_on_stack(desc->handle_irq, isp, irq, desc); + + return 1; +} + +#else +static inline int +execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } +#endif + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific * handlers). */ unsigned int do_IRQ(struct pt_regs *regs) -{ +{ struct pt_regs *old_regs; /* high bit used in ret_from_ code */ - int irq = ~regs->orig_ax; + int overflow, irq = ~regs->orig_ax; struct irq_desc *desc = irq_desc + irq; -#ifdef CONFIG_4KSTACKS - union irq_ctx *curctx, *irqctx; - u32 *isp; -#endif - int overflow = 0; if (unlikely((unsigned)irq >= NR_IRQS)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", @@ -106,54 +162,12 @@ unsigned int do_IRQ(struct pt_regs *regs) old_regs = set_irq_regs(regs); irq_enter(); -#ifdef CONFIG_DEBUG_STACKOVERFLOW - /* Debugging check for stack overflow: is there less than 1KB free? */ - { - long sp; - - __asm__ __volatile__("andl %%esp,%0" : - "=r" (sp) : "0" (THREAD_SIZE - 1)); - if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) - overflow = 1; - } -#endif - -#ifdef CONFIG_4KSTACKS - curctx = (union irq_ctx *) current_thread_info(); - irqctx = hardirq_ctx[smp_processor_id()]; + overflow = check_stack_overflow(); - /* - * this is where we switch to the IRQ stack. However, if we are - * already using the IRQ stack (because we interrupted a hardirq - * handler) we can't do that and just have to keep using the - * current stack (which is the irq stack already after all) - */ - if (curctx != irqctx) { - /* build the stack frame on the IRQ stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; - irqctx->tinfo.previous_esp = current_stack_pointer; - - /* - * Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. - */ - irqctx->tinfo.preempt_count = - (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | - (curctx->tinfo.preempt_count & SOFTIRQ_MASK); - - /* Execute warning on interrupt stack */ + if (!execute_on_irq_stack(overflow, desc, irq)) { if (unlikely(overflow)) - call_on_stack2(stack_overflow, isp, 0, 0); - - call_on_stack2(desc->handle_irq, isp, irq, (unsigned long)desc); - } else -#endif - { - /* AK: Slightly bogus here */ - if (overflow) - stack_overflow(); + print_stack_overflow(); desc->handle_irq(irq, desc); } -- cgit v0.10.2 From 403d8efc94cd02ae36e7db13c4edf1d06d7b7fac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 5 May 2008 18:13:50 +0200 Subject: x86: irq_32 move 4kstacks code to one place Move the 4KSTACKS related code to one place. This allows to un#ifdef do_IRQ() and share the executed on stack for the stack overflow printk and the softirq call. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1c470d2..4e3e8ec 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -83,26 +83,28 @@ union irq_ctx { static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; -static inline void call_on_stack(void *func, void *stack, - unsigned long arg1, void *arg2) +static char softirq_stack[NR_CPUS * THREAD_SIZE] + __attribute__((__section__(".bss.page_aligned"))); + +static char hardirq_stack[NR_CPUS * THREAD_SIZE] + __attribute__((__section__(".bss.page_aligned"))); + +static void call_on_stack(void *func, void *stack) { - unsigned long bx; - - asm volatile( - " xchgl %%ebx,%%esp \n" - " call *%%edi \n" - " movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (bx) - : "0" (arg1), "1" (arg2), "2" (stack), - "D" (func) - : "memory", "cc", "ecx"); + asm volatile("xchgl %%ebx,%%esp \n" + "call *%%edi \n" + "movl %%ebx,%%esp \n" + : "=b" (stack) + : "0" (stack), + "D"(func) + : "memory", "cc", "edx", "ecx", "eax"); } static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { union irq_ctx *curctx, *irqctx; - u32 *isp; + u32 *isp, arg1, arg2; curctx = (union irq_ctx *) current_thread_info(); irqctx = hardirq_ctx[smp_processor_id()]; @@ -130,64 +132,22 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) (curctx->tinfo.preempt_count & SOFTIRQ_MASK); if (unlikely(overflow)) - call_on_stack(print_stack_overflow, isp, 0, NULL); - - call_on_stack(desc->handle_irq, isp, irq, desc); - - return 1; -} - -#else -static inline int -execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } -#endif - -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -unsigned int do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs; - /* high bit used in ret_from_ code */ - int overflow, irq = ~regs->orig_ax; - struct irq_desc *desc = irq_desc + irq; - - if (unlikely((unsigned)irq >= NR_IRQS)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d\n", - __func__, irq); - BUG(); - } - - old_regs = set_irq_regs(regs); - irq_enter(); - - overflow = check_stack_overflow(); - - if (!execute_on_irq_stack(overflow, desc, irq)) { - if (unlikely(overflow)) - print_stack_overflow(); - desc->handle_irq(irq, desc); - } - - irq_exit(); - set_irq_regs(old_regs); + call_on_stack(print_stack_overflow, isp); + + asm volatile("xchgl %%ebx,%%esp \n" + "call *%%edi \n" + "movl %%ebx,%%esp \n" + : "=a" (arg1), "=d" (arg2), "=b" (isp) + : "0" (irq), "1" (desc), "2" (isp), + "D" (desc->handle_irq) + : "memory", "cc", "ecx"); return 1; } -#ifdef CONFIG_4KSTACKS - -static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); - -static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); - /* * allocate per-cpu stacks for hardirq and for softirq processing */ -void irq_ctx_init(int cpu) +void __cpuinit irq_ctx_init(int cpu) { union irq_ctx *irqctx; @@ -195,25 +155,25 @@ void irq_ctx_init(int cpu) return; irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); hardirq_ctx[cpu] = irqctx; irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = 0; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = 0; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); softirq_ctx[cpu] = irqctx; - printk("CPU %u irqstacks, hard=%p soft=%p\n", - cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); + printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", + cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); } void irq_ctx_exit(int cpu) @@ -242,25 +202,56 @@ asmlinkage void do_softirq(void) /* build the stack frame on the softirq stack */ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - asm volatile( - " xchgl %%ebx,%%esp \n" - " call __do_softirq \n" - " movl %%ebx,%%esp \n" - : "=b"(isp) - : "0"(isp) - : "memory", "cc", "edx", "ecx", "eax" - ); + call_on_stack(__do_softirq, isp); /* * Shouldnt happen, we returned above if in_interrupt(): - */ + */ WARN_ON_ONCE(softirq_count()); } local_irq_restore(flags); } + +#else +static inline int +execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } #endif /* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +unsigned int do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + /* high bit used in ret_from_ code */ + int overflow, irq = ~regs->orig_ax; + struct irq_desc *desc = irq_desc + irq; + + if (unlikely((unsigned)irq >= NR_IRQS)) { + printk(KERN_EMERG "%s: cannot handle IRQ %d\n", + __func__, irq); + BUG(); + } + + old_regs = set_irq_regs(regs); + irq_enter(); + + overflow = check_stack_overflow(); + + if (!execute_on_irq_stack(overflow, desc, irq)) { + if (unlikely(overflow)) + print_stack_overflow(); + desc->handle_irq(irq, desc); + } + + irq_exit(); + set_irq_regs(old_regs); + return 1; +} + +/* * Interrupt statistics: */ -- cgit v0.10.2 From 988f7b5789ccf5cfed14c72e28573a49f0cb4809 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:22 -0700 Subject: x86: PAT export resource_wc in pci sysfs For the ranges with IORESOURCE_PREFETCH, export a new resource_wc interface in pci /sysfs along with resource (which is uncached). Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Acked-by: Jesse Barnes Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt index 5daa2aa..68ef488 100644 --- a/Documentation/filesystems/sysfs-pci.txt +++ b/Documentation/filesystems/sysfs-pci.txt @@ -36,6 +36,7 @@ files, each with their own function. local_cpus nearby CPU mask (cpumask, ro) resource PCI resource host addresses (ascii, ro) resource0..N PCI resource N, if present (binary, mmap) + resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) rom PCI ROM resource, if present (binary, ro) subsystem_device PCI subsystem device (ascii, ro) subsystem_vendor PCI subsystem vendor (ascii, ro) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 271d41c..9ec7d39 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -489,13 +489,14 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr, * @kobj: kobject for mapping * @attr: struct bin_attribute for the file being mapped * @vma: struct vm_area_struct passed into the mmap + * @write_combine: 1 for write_combine mapping * * Use the regular PCI mapping routines to map a PCI resource into userspace. * FIXME: write combining? maybe automatic for prefetchable regions? */ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, - struct vm_area_struct *vma) + struct vm_area_struct *vma, int write_combine) { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); @@ -518,7 +519,21 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, vma->vm_pgoff += start >> PAGE_SHIFT; mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io; - return pci_mmap_page_range(pdev, vma, mmap_type, 0); + return pci_mmap_page_range(pdev, vma, mmap_type, write_combine); +} + +static int +pci_mmap_resource_uc(struct kobject *kobj, struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 0); +} + +static int +pci_mmap_resource_wc(struct kobject *kobj, struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + return pci_mmap_resource(kobj, attr, vma, 1); } /** @@ -541,9 +556,46 @@ pci_remove_resource_files(struct pci_dev *pdev) sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); kfree(res_attr); } + + res_attr = pdev->res_attr_wc[i]; + if (res_attr) { + sysfs_remove_bin_file(&pdev->dev.kobj, res_attr); + kfree(res_attr); + } } } +static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) +{ + /* allocate attribute structure, piggyback attribute name */ + int name_len = write_combine ? 13 : 10; + struct bin_attribute *res_attr; + int retval; + + res_attr = kzalloc(sizeof(*res_attr) + name_len, GFP_ATOMIC); + if (res_attr) { + char *res_attr_name = (char *)(res_attr + 1); + + if (write_combine) { + pdev->res_attr_wc[num] = res_attr; + sprintf(res_attr_name, "resource%d_wc", num); + res_attr->mmap = pci_mmap_resource_wc; + } else { + pdev->res_attr[num] = res_attr; + sprintf(res_attr_name, "resource%d", num); + res_attr->mmap = pci_mmap_resource_uc; + } + res_attr->attr.name = res_attr_name; + res_attr->attr.mode = S_IRUSR | S_IWUSR; + res_attr->size = pci_resource_len(pdev, num); + res_attr->private = &pdev->resource[num]; + retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr); + } else + retval = -ENOMEM; + + return retval; +} + /** * pci_create_resource_files - create resource files in sysfs for @dev * @dev: dev in question @@ -557,31 +609,19 @@ static int pci_create_resource_files(struct pci_dev *pdev) /* Expose the PCI resources from this device as files */ for (i = 0; i < PCI_ROM_RESOURCE; i++) { - struct bin_attribute *res_attr; /* skip empty resources */ if (!pci_resource_len(pdev, i)) continue; - /* allocate attribute structure, piggyback attribute name */ - res_attr = kzalloc(sizeof(*res_attr) + 10, GFP_ATOMIC); - if (res_attr) { - char *res_attr_name = (char *)(res_attr + 1); - - pdev->res_attr[i] = res_attr; - sprintf(res_attr_name, "resource%d", i); - res_attr->attr.name = res_attr_name; - res_attr->attr.mode = S_IRUSR | S_IWUSR; - res_attr->size = pci_resource_len(pdev, i); - res_attr->mmap = pci_mmap_resource; - res_attr->private = &pdev->resource[i]; - retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr); - if (retval) { - pci_remove_resource_files(pdev); - return retval; - } - } else { - return -ENOMEM; + retval = pci_create_attr(pdev, i, 0); + /* for prefetchable resources, create a WC mappable file */ + if (!retval && pdev->resource[i].flags & IORESOURCE_PREFETCH) + retval = pci_create_attr(pdev, i, 1); + + if (retval) { + pci_remove_resource_files(pdev); + return retval; } } return 0; diff --git a/include/linux/pci.h b/include/linux/pci.h index 509159b..d18b1dd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -206,6 +206,7 @@ struct pci_dev { struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* has display of the rom attribute been enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ + struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ #ifdef CONFIG_PCI_MSI struct list_head msi_list; #endif -- cgit v0.10.2 From 77b52b4c5c66175553ee59eb43f74366f1e54bde Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Mon, 5 May 2008 19:09:10 -0700 Subject: x86: add "debugpat" boot option enable debug messages by a boot option "debugpat". Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 60adbe2..1a82f4d 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -42,6 +42,19 @@ static int nopat(char *str) early_param("nopat", nopat); #endif + +static int debug_enable; +static int __init pat_debug_setup(char *str) +{ + debug_enable = 1; + return 0; +} +__setup("debugpat", pat_debug_setup); + +#define dprintk(fmt, arg...) \ + do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) + + static u64 __read_mostly boot_pat_state; enum { @@ -279,7 +292,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, struct memtype *saved_ptr; if (parse->start >= end) { - pr_debug("New Entry\n"); + dprintk("New Entry\n"); list_add(&new_entry->nd, parse->nd.prev); new_entry = NULL; break; @@ -329,7 +342,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, break; } - pr_debug("Overlap at 0x%Lx-0x%Lx\n", + dprintk("Overlap at 0x%Lx-0x%Lx\n", saved_ptr->start, saved_ptr->end); /* No conflict. Go ahead and add this new entry */ list_add(&new_entry->nd, saved_ptr->nd.prev); @@ -381,7 +394,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, break; } - pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n", + dprintk("Overlap at 0x%Lx-0x%Lx\n", saved_ptr->start, saved_ptr->end); /* No conflict. Go ahead and add this new entry */ list_add(&new_entry->nd, &saved_ptr->nd); @@ -403,16 +416,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (new_entry) { /* No conflict. Not yet added to the list. Add to the tail */ list_add_tail(&new_entry->nd, &memtype_list); - pr_debug("New Entry\n"); + dprintk("New Entry\n"); } if (ret_type) { - pr_debug( + dprintk( "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", start, end, cattr_name(actual_type), cattr_name(req_type), cattr_name(*ret_type)); } else { - pr_debug( + dprintk( "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n", start, end, cattr_name(actual_type), cattr_name(req_type)); @@ -453,7 +466,7 @@ int free_memtype(u64 start, u64 end) current->comm, current->pid, start, end); } - pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end); + dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); return err; } -- cgit v0.10.2 From f8096f92b87d81c55ed63964d27baa9ce5ffe508 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 22 Apr 2008 16:27:29 +0100 Subject: x86: separate cmpxchg8b checking from PAE checking .. allowing the former to be use in non-PAE kernels, too. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2ad6301..3d22bb8 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -399,6 +399,10 @@ config X86_TSC def_bool y depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 +config X86_CMPXCHG64 + def_bool y + depends on X86_PAE || X86_64 + # this should be set for all -march=.. options where the compiler # generates cmov. config X86_CMOV diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h index 7400d3a..8c38719 100644 --- a/include/asm-x86/required-features.h +++ b/include/asm-x86/required-features.h @@ -19,9 +19,13 @@ #if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) # define NEED_PAE (1<<(X86_FEATURE_PAE & 31)) -# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) #else # define NEED_PAE 0 +#endif + +#ifdef CONFIG_X86_CMPXCHG64 +# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31)) +#else # define NEED_CX8 0 #endif -- cgit v0.10.2 From aa134f1b09df6beaa4d031a50d5fda1f3cebce6c Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 8 Apr 2008 10:49:03 +0200 Subject: x86: iommu: use symbolic constants, not hardcoded numbers Move symbolic constants into gart.h, and use them instead of hardcoded constant. Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c07455d..bffcf45 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -598,13 +598,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) dev = k8_northbridges[i]; gatt_reg = __pa(gatt) >> 12; gatt_reg <<= 4; - pci_write_config_dword(dev, 0x98, gatt_reg); - pci_read_config_dword(dev, 0x90, &ctl); + pci_write_config_dword(dev, AMD64_GARTTABLEBASE, gatt_reg); + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); - ctl |= 1; - ctl &= ~((1<<4) | (1<<5)); + ctl |= GARTEN; + ctl &= ~(DISGARTCPU | DISGARTIO); - pci_write_config_dword(dev, 0x90, ctl); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); } flush_gart(); diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index d8200ac..25d6422 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -16,28 +16,9 @@ #include /* PAGE_SIZE */ #include #include +#include #include "agp.h" -/* PTE bits. */ -#define GPTE_VALID 1 -#define GPTE_COHERENT 2 - -/* Aperture control register bits. */ -#define GARTEN (1<<0) -#define DISGARTCPU (1<<4) -#define DISGARTIO (1<<5) - -/* GART cache control register bits. */ -#define INVGART (1<<0) -#define GARTPTEERR (1<<1) - -/* K8 On-cpu GART registers */ -#define AMD64_GARTAPERTURECTL 0x90 -#define AMD64_GARTAPERTUREBASE 0x94 -#define AMD64_GARTTABLEBASE 0x98 -#define AMD64_GARTCACHECTL 0x9c -#define AMD64_GARTEN (1<<0) - /* NVIDIA K8 registers */ #define NVIDIA_X86_64_0_APBASE 0x10 #define NVIDIA_X86_64_1_APBASE1 0x50 @@ -165,7 +146,7 @@ static int amd64_fetch_size(void) * In a multiprocessor x86-64 system, this function gets * called once for each CPU. */ -static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table) +static u64 amd64_configure(struct pci_dev *hammer, u64 gatt_table) { u64 aperturebase; u32 tmp; @@ -181,7 +162,7 @@ static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table) addr >>= 12; tmp = (u32) addr<<4; tmp &= ~0xf; - pci_write_config_dword (hammer, AMD64_GARTTABLEBASE, tmp); + pci_write_config_dword(hammer, AMD64_GARTTABLEBASE, tmp); /* Enable GART translation for this hammer. */ pci_read_config_dword(hammer, AMD64_GARTAPERTURECTL, &tmp); diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index 90958ed..248e577 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h @@ -5,6 +5,7 @@ extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); extern int force_iommu, no_iommu; extern int iommu_detected; +extern int agp_amd64_init(void); #ifdef CONFIG_GART_IOMMU extern void gart_iommu_init(void); extern void gart_iommu_shutdown(void); @@ -31,4 +32,24 @@ static inline void gart_iommu_shutdown(void) #endif +/* PTE bits. */ +#define GPTE_VALID 1 +#define GPTE_COHERENT 2 + +/* Aperture control register bits. */ +#define GARTEN (1<<0) +#define DISGARTCPU (1<<4) +#define DISGARTIO (1<<5) + +/* GART cache control register bits. */ +#define INVGART (1<<0) +#define GARTPTEERR (1<<1) + +/* K8 On-cpu GART registers */ +#define AMD64_GARTAPERTURECTL 0x90 +#define AMD64_GARTAPERTUREBASE 0x94 +#define AMD64_GARTTABLEBASE 0x98 +#define AMD64_GARTCACHECTL 0x9c +#define AMD64_GARTEN (1<<0) + #endif -- cgit v0.10.2 From 1edc1ab3f68168ec6815e6d630f38948a6da005a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 13 Apr 2008 01:11:41 -0700 Subject: x86: agp_gart size checking for buggy device while looking at Rafael J. Wysocki's system boot log, I found a funny printout: Node 0: aperture @ de000000 size 32 MB Aperture too small (32 MB) AGP bridge at 00:04:00 Aperture from AGP @ de000000 size 4096 MB (APSIZE 0) Aperture too small (0 MB) Your BIOS doesn't leave a aperture memory hole Please enable the IOMMU option in the BIOS setup This costs you 64 MB of RAM Mapping aperture over 65536 KB of RAM @ 4000000 ... agpgart: Detected AGP bridge 20 agpgart: Aperture pointing to RAM agpgart: Aperture from AGP @ de000000 size 4096 MB agpgart: Aperture too small (0 MB) agpgart: No usable aperture found. agpgart: Consider rebooting with iommu=memaper=2 to get a good aperture. it means BIOS allocated the correct gart on the NB and AGP bridge, but because a bug in the silicon (the agp bridge reports the wrong order, it wants 4G instead) the kernel will reject that allocation. Also, because the size is only 32MB, and we try to get another 64M for gart, late fix_northbridge can not revert that change because it still reads the wrong size from agp bridge. So try to double check the order value from the agp bridge, before calling aperture_valid(). [ mingo@elte.hu: 32-bit fix. ] Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 479926d..2e93b31 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -138,6 +138,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) int nbits; u32 aper_low, aper_hi; u64 aper; + u32 old_order; printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func); apsizereg = read_pci_config_16(num, slot, func, cap + 0x14); @@ -146,6 +147,9 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) return 0; } + /* old_order could be the value from NB gart setting */ + old_order = *order; + apsize = apsizereg & 0xfff; /* Some BIOS use weird encodings not in the AGPv3 table. */ if (apsize & 0xff) @@ -159,6 +163,16 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) aper_hi = read_pci_config(num, slot, func, 0x14); aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); + /* + * On some sick chips, APSIZE is 0. It means it wants 4G + * so let double check that order, and lets trust AMD NB settings: + */ + if (aper + (32UL<<(20 + *order)) > 0x100000000UL) { + printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n", + 32 << *order, apsizereg); + *order = old_order; + } + printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", aper, 32 << *order, apsizereg); diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 25d6422..9e3939d 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -312,6 +312,17 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, pci_read_config_dword(agp, 0x10, &aper_low); pci_read_config_dword(agp, 0x14, &aper_hi); aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); + + /* + * On some sick chips APSIZE is 0. This means it wants 4G + * so let double check that order, and lets trust the AMD NB settings + */ + if (aper + (32ULL<<(20 + order)) > 0x100000000ULL) { + printk(KERN_INFO "Aperture size %u MB is not right, using settings from NB\n", + 32 << order); + order = nb_order; + } + printk(KERN_INFO PFX "Aperture from AGP @ %Lx size %u MB\n", aper, 32 << order); if (order < 0 || !aperture_valid(aper, (32*1024*1024)< Date: Sun, 13 Apr 2008 18:42:31 -0700 Subject: x86: checking aperture size order some systems are using 32M for gart and agp when memory is less than 4G. Kernel will reject and try to allcate another 64M that is not needed, and we will waste 64M of perfectly good RAM. this patch adds a workaround by checking aper_base/order between NB and agp bridge. If they are the same, and memory size is less than 4G, it will allow it. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 2e93b31..8c325b7f 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -83,7 +83,7 @@ static u32 __init allocate_aperture(void) return (u32)__pa(p); } -static int __init aperture_valid(u64 aper_base, u32 aper_size) +static int __init aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) { if (!aper_base) return 0; @@ -96,8 +96,9 @@ static int __init aperture_valid(u64 aper_base, u32 aper_size) printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n"); return 0; } - if (aper_size < 64*1024*1024) { - printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20); + if (aper_size < min_size) { + printk(KERN_ERR "Aperture too small (%d MB) than (%d MB)\n", + aper_size>>20, min_size>>20); return 0; } @@ -167,7 +168,9 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) * On some sick chips, APSIZE is 0. It means it wants 4G * so let double check that order, and lets trust AMD NB settings: */ - if (aper + (32UL<<(20 + *order)) > 0x100000000UL) { + printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n", + aper, 32 << old_order); + if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) { printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n", 32 << *order, apsizereg); *order = old_order; @@ -176,7 +179,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", aper, 32 << *order, apsizereg); - if (!aperture_valid(aper, (32*1024*1024) << *order)) + if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20)) return 0; return (u32)aper; } @@ -302,8 +305,8 @@ void __init early_gart_iommu_check(void) fix = 1; if (gart_fix_e820 && !fix && aper_enabled) { - if (e820_any_mapped(aper_base, aper_base + aper_size, - E820_RAM)) { + if (!e820_all_mapped(aper_base, aper_base + aper_size, + E820_RESERVED)) { /* reserved it, so we can resuse it in second kernel */ printk(KERN_INFO "update e820 for GART\n"); add_memory_region(aper_base, aper_size, E820_RESERVED); @@ -324,8 +327,11 @@ void __init early_gart_iommu_check(void) } +static int __initdata printed_gart_size_msg; + void __init gart_iommu_hole_init(void) { + u32 agp_aper_base = 0, agp_aper_order = 0; u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; u64 aper_base, last_aper_base = 0; int fix, num, valid_agp = 0; @@ -337,6 +343,9 @@ void __init gart_iommu_hole_init(void) printk(KERN_INFO "Checking aperture...\n"); + if (!fallback_aper_force) + agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); + fix = 0; node = 0; for (num = 24; num < 32; num++) { @@ -355,9 +364,21 @@ void __init gart_iommu_hole_init(void) node, aper_base, aper_size >> 20); node++; - if (!aperture_valid(aper_base, aper_size)) { - fix = 1; - break; + if (!aperture_valid(aper_base, aper_size, 64<<20)) { + if (valid_agp && agp_aper_base && + agp_aper_base == aper_base && + agp_aper_order == aper_order) { + /* the same between two setting from NB and agp */ + if (!no_iommu && end_pfn > MAX_DMA32_PFN && !printed_gart_size_msg) { + printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n"); + printk(KERN_ERR "please increase GART size in your BIOS setup\n"); + printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n"); + printed_gart_size_msg = 1; + } + } else { + fix = 1; + break; + } } if ((last_aper_order && aper_order != last_aper_order) || @@ -378,8 +399,10 @@ void __init gart_iommu_hole_init(void) return; } - if (!fallback_aper_force) - aper_alloc = search_agp_bridge(&aper_order, &valid_agp); + if (!fallback_aper_force) { + aper_alloc = agp_aper_base; + aper_order = agp_aper_order; + } if (aper_alloc) { /* Got the aperture from the AGP bridge */ diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 9e3939d..9c24470 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -245,11 +245,7 @@ static int __devinit aperture_valid(u64 aper, u32 size) printk(KERN_ERR PFX "No aperture\n"); return 0; } - if (size < 32*1024*1024) { - printk(KERN_ERR PFX "Aperture too small (%d MB)\n", size>>20); - return 0; - } - if ((u64)aper + size > 0x100000000ULL) { + if ((u64)aper + size > 0x100000000ULL) { printk(KERN_ERR PFX "Aperture out of bounds\n"); return 0; } @@ -257,6 +253,10 @@ static int __devinit aperture_valid(u64 aper, u32 size) printk(KERN_ERR PFX "Aperture pointing to RAM\n"); return 0; } + if (size < 32*1024*1024) { + printk(KERN_ERR PFX "Aperture too small (%d MB)\n", size>>20); + return 0; + } /* Request the Aperture. This catches cases when someone else already put a mapping in there - happens with some very broken BIOS @@ -317,7 +317,7 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, * On some sick chips APSIZE is 0. This means it wants 4G * so let double check that order, and lets trust the AMD NB settings */ - if (aper + (32ULL<<(20 + order)) > 0x100000000ULL) { + if (order >=0 && aper + (32ULL<<(20 + order)) > 0x100000000ULL) { printk(KERN_INFO "Aperture size %u MB is not right, using settings from NB\n", 32 << order); order = nb_order; -- cgit v0.10.2 From 7677b2ef6c0c4fddc84f6473f3863f40eb71821b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 14 Apr 2008 20:40:37 -0700 Subject: x86_64: allocate gart aperture from 512M because we try to reserve dma32 early, so we have chance to get aperture from 64M. with some sequence aperture allocated from RAM, could become E820_RESERVED. and then if doing a kexec with a big kernel that uncompressed size is above 64M we could have a range conflict with still using gart. So allocate gart aperture from 512M instead. Also change the fallback_aper_order to 5, because we don't have chance to get 2G or 4G aperture. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 8c325b7f..c63f8d9 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -55,8 +55,9 @@ static u32 __init allocate_aperture(void) u32 aper_size; void *p; - if (fallback_aper_order > 7) - fallback_aper_order = 7; + /* aper_size should <= 1G */ + if (fallback_aper_order > 5) + fallback_aper_order = 5; aper_size = (32 * 1024 * 1024) << fallback_aper_order; /* @@ -65,7 +66,20 @@ static u32 __init allocate_aperture(void) * memory. Unfortunately we cannot move it up because that would * make the IOMMU useless. */ - p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); + /* + * using 512M as goal, in case kexec will load kernel_big + * that will do the on position decompress, and could overlap with + * that positon with gart that is used. + * sequende: + * kernel_small + * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) + * ==> kernel_small(gart area become e820_reserved) + * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) + * ==> kerne_big (uncompressed size will be big than 64M or 128M) + * so don't use 512M below as gart iommu, leave the space for kernel + * code for safe + */ + p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); if (!p || __pa(p)+aper_size > 0xffffffff) { printk(KERN_ERR "Cannot allocate aperture memory hole (%p,%uK)\n", diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0c37f16..1a017f0 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -77,10 +77,14 @@ void __init dma32_reserve_bootmem(void) if (end_pfn <= MAX_DMA32_PFN) return; + /* + * check aperture_64.c allocate_aperture() for reason about + * using 512M as goal + */ align = 64ULL<<20; size = round_up(dma32_bootmem_size, align); dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, - __pa(MAX_DMA_ADDRESS)); + 512ULL<<20); if (dma32_bootmem_ptr) dma32_bootmem_size = size; else -- cgit v0.10.2 From 55c0d721df80dcc505dc888e85d4ca51ea150ce9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 19 Apr 2008 01:31:11 -0700 Subject: x86: clean up aperture_64.c 1. use symbolic register names where appropriate. 2. num to bus or slot changing 3. handle for new opteron for bus other than 0 Signed-off-by: Yinghai Lu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index c63f8d9..02f4dba 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -35,6 +35,18 @@ int fallback_aper_force __initdata; int fix_aperture __initdata = 1; +struct bus_dev_range { + int bus; + int dev_base; + int dev_limit; +}; + +static struct bus_dev_range bus_dev_ranges[] __initdata = { + { 0x00, 0x18, 0x20}, + { 0xff, 0x00, 0x20}, + { 0xfe, 0x00, 0x20} +}; + static struct resource gart_resource = { .name = "GART", .flags = IORESOURCE_MEM, @@ -120,33 +132,33 @@ static int __init aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) } /* Find a PCI capability */ -static __u32 __init find_cap(int num, int slot, int func, int cap) +static __u32 __init find_cap(int bus, int slot, int func, int cap) { int bytes; u8 pos; - if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & + if (!(read_pci_config_16(bus, slot, func, PCI_STATUS) & PCI_STATUS_CAP_LIST)) return 0; - pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); + pos = read_pci_config_byte(bus, slot, func, PCI_CAPABILITY_LIST); for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { u8 id; pos &= ~3; - id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); + id = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_ID); if (id == 0xff) break; if (id == cap) return pos; - pos = read_pci_config_byte(num, slot, func, + pos = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_NEXT); } return 0; } /* Read a standard AGPv3 bridge header */ -static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) +static __u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) { u32 apsize; u32 apsizereg; @@ -155,8 +167,8 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) u64 aper; u32 old_order; - printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func); - apsizereg = read_pci_config_16(num, slot, func, cap + 0x14); + printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func); + apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14); if (apsizereg == 0xffffffff) { printk(KERN_ERR "APSIZE in AGP bridge unreadable\n"); return 0; @@ -174,8 +186,8 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) if ((int)*order < 0) /* < 32MB */ *order = 0; - aper_low = read_pci_config(num, slot, func, 0x10); - aper_hi = read_pci_config(num, slot, func, 0x14); + aper_low = read_pci_config(bus, slot, func, 0x10); + aper_hi = read_pci_config(bus, slot, func, 0x14); aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); /* @@ -213,15 +225,15 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) */ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) { - int num, slot, func; + int bus, slot, func; /* Poor man's PCI discovery */ - for (num = 0; num < 256; num++) { + for (bus = 0; bus < 256; bus++) { for (slot = 0; slot < 32; slot++) { for (func = 0; func < 8; func++) { u32 class, cap; u8 type; - class = read_pci_config(num, slot, func, + class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); if (class == 0xffffffff) break; @@ -230,17 +242,17 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) case PCI_CLASS_BRIDGE_HOST: case PCI_CLASS_BRIDGE_OTHER: /* needed? */ /* AGP bridge? */ - cap = find_cap(num, slot, func, + cap = find_cap(bus, slot, func, PCI_CAP_ID_AGP); if (!cap) break; *valid_agp = 1; - return read_agp(num, slot, func, cap, + return read_agp(bus, slot, func, cap, order); } /* No multi-function device? */ - type = read_pci_config_byte(num, slot, func, + type = read_pci_config_byte(bus, slot, func, PCI_HEADER_TYPE); if (!(type & 0x80)) break; @@ -280,38 +292,49 @@ void __init early_gart_iommu_check(void) * or BIOS forget to put that in reserved. * try to update e820 to make that region as reserved. */ - int fix, num; + int fix, slot; u32 ctl; u32 aper_size = 0, aper_order = 0, last_aper_order = 0; u64 aper_base = 0, last_aper_base = 0; int aper_enabled = 0, last_aper_enabled = 0; + int i; if (!early_pci_allowed()) return; fix = 0; - for (num = 24; num < 32; num++) { - if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; - - ctl = read_pci_config(0, num, 3, 0x90); - aper_enabled = ctl & 1; - aper_order = (ctl >> 1) & 7; - aper_size = (32 * 1024 * 1024) << aper_order; - aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; - aper_base <<= 25; - - if ((last_aper_order && aper_order != last_aper_order) || - (last_aper_base && aper_base != last_aper_base) || - (last_aper_enabled && aper_enabled != last_aper_enabled)) { - fix = 1; - break; + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + int bus; + int dev_base, dev_limit; + + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; + + for (slot = dev_base; slot < dev_limit; slot++) { + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + continue; + + ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); + aper_enabled = ctl & AMD64_GARTEN; + aper_order = (ctl >> 1) & 7; + aper_size = (32 * 1024 * 1024) << aper_order; + aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; + aper_base <<= 25; + + if ((last_aper_order && aper_order != last_aper_order) || + (last_aper_base && aper_base != last_aper_base) || + (last_aper_enabled && aper_enabled != last_aper_enabled)) { + fix = 1; + goto out; + } + last_aper_order = aper_order; + last_aper_base = aper_base; + last_aper_enabled = aper_enabled; } - last_aper_order = aper_order; - last_aper_base = aper_base; - last_aper_enabled = aper_enabled; } +out: if (!fix && !aper_enabled) return; @@ -330,13 +353,22 @@ void __init early_gart_iommu_check(void) } /* different nodes have different setting, disable them all at first*/ - for (num = 24; num < 32; num++) { - if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + int bus; + int dev_base, dev_limit; + + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; - ctl = read_pci_config(0, num, 3, 0x90); - ctl &= ~1; - write_pci_config(0, num, 3, 0x90, ctl); + for (slot = dev_base; slot < dev_limit; slot++) { + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + continue; + + ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); + ctl &= ~AMD64_GARTEN; + write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); + } } } @@ -348,8 +380,8 @@ void __init gart_iommu_hole_init(void) u32 agp_aper_base = 0, agp_aper_order = 0; u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; u64 aper_base, last_aper_base = 0; - int fix, num, valid_agp = 0; - int node; + int fix, slot, valid_agp = 0; + int i, node; if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) @@ -362,48 +394,58 @@ void __init gart_iommu_hole_init(void) fix = 0; node = 0; - for (num = 24; num < 32; num++) { - if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; - - iommu_detected = 1; - gart_iommu_aperture = 1; - - aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; - aper_size = (32 * 1024 * 1024) << aper_order; - aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; - aper_base <<= 25; - - printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n", - node, aper_base, aper_size >> 20); - node++; - - if (!aperture_valid(aper_base, aper_size, 64<<20)) { - if (valid_agp && agp_aper_base && - agp_aper_base == aper_base && - agp_aper_order == aper_order) { - /* the same between two setting from NB and agp */ - if (!no_iommu && end_pfn > MAX_DMA32_PFN && !printed_gart_size_msg) { - printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n"); - printk(KERN_ERR "please increase GART size in your BIOS setup\n"); - printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n"); - printed_gart_size_msg = 1; + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + int bus; + int dev_base, dev_limit; + + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; + + for (slot = dev_base; slot < dev_limit; slot++) { + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + continue; + + iommu_detected = 1; + gart_iommu_aperture = 1; + + aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; + aper_size = (32 * 1024 * 1024) << aper_order; + aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; + aper_base <<= 25; + + printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n", + node, aper_base, aper_size >> 20); + node++; + + if (!aperture_valid(aper_base, aper_size, 64<<20)) { + if (valid_agp && agp_aper_base && + agp_aper_base == aper_base && + agp_aper_order == aper_order) { + /* the same between two setting from NB and agp */ + if (!no_iommu && end_pfn > MAX_DMA32_PFN && !printed_gart_size_msg) { + printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n"); + printk(KERN_ERR "please increase GART size in your BIOS setup\n"); + printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n"); + printed_gart_size_msg = 1; + } + } else { + fix = 1; + goto out; } - } else { - fix = 1; - break; } - } - if ((last_aper_order && aper_order != last_aper_order) || - (last_aper_base && aper_base != last_aper_base)) { - fix = 1; - break; + if ((last_aper_order && aper_order != last_aper_order) || + (last_aper_base && aper_base != last_aper_base)) { + fix = 1; + goto out; + } + last_aper_order = aper_order; + last_aper_base = aper_base; } - last_aper_order = aper_order; - last_aper_base = aper_base; } +out: if (!fix && !fallback_aper_force) { if (last_aper_base) { unsigned long n = (32 * 1024 * 1024) << last_aper_order; @@ -452,16 +494,22 @@ void __init gart_iommu_hole_init(void) } /* Fix up the north bridges */ - for (num = 24; num < 32; num++) { - if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; - - /* - * Don't enable translation yet. That is done later. - * Assume this BIOS didn't initialise the GART so - * just overwrite all previous bits - */ - write_pci_config(0, num, 3, 0x90, aper_order<<1); - write_pci_config(0, num, 3, 0x94, aper_alloc>>25); + for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { + int bus; + int dev_base, dev_limit; + + bus = bus_dev_ranges[i].bus; + dev_base = bus_dev_ranges[i].dev_base; + dev_limit = bus_dev_ranges[i].dev_limit; + for (slot = dev_base; slot < dev_limit; slot++) { + if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) + continue; + + /* Don't enable translation yet. That is done later. + Assume this BIOS didn't initialise the GART so + just overwrite all previous bits */ + write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1); + write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); + } } } -- cgit v0.10.2 From 330fce23dab6e6a3d1979e55f27aba4c0c301331 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 19 Apr 2008 01:31:45 -0700 Subject: x86: reserve dma32 early for gart fix we can use free_bootmem() directly. Signed-off-by: Yinghai Lu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a017f0..81862d0 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -92,7 +92,6 @@ void __init dma32_reserve_bootmem(void) } static void __init dma32_free_bootmem(void) { - int node; if (end_pfn <= MAX_DMA32_PFN) return; @@ -100,9 +99,7 @@ static void __init dma32_free_bootmem(void) if (!dma32_bootmem_ptr) return; - for_each_online_node(node) - free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), - dma32_bootmem_size); + free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size); dma32_bootmem_ptr = NULL; dma32_bootmem_size = 0; -- cgit v0.10.2 From 3bb6fbf9969a8bbe4892968659239273d092e78a Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 15 Apr 2008 12:43:57 +0200 Subject: x86 gart: factor out common code Cleanup gart handling on amd64 a bit: move common code into enable_gart_translation , and use symbolic register names where appropriate. Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index bffcf45..1f99b62 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -533,8 +533,8 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) unsigned aper_size = 0, aper_base_32, aper_order; u64 aper_base; - pci_read_config_dword(dev, 0x94, &aper_base_32); - pci_read_config_dword(dev, 0x90, &aper_order); + pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32); + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order); aper_order = (aper_order >> 1) & 7; aper_base = aper_base_32 & 0x7fff; @@ -592,19 +592,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) agp_gatt_table = gatt; for (i = 0; i < num_k8_northbridges; i++) { - u32 gatt_reg; - u32 ctl; - dev = k8_northbridges[i]; - gatt_reg = __pa(gatt) >> 12; - gatt_reg <<= 4; - pci_write_config_dword(dev, AMD64_GARTTABLEBASE, gatt_reg); - pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); - - ctl |= GARTEN; - ctl &= ~(DISGARTCPU | DISGARTIO); - - pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); + enable_gart_translation(dev, __pa(gatt)); } flush_gart(); @@ -648,11 +637,11 @@ void gart_iommu_shutdown(void) u32 ctl; dev = k8_northbridges[i]; - pci_read_config_dword(dev, 0x90, &ctl); + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); - ctl &= ~1; + ctl &= ~GARTEN; - pci_write_config_dword(dev, 0x90, ctl); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); } } diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 9c24470..e3c7ea0 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -150,25 +150,14 @@ static u64 amd64_configure(struct pci_dev *hammer, u64 gatt_table) { u64 aperturebase; u32 tmp; - u64 addr, aper_base; + u64 aper_base; /* Address to map to */ - pci_read_config_dword (hammer, AMD64_GARTAPERTUREBASE, &tmp); + pci_read_config_dword(hammer, AMD64_GARTAPERTUREBASE, &tmp); aperturebase = tmp << 25; aper_base = (aperturebase & PCI_BASE_ADDRESS_MEM_MASK); - /* address of the mappings table */ - addr = (u64) gatt_table; - addr >>= 12; - tmp = (u32) addr<<4; - tmp &= ~0xf; - pci_write_config_dword(hammer, AMD64_GARTTABLEBASE, tmp); - - /* Enable GART translation for this hammer. */ - pci_read_config_dword(hammer, AMD64_GARTAPERTURECTL, &tmp); - tmp |= GARTEN; - tmp &= ~(DISGARTCPU | DISGARTIO); - pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp); + enable_gart_translation(hammer, gatt_table); return aper_base; } @@ -207,9 +196,9 @@ static void amd64_cleanup(void) for (i = 0; i < num_k8_northbridges; i++) { struct pci_dev *dev = k8_northbridges[i]; /* disable gart translation */ - pci_read_config_dword (dev, AMD64_GARTAPERTURECTL, &tmp); + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); tmp &= ~AMD64_GARTEN; - pci_write_config_dword (dev, AMD64_GARTAPERTURECTL, tmp); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp); } } @@ -289,9 +278,9 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, u32 nb_order, nb_base; u16 apsize; - pci_read_config_dword(nb, 0x90, &nb_order); + pci_read_config_dword(nb, AMD64_GARTAPERTURECTL, &nb_order); nb_order = (nb_order >> 1) & 7; - pci_read_config_dword(nb, 0x94, &nb_base); + pci_read_config_dword(nb, AMD64_GARTAPERTUREBASE, &nb_base); nb_aper = nb_base << 25; if (aperture_valid(nb_aper, (32*1024*1024)<> 25); + pci_write_config_dword(nb, AMD64_GARTAPERTURECTL, order << 1); + pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25); return 0; } diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index 248e577..6f22786 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h @@ -52,4 +52,21 @@ static inline void gart_iommu_shutdown(void) #define AMD64_GARTCACHECTL 0x9c #define AMD64_GARTEN (1<<0) +static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) +{ + u32 tmp, ctl; + + /* address of the mappings table */ + addr >>= 12; + tmp = (u32) addr<<4; + tmp &= ~0xf; + pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp); + + /* Enable GART translation for this hammer. */ + pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); + ctl |= GARTEN; + ctl &= ~(DISGARTCPU | DISGARTIO); + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); +} + #endif -- cgit v0.10.2 From f784946ded3e734524d1f48a6a8286b8a152c3b9 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 2 May 2008 16:45:08 -0700 Subject: x86: use per_cpu data in nmi_32 use per_cpu for per CPU data. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 11b14bb..69bdae5 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -293,9 +293,9 @@ void stop_apic_nmi_watchdog(void *unused) * here too!] */ -static unsigned int - last_irq_sums [NR_CPUS], - alert_counter [NR_CPUS]; +static DEFINE_PER_CPU(unsigned, last_irq_sum); +static DEFINE_PER_CPU(local_t, alert_counter); +static DEFINE_PER_CPU(int, nmi_touch); void touch_nmi_watchdog(void) { @@ -303,12 +303,13 @@ void touch_nmi_watchdog(void) unsigned cpu; /* - * Just reset the alert counters, (other CPUs might be - * spinning on locks we hold): + * Tell other CPUs to reset their alert counters. We cannot + * do it ourselves because the alert count increase is not + * atomic. */ for_each_present_cpu(cpu) { - if (alert_counter[cpu]) - alert_counter[cpu] = 0; + if (per_cpu(nmi_touch, cpu) != 1) + per_cpu(nmi_touch, cpu) = 1; } } @@ -358,22 +359,26 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) */ sum = per_cpu(irq_stat, cpu).apic_timer_irqs + per_cpu(irq_stat, cpu).irq0_irqs; + if (__get_cpu_var(nmi_touch)) { + __get_cpu_var(nmi_touch) = 0; + touched = 1; + } /* if the none of the timers isn't firing, this cpu isn't doing much */ - if (!touched && last_irq_sums[cpu] == sum) { + if (!touched && __get_cpu_var(last_irq_sum) == sum) { /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ - alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) + local_inc(&__get_cpu_var(alert_counter)); + if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); } else { - last_irq_sums[cpu] = sum; - alert_counter[cpu] = 0; + __get_cpu_var(last_irq_sum) = sum; + local_set(&__get_cpu_var(alert_counter), 0); } /* see if the nmi watchdog went off */ if (!__get_cpu_var(wd_enabled)) -- cgit v0.10.2 From f59a9310b97879159ef4d25227bc270278f1ee71 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 2 May 2008 16:44:52 -0700 Subject: x86: nmi and irq counters are unsigned in nmi_64.c __nmi_count, apic_timer_irqs and irq0_irqs are unsigned. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 5a29ded..33cb8ec 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -79,7 +79,7 @@ static __init void nmi_cpu_busy(void *data) int __init check_nmi_watchdog(void) { - int *prev_nmi_count; + unsigned int *prev_nmi_count; int cpu; if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) @@ -316,7 +316,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog); notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { - int sum; + unsigned int sum; int touched = 0; int cpu = smp_processor_id(); int rc = 0; -- cgit v0.10.2 From 622e3f28e50fe30cc199c735440cd7c75e0033b0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 4 May 2008 19:57:19 +0400 Subject: x86: 64-bit defconfig remake The current x86_64_defconfig contains a number of nonexistent symbols. Lets fix it. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 2d6f5b2..2f1b356 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,48 +1,65 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.22-git14 -# Fri Jul 20 09:53:15 2007 +# Linux kernel version: 2.6.26-rc1 +# Sun May 4 19:27:29 2008 # -CONFIG_X86_64=y CONFIG_64BIT=y +# CONFIG_X86_32 is not set +CONFIG_X86_64=y CONFIG_X86=y +CONFIG_DEFCONFIG_LIST="arch/x86/configs/x86_64_defconfig" +# CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y -CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CMOS_UPDATE=y -CONFIG_ZONE_DMA32=y +CONFIG_CLOCKSOURCE_WATCHDOG=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y -CONFIG_SEMAPHORE_SLEEPERS=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_FAST_CMPXCHG_LOCAL=y CONFIG_MMU=y CONFIG_ZONE_DMA=y -CONFIG_QUICKLIST=y -CONFIG_NR_QUICK=2 -CONFIG_RWSEM_GENERIC_SPINLOCK=y -CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_X86_CMPXCHG=y -CONFIG_EARLY_PRINTK=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_ARCH_POPULATES_NODE_MAP=y -CONFIG_DMI=y -CONFIG_AUDIT_ARCH=y CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_HWEIGHT=y +# CONFIG_GENERIC_GPIO is not set +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_TIME_VSYSCALL=y +CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +CONFIG_HAVE_SETUP_PER_CPU_AREA=y +CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y +CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_ZONE_DMA32=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_AUDIT_ARCH=y +CONFIG_ARCH_SUPPORTS_AOUT=y +CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_64_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +# CONFIG_KTIME_SCALAR is not set # -# Code maturity level options +# General setup # CONFIG_EXPERIMENTAL=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y @@ -51,14 +68,21 @@ CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set -# CONFIG_USER_NS is not set # CONFIG_AUDIT is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=18 -# CONFIG_CPUSETS is not set -CONFIG_SYSFS_DEPRECATED=y +# CONFIG_CGROUPS is not set +# CONFIG_GROUP_SCHED is not set +# CONFIG_USER_SCHED is not set +# CONFIG_CGROUP_SCHED is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set CONFIG_RELAY=y +CONFIG_NAMESPACES=y +CONFIG_UTS_NS=y +CONFIG_IPC_NS=y +# CONFIG_USER_NS is not set +CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -66,6 +90,7 @@ CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y +CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -73,6 +98,7 @@ CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y +# CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_ANON_INODES=y @@ -85,6 +111,17 @@ CONFIG_VM_EVENT_COUNTERS=y CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set +CONFIG_PROFILING=y +# CONFIG_MARKERS is not set +CONFIG_OPROFILE=y +CONFIG_HAVE_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_KRETPROBES=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +# CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -98,6 +135,7 @@ CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set +CONFIG_BLOCK_COMPAT=y # # IO Schedulers @@ -111,107 +149,169 @@ CONFIG_IOSCHED_CFQ=y CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_CLASSIC_RCU=y # # Processor type and features # +CONFIG_TICK_ONESHOT=y +# CONFIG_NO_HZ is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_SMP=y CONFIG_X86_PC=y +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_NUMAQ is not set +# CONFIG_X86_SUMMIT is not set +# CONFIG_X86_BIGSMP is not set +# CONFIG_X86_VISWS is not set +# CONFIG_X86_GENERICARCH is not set +# CONFIG_X86_ES7000 is not set +# CONFIG_X86_RDC321X is not set # CONFIG_X86_VSMP is not set +# CONFIG_PARAVIRT_GUEST is not set +# CONFIG_MEMTEST_BOOTPARAM is not set +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMII is not set +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUMM is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set # CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set # CONFIG_MPSC is not set # CONFIG_MCORE2 is not set CONFIG_GENERIC_CPU=y +CONFIG_X86_CPU=y CONFIG_X86_L1_CACHE_BYTES=128 -CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_X86_INTERNODE_CACHE_BYTES=128 -CONFIG_X86_TSC=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_X86_GOOD_APIC=y -# CONFIG_MICROCODE is not set -CONFIG_X86_MSR=y -CONFIG_X86_CPUID=y -CONFIG_X86_HT=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_MTRR=y -CONFIG_SMP=y +CONFIG_X86_TSC=y +CONFIG_X86_CMOV=y +CONFIG_X86_MINIMUM_CPU_FAMILY=64 +CONFIG_X86_DEBUGCTLMSR=y +CONFIG_HPET_TIMER=y +CONFIG_HPET_EMULATE_RTC=y +CONFIG_DMI=y +CONFIG_GART_IOMMU=y +# CONFIG_CALGARY_IOMMU is not set +CONFIG_SWIOTLB=y +CONFIG_IOMMU_HELPER=y +CONFIG_NR_CPUS=32 CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set -CONFIG_PREEMPT_BKL=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +CONFIG_X86_MCE=y +CONFIG_X86_MCE_INTEL=y +CONFIG_X86_MCE_AMD=y +# CONFIG_I8K is not set +# CONFIG_MICROCODE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y CONFIG_NUMA=y CONFIG_K8_NUMA=y -CONFIG_NODES_SHIFT=6 CONFIG_X86_64_ACPI_NUMA=y +CONFIG_NODES_SPAN_OTHER_NODES=y CONFIG_NUMA_EMU=y +CONFIG_NODES_SHIFT=6 +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_SELECT_MEMORY_MODEL=y +# CONFIG_FLATMEM_MANUAL is not set +# CONFIG_DISCONTIGMEM_MANUAL is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y CONFIG_NEED_MULTIPLE_NODES=y +CONFIG_HAVE_MEMORY_PRESENT=y # CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_EXTREME=y +CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y +# CONFIG_SPARSEMEM_VMEMMAP is not set + +# +# Memory hotplug is currently incompatible with Software Suspend +# +CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MIGRATION=y CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y -CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y -CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y -CONFIG_NR_CPUS=32 -CONFIG_PHYSICAL_ALIGN=0x200000 -CONFIG_HOTPLUG_CPU=y -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_GART_IOMMU=y -# CONFIG_CALGARY_IOMMU is not set -CONFIG_SWIOTLB=y -CONFIG_X86_MCE=y -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y -# CONFIG_KEXEC is not set -# CONFIG_CRASH_DUMP is not set -# CONFIG_RELOCATABLE is not set -CONFIG_PHYSICAL_START=0x200000 +CONFIG_MTRR=y +# CONFIG_X86_PAT is not set +# CONFIG_EFI is not set CONFIG_SECCOMP=y -# CONFIG_CC_STACKPROTECTOR is not set # CONFIG_HZ_100 is not set CONFIG_HZ_250=y # CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=250 -CONFIG_K8_NB=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_ISA_DMA_API=y -CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_SCHED_HRTICK=y +# CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set +CONFIG_PHYSICAL_START=0x200000 +# CONFIG_RELOCATABLE is not set +CONFIG_PHYSICAL_ALIGN=0x200000 +CONFIG_HOTPLUG_CPU=y +# CONFIG_COMPAT_VDSO is not set +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y # # Power management options # +CONFIG_ARCH_HIBERNATION_HEADER=y CONFIG_PM=y -# CONFIG_PM_LEGACY is not set # CONFIG_PM_DEBUG is not set +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_SLEEP=y +# CONFIG_SUSPEND is not set CONFIG_HIBERNATION=y CONFIG_PM_STD_PARTITION="" - -# -# ACPI (Advanced Configuration and Power Interface) Support -# CONFIG_ACPI=y CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_SLEEP_PROC_FS=y -CONFIG_ACPI_SLEEP_PROC_SLEEP=y CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_PROCFS_POWER=y +CONFIG_ACPI_SYSFS_POWER=y +CONFIG_ACPI_PROC_EVENT=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y CONFIG_ACPI_FAN=y -# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_DOCK=y +# CONFIG_ACPI_BAY is not set CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y CONFIG_ACPI_NUMA=y +# CONFIG_ACPI_WMI is not set # CONFIG_ACPI_ASUS is not set # CONFIG_ACPI_TOSHIBA is not set +# CONFIG_ACPI_CUSTOM_DSDT is not set CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y @@ -230,7 +330,10 @@ CONFIG_CPU_FREQ_DEBUG=y CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_STAT_DETAILS is not set CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y @@ -240,16 +343,19 @@ CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y # # CPUFreq processor drivers # +CONFIG_X86_ACPI_CPUFREQ=y CONFIG_X86_POWERNOW_K8=y CONFIG_X86_POWERNOW_K8_ACPI=y # CONFIG_X86_SPEEDSTEP_CENTRINO is not set -CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_X86_P4_CLOCKMOD=y # # shared options # CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y -# CONFIG_X86_SPEEDSTEP_LIB is not set +CONFIG_X86_SPEEDSTEP_LIB=y +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y # # Bus options (PCI etc.) @@ -257,16 +363,18 @@ CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y CONFIG_PCI=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y +CONFIG_PCI_DOMAINS=y +# CONFIG_DMAR is not set CONFIG_PCIEPORTBUS=y CONFIG_PCIEAER=y +# CONFIG_PCIEASPM is not set CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y +CONFIG_PCI_LEGACY=y # CONFIG_PCI_DEBUG is not set # CONFIG_HT_IRQ is not set - -# -# PCCARD (PCMCIA/CardBus) support -# +CONFIG_ISA_DMA_API=y +CONFIG_K8_NB=y # CONFIG_PCCARD is not set # CONFIG_HOTPLUG_PCI is not set @@ -274,10 +382,12 @@ CONFIG_PCI_MSI=y # Executable file formats / Emulations # CONFIG_BINFMT_ELF=y +CONFIG_COMPAT_BINFMT_ELF=y # CONFIG_BINFMT_MISC is not set CONFIG_IA32_EMULATION=y CONFIG_IA32_AOUT=y CONFIG_COMPAT=y +CONFIG_COMPAT_FOR_U64_ALIGNMENT=y CONFIG_SYSVIPC_COMPAT=y # @@ -313,6 +423,7 @@ CONFIG_INET_TUNNEL=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set @@ -334,8 +445,10 @@ CONFIG_IPV6=y # CONFIG_INET6_XFRM_MODE_BEET is not set # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_SIT=y +CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_IPV6_TUNNEL is not set # CONFIG_IPV6_MULTIPLE_TABLES is not set +# CONFIG_IPV6_MROUTE is not set # CONFIG_NETWORK_SECMARK is not set # CONFIG_NETFILTER is not set # CONFIG_IP_DCCP is not set @@ -352,10 +465,6 @@ CONFIG_IPV6_SIT=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# # CONFIG_NET_SCHED is not set # @@ -364,6 +473,7 @@ CONFIG_IPV6_SIT=y # CONFIG_NET_PKTGEN is not set # CONFIG_NET_TCPPROBE is not set # CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set @@ -385,6 +495,7 @@ CONFIG_IPV6_SIT=y # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y @@ -416,7 +527,7 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +# CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set CONFIG_MISC_DEVICES=y @@ -427,17 +538,20 @@ CONFIG_MISC_DEVICES=y # CONFIG_TIFM_CORE is not set # CONFIG_SONY_LAPTOP is not set # CONFIG_THINKPAD_ACPI is not set +# CONFIG_INTEL_MENLOW is not set +# CONFIG_ENCLOSURE_SERVICES is not set +CONFIG_HAVE_IDE=y CONFIG_IDE=y CONFIG_BLK_DEV_IDE=y # -# Please see Documentation/ide.txt for help/info on IDE drives +# Please see Documentation/ide/ide.txt for help/info on IDE drives # # CONFIG_BLK_DEV_IDE_SATA is not set -# CONFIG_BLK_DEV_HD_IDE is not set CONFIG_BLK_DEV_IDEDISK=y CONFIG_IDEDISK_MULTI_MODE=y CONFIG_BLK_DEV_IDECD=y +CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y # CONFIG_BLK_DEV_IDETAPE is not set # CONFIG_BLK_DEV_IDEFLOPPY is not set # CONFIG_BLK_DEV_IDESCSI is not set @@ -449,18 +563,21 @@ CONFIG_IDE_PROC_FS=y # IDE chipset support/bugfixes # CONFIG_IDE_GENERIC=y +# CONFIG_BLK_DEV_PLATFORM is not set # CONFIG_BLK_DEV_CMD640 is not set # CONFIG_BLK_DEV_IDEPNP is not set +CONFIG_BLK_DEV_IDEDMA_SFF=y + +# +# PCI IDE chipsets support +# CONFIG_BLK_DEV_IDEPCI=y -# CONFIG_IDEPCI_SHARE_IRQ is not set CONFIG_IDEPCI_PCIBUS_ORDER=y # CONFIG_BLK_DEV_OFFBOARD is not set # CONFIG_BLK_DEV_GENERIC is not set # CONFIG_BLK_DEV_OPTI621 is not set # CONFIG_BLK_DEV_RZ1000 is not set CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -# CONFIG_IDEDMA_ONLYDISK is not set # CONFIG_BLK_DEV_AEC62XX is not set # CONFIG_BLK_DEV_ALI15X3 is not set CONFIG_BLK_DEV_AMD74XX=y @@ -487,9 +604,8 @@ CONFIG_BLK_DEV_PDC202XX_NEW=y # CONFIG_BLK_DEV_TRM290 is not set # CONFIG_BLK_DEV_VIA82CXXX is not set # CONFIG_BLK_DEV_TC86C001 is not set -# CONFIG_IDE_ARM is not set CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_IVB is not set +# CONFIG_BLK_DEV_HD_ONLY is not set # CONFIG_BLK_DEV_HD is not set # @@ -528,105 +644,16 @@ CONFIG_SCSI_WAIT_SCAN=m CONFIG_SCSI_SPI_ATTRS=y CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_ISCSI_ATTRS is not set -CONFIG_SCSI_SAS_ATTRS=y # CONFIG_SCSI_SAS_LIBSAS is not set - -# -# SCSI low-level drivers -# -# CONFIG_ISCSI_TCP is not set -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set -# CONFIG_SCSI_3W_9XXX is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AACRAID is not set -# CONFIG_SCSI_AIC7XXX is not set -# CONFIG_SCSI_AIC7XXX_OLD is not set -CONFIG_SCSI_AIC79XX=y -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=4000 -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -# CONFIG_SCSI_AIC94XX is not set -# CONFIG_SCSI_ARCMSR is not set -# CONFIG_MEGARAID_NEWGEN is not set -# CONFIG_MEGARAID_LEGACY is not set -# CONFIG_MEGARAID_SAS is not set -# CONFIG_SCSI_HPTIOP is not set -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_STEX is not set -# CONFIG_SCSI_SYM53C8XX_2 is not set -# CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -# CONFIG_SCSI_QLA_FC is not set -# CONFIG_SCSI_QLA_ISCSI is not set -# CONFIG_SCSI_LPFC is not set -# CONFIG_SCSI_DC395x is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_DEBUG is not set -# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_SRP_ATTRS is not set +# CONFIG_SCSI_LOWLEVEL is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +# CONFIG_SATA_PMP is not set CONFIG_SATA_AHCI=y -CONFIG_SATA_SVW=y -CONFIG_ATA_PIIX=y -# CONFIG_SATA_MV is not set -CONFIG_SATA_NV=y -# CONFIG_PDC_ADMA is not set -# CONFIG_SATA_QSTOR is not set -# CONFIG_SATA_PROMISE is not set -# CONFIG_SATA_SX4 is not set -CONFIG_SATA_SIL=y # CONFIG_SATA_SIL24 is not set -# CONFIG_SATA_SIS is not set -# CONFIG_SATA_ULI is not set -CONFIG_SATA_VIA=y -# CONFIG_SATA_VITESSE is not set -# CONFIG_SATA_INIC162X is not set -# CONFIG_PATA_ALI is not set -# CONFIG_PATA_AMD is not set -# CONFIG_PATA_ARTOP is not set -# CONFIG_PATA_ATIIXP is not set -# CONFIG_PATA_CMD640_PCI is not set -# CONFIG_PATA_CMD64X is not set -# CONFIG_PATA_CS5520 is not set -# CONFIG_PATA_CS5530 is not set -# CONFIG_PATA_CYPRESS is not set -# CONFIG_PATA_EFAR is not set -# CONFIG_ATA_GENERIC is not set -# CONFIG_PATA_HPT366 is not set -# CONFIG_PATA_HPT37X is not set -# CONFIG_PATA_HPT3X2N is not set -# CONFIG_PATA_HPT3X3 is not set -# CONFIG_PATA_IT821X is not set -# CONFIG_PATA_IT8213 is not set -# CONFIG_PATA_JMICRON is not set -# CONFIG_PATA_TRIFLEX is not set -# CONFIG_PATA_MARVELL is not set -# CONFIG_PATA_MPIIX is not set -# CONFIG_PATA_OLDPIIX is not set -# CONFIG_PATA_NETCELL is not set -# CONFIG_PATA_NS87410 is not set -# CONFIG_PATA_OPTI is not set -# CONFIG_PATA_OPTIDMA is not set -# CONFIG_PATA_PDC_OLD is not set -# CONFIG_PATA_RADISYS is not set -# CONFIG_PATA_RZ1000 is not set -# CONFIG_PATA_SC1200 is not set -# CONFIG_PATA_SERVERWORKS is not set -# CONFIG_PATA_PDC2027X is not set -# CONFIG_PATA_SIL680 is not set -# CONFIG_PATA_SIS is not set -# CONFIG_PATA_VIA is not set -# CONFIG_PATA_WINBOND is not set +# CONFIG_ATA_SFF is not set CONFIG_MD=y # CONFIG_BLK_DEV_MD is not set CONFIG_BLK_DEV_DM=y @@ -637,16 +664,14 @@ CONFIG_BLK_DEV_DM=y # CONFIG_DM_ZERO is not set # CONFIG_DM_MULTIPATH is not set # CONFIG_DM_DELAY is not set - -# -# Fusion MPT device support -# +# CONFIG_DM_UEVENT is not set CONFIG_FUSION=y CONFIG_FUSION_SPI=y # CONFIG_FUSION_FC is not set # CONFIG_FUSION_SAS is not set CONFIG_FUSION_MAX_SGE=128 # CONFIG_FUSION_CTL is not set +# CONFIG_FUSION_LOGGING is not set # # IEEE 1394 (FireWire) support @@ -687,6 +712,7 @@ CONFIG_NETDEVICES_MULTIQUEUE=y # CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set CONFIG_TUN=y +# CONFIG_VETH is not set # CONFIG_NET_SB1000 is not set # CONFIG_ARCNET is not set # CONFIG_PHYLIB is not set @@ -709,15 +735,21 @@ CONFIG_TULIP=y # CONFIG_DM9102 is not set # CONFIG_ULI526X is not set # CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set CONFIG_AMD8111_ETH=y # CONFIG_AMD8111E_NAPI is not set # CONFIG_ADAPTEC_STARFIRE is not set CONFIG_B44=y +CONFIG_B44_PCI_AUTOSELECT=y +CONFIG_B44_PCICORE_AUTOSELECT=y +CONFIG_B44_PCI=y CONFIG_FORCEDETH=y # CONFIG_FORCEDETH_NAPI is not set -# CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set @@ -729,6 +761,7 @@ CONFIG_8139TOO=y # CONFIG_8139TOO_TUNE_TWISTER is not set # CONFIG_8139TOO_8129 is not set # CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set @@ -740,6 +773,10 @@ CONFIG_NETDEV_1000=y CONFIG_E1000=y # CONFIG_E1000_NAPI is not set # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set +# CONFIG_E1000E is not set +# CONFIG_E1000E_ENABLED is not set +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -755,12 +792,17 @@ CONFIG_BNX2=y CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set +# CONFIG_IXGBE is not set # CONFIG_IXGB is not set CONFIG_S2IO=m # CONFIG_S2IO_NAPI is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set +# CONFIG_NIU is not set # CONFIG_MLX4_CORE is not set +# CONFIG_TEHUTI is not set +# CONFIG_BNX2X is not set +# CONFIG_SFC is not set # CONFIG_TR is not set # @@ -768,6 +810,8 @@ CONFIG_S2IO=m # # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set +# CONFIG_IWLWIFI is not set +# CONFIG_IWLWIFI_LEDS is not set # # USB Network Adapters @@ -776,7 +820,6 @@ CONFIG_S2IO=m # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set -# CONFIG_USB_USBNET_MII is not set # CONFIG_USB_USBNET is not set # CONFIG_WAN is not set # CONFIG_FDDI is not set @@ -784,8 +827,8 @@ CONFIG_S2IO=m # CONFIG_PPP is not set # CONFIG_SLIP is not set # CONFIG_NET_FC is not set -# CONFIG_SHAPER is not set CONFIG_NETCONSOLE=y +# CONFIG_NETCONSOLE_DYNAMIC is not set CONFIG_NETPOLL=y # CONFIG_NETPOLL_TRAP is not set CONFIG_NET_POLL_CONTROLLER=y @@ -807,7 +850,6 @@ CONFIG_INPUT_MOUSEDEV_PSAUX=y CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set @@ -856,7 +898,9 @@ CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y # CONFIG_VT_HW_CONSOLE_BINDING is not set +# CONFIG_DEVKMEM is not set # CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_NOZOMI is not set # # Serial drivers @@ -880,7 +924,6 @@ CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 # CONFIG_IPMI_HANDLER is not set -# CONFIG_WATCHDOG is not set CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_INTEL=y CONFIG_HW_RANDOM_AMD=y @@ -888,12 +931,6 @@ CONFIG_HW_RANDOM_AMD=y CONFIG_RTC=y # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set -CONFIG_AGP=y -CONFIG_AGP_AMD64=y -CONFIG_AGP_INTEL=y -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_VIA is not set -# CONFIG_DRM is not set # CONFIG_MWAVE is not set # CONFIG_PC8736x_GPIO is not set CONFIG_RAW_DRIVER=y @@ -906,40 +943,69 @@ CONFIG_HPET_MMAP=y # CONFIG_TELCLOCK is not set CONFIG_DEVPORT=y # CONFIG_I2C is not set - -# -# SPI support -# # CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set # CONFIG_W1 is not set -# CONFIG_POWER_SUPPLY is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_BATTERY_DS2760 is not set # CONFIG_HWMON is not set +CONFIG_THERMAL=y +# CONFIG_WATCHDOG is not set + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +CONFIG_SSB=y +CONFIG_SSB_SPROM=y +CONFIG_SSB_PCIHOST_POSSIBLE=y +CONFIG_SSB_PCIHOST=y +# CONFIG_SSB_B43_PCI_BRIDGE is not set +# CONFIG_SSB_DEBUG is not set +CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y +CONFIG_SSB_DRIVER_PCICORE=y # # Multifunction device drivers # # CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set # # Multimedia devices # + +# +# Multimedia core support +# # CONFIG_VIDEO_DEV is not set # CONFIG_DVB_CORE is not set + +# +# Multimedia drivers +# CONFIG_DAB=y # CONFIG_USB_DABUSB is not set # # Graphics support # +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_VIA is not set +# CONFIG_DRM is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +# CONFIG_FB is not set # CONFIG_BACKLIGHT_LCD_SUPPORT is not set # # Display device support # # CONFIG_DISPLAY_SUPPORT is not set -# CONFIG_VGASTATE is not set -# CONFIG_FB is not set # # Console display driver support @@ -971,6 +1037,7 @@ CONFIG_SOUND_PRIME=y CONFIG_HID_SUPPORT=y CONFIG_HID=y # CONFIG_HID_DEBUG is not set +CONFIG_HIDRAW=y # # USB Input Devices @@ -985,6 +1052,7 @@ CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set +# CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set # # Miscellaneous USB options @@ -993,18 +1061,19 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_DEVICE_CLASS is not set # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_SUSPEND is not set -# CONFIG_USB_PERSIST is not set # CONFIG_USB_OTG is not set # # USB Host Controller Drivers # +# CONFIG_USB_C67X00_HCD is not set CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_HCD_SSB is not set # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set # CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set CONFIG_USB_OHCI_LITTLE_ENDIAN=y @@ -1036,7 +1105,9 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set # CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set # CONFIG_USB_LIBUSUAL is not set # @@ -1049,10 +1120,6 @@ CONFIG_USB_MON=y # # USB port drivers # - -# -# USB Serial Converter support -# # CONFIG_USB_SERIAL is not set # @@ -1078,55 +1145,20 @@ CONFIG_USB_MON=y # CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set - -# -# USB DSL modem support -# - -# -# USB Gadget Support -# # CONFIG_USB_GADGET is not set # CONFIG_MMC is not set - -# -# LED devices -# +# CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set - -# -# LED drivers -# - -# -# LED Triggers -# +# CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set # CONFIG_EDAC is not set - -# -# Real Time Clock -# # CONFIG_RTC_CLASS is not set - -# -# DMA Engine support -# -# CONFIG_DMA_ENGINE is not set - -# -# DMA Clients -# +CONFIG_DMADEVICES=y # # DMA Devices # -CONFIG_VIRTUALIZATION=y -# CONFIG_KVM is not set - -# -# Userspace I/O -# +# CONFIG_INTEL_IOATDMA is not set # CONFIG_UIO is not set # @@ -1136,6 +1168,7 @@ CONFIG_VIRTUALIZATION=y # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set CONFIG_DMIID=y +# CONFIG_ISCSI_IBFT_FIND is not set # # File systems @@ -1164,12 +1197,10 @@ CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set +CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y # CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y # CONFIG_AUTOFS_FS is not set CONFIG_AUTOFS4_FS=y # CONFIG_FUSE_FS is not set @@ -1204,7 +1235,6 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y -CONFIG_RAMFS=y # CONFIG_CONFIGFS_FS is not set # @@ -1219,48 +1249,19 @@ CONFIG_RAMFS=y # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_V3_ACL is not set -# CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_V3_ACL is not set -# CONFIG_NFSD_V4 is not set -CONFIG_NFSD_TCP=y -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -# CONFIG_SUNRPC_BIND34 is not set -# CONFIG_RPCSEC_GSS_KRB5 is not set -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set +# CONFIG_NETWORK_FILESYSTEMS is not set # # Partition Types # # CONFIG_PARTITION_ADVANCED is not set CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# CONFIG_NLS=y CONFIG_NLS_DEFAULT="iso8859-1" CONFIG_NLS_CODEPAGE_437=y @@ -1301,25 +1302,16 @@ CONFIG_NLS_ISO8859_15=y # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set CONFIG_NLS_UTF8=y - -# -# Distributed Lock Manager -# # CONFIG_DLM is not set # -# Instrumentation Support -# -CONFIG_PROFILING=y -CONFIG_OPROFILE=y -CONFIG_KPROBES=y - -# # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y # CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_FS=y @@ -1330,6 +1322,7 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHED_DEBUG is not set # CONFIG_SCHEDSTATS is not set CONFIG_TIMER_STATS=y +# CONFIG_DEBUG_OBJECTS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_RT_MUTEX_TESTER is not set @@ -1344,28 +1337,64 @@ CONFIG_TIMER_STATS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_SG is not set # CONFIG_FRAME_POINTER is not set -CONFIG_OPTIMIZE_INLINING=y +# CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_KPROBES_SANITY_TEST is not set +# CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set -# CONFIG_DEBUG_RODATA is not set -# CONFIG_IOMMU_DEBUG is not set +# CONFIG_LATENCYTOP is not set +# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set +# CONFIG_SAMPLES is not set +# CONFIG_KGDB is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_NONPROMISC_DEVMEM is not set +CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_DEBUG_PER_CPU_MAPS is not set +# CONFIG_X86_PTDUMP is not set +# CONFIG_DEBUG_RODATA is not set +# CONFIG_DIRECT_GBPAGES is not set +# CONFIG_DEBUG_NX_TEST is not set +CONFIG_X86_MPPARSE=y +# CONFIG_IOMMU_DEBUG is not set +CONFIG_IO_DELAY_TYPE_0X80=0 +CONFIG_IO_DELAY_TYPE_0XED=1 +CONFIG_IO_DELAY_TYPE_UDELAY=2 +CONFIG_IO_DELAY_TYPE_NONE=3 +CONFIG_IO_DELAY_0X80=y +# CONFIG_IO_DELAY_0XED is not set +# CONFIG_IO_DELAY_UDELAY is not set +# CONFIG_IO_DELAY_NONE is not set +CONFIG_DEFAULT_IO_DELAY_TYPE=0 +# CONFIG_DEBUG_BOOT_PARAMS is not set +# CONFIG_CPA_DEBUG is not set # # Security options # # CONFIG_KEYS is not set # CONFIG_SECURITY is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set # CONFIG_CRYPTO is not set +# CONFIG_HAVE_KVM is not set +# CONFIG_VIRTUALIZATION is not set +# CONFIG_KVM is not set +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set # # Library routines # CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_FIRST_BIT=y +CONFIG_GENERIC_FIND_NEXT_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set # CONFIG_CRC_ITU_T is not set -- cgit v0.10.2 From b5d958ea66ac11b1190c24f042f517adf9229a98 Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Wed, 9 Apr 2008 13:17:39 -0700 Subject: e1000e: set CONFIG_E1000E=y in x86 defconfigs This adds to the already default CONFIG_E1000=y in these files. Signed-off-by: Auke Kok Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index ad7ddaa..73f2c3c 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -814,6 +814,10 @@ CONFIG_NETDEV_1000=y CONFIG_E1000=y # CONFIG_E1000_NAPI is not set # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set +CONFIG_E1000E=y +CONFIG_E1000E_ENABLED=y +# CONFIG_IP1000 is not set +# CONFIG_IGB is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set -- cgit v0.10.2 From 5cb04df8d3f03e37a19f2502591a84156be71772 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 4 May 2008 19:49:04 +0200 Subject: x86: defconfig updates refresh 32-bit defconfig too, and update the 64-bit configs as well, the defconfig should be much more useful by default, so most of the updates are the enabling of various options. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 73f2c3c..9bc34e2 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,54 +1,103 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.22-git14 -# Fri Jul 20 09:53:15 2007 +# Linux kernel version: 2.6.26-rc1 +# Sun May 4 19:59:02 2008 # +# CONFIG_64BIT is not set CONFIG_X86_32=y +# CONFIG_X86_64 is not set +CONFIG_X86=y +CONFIG_DEFCONFIG_LIST="arch/x86/configs/i386_defconfig" +# CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_CLOCKSOURCE_WATCHDOG=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y -CONFIG_SEMAPHORE_SLEEPERS=y -CONFIG_X86=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_FAST_CMPXCHG_LOCAL=y CONFIG_MMU=y CONFIG_ZONE_DMA=y -CONFIG_QUICKLIST=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_BUG=y CONFIG_GENERIC_HWEIGHT=y +# CONFIG_GENERIC_GPIO is not set CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_DMI=y -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +# CONFIG_GENERIC_TIME_VSYSCALL is not set +CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +CONFIG_HAVE_SETUP_PER_CPU_AREA=y +# CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set +CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +# CONFIG_ZONE_DMA32 is not set +CONFIG_ARCH_POPULATES_NODE_MAP=y +# CONFIG_AUDIT_ARCH is not set +CONFIG_ARCH_SUPPORTS_AOUT=y +CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_32_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_KTIME_SCALAR=y # -# Code maturity level options +# General setup # CONFIG_EXPERIMENTAL=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y +# CONFIG_LOCALVERSION_AUTO is not set CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_USER_NS is not set -# CONFIG_AUDIT is not set -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=18 -# CONFIG_CPUSETS is not set -CONFIG_SYSFS_DEPRECATED=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_AUDITSYSCALL=y +CONFIG_AUDIT_TREE=y +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_NS=y +# CONFIG_CGROUP_DEVICE is not set +CONFIG_CPUSETS=y +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +# CONFIG_RT_GROUP_SCHED is not set +# CONFIG_USER_SCHED is not set +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +# CONFIG_CGROUP_MEM_RES_CTLR is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set +CONFIG_PROC_PID_CPUSET=y CONFIG_RELAY=y +CONFIG_NAMESPACES=y +CONFIG_UTS_NS=y +CONFIG_IPC_NS=y +CONFIG_USER_NS=y +CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -56,13 +105,15 @@ CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y +CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y -# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y +# CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_ANON_INODES=y @@ -76,6 +127,17 @@ CONFIG_SLUB_DEBUG=y # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set +CONFIG_PROFILING=y +CONFIG_MARKERS=y +# CONFIG_OPROFILE is not set +CONFIG_HAVE_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_KRETPROBES=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +# CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -87,10 +149,10 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_KMOD is not set CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y -CONFIG_LBD=y -# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LBD is not set +CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_LSF is not set -# CONFIG_BLK_DEV_BSG is not set +CONFIG_BLK_DEV_BSG=y # # IO Schedulers @@ -103,7 +165,8 @@ CONFIG_IOSCHED_CFQ=y # CONFIG_DEFAULT_DEADLINE is not set CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_CLASSIC_RCU=y # # Processor type and features @@ -111,18 +174,21 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y -# CONFIG_X86_PC is not set +CONFIG_X86_PC=y # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set # CONFIG_X86_NUMAQ is not set # CONFIG_X86_SUMMIT is not set # CONFIG_X86_BIGSMP is not set # CONFIG_X86_VISWS is not set -CONFIG_X86_GENERICARCH=y +# CONFIG_X86_GENERICARCH is not set # CONFIG_X86_ES7000 is not set -# CONFIG_PARAVIRT is not set -CONFIG_X86_CYCLONE_TIMER=y +# CONFIG_X86_RDC321X is not set +# CONFIG_X86_VSMP is not set +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +# CONFIG_PARAVIRT_GUEST is not set # CONFIG_M386 is not set # CONFIG_M486 is not set # CONFIG_M586 is not set @@ -130,9 +196,8 @@ CONFIG_X86_CYCLONE_TIMER=y # CONFIG_M586MMX is not set # CONFIG_M686 is not set # CONFIG_MPENTIUMII is not set -CONFIG_MPENTIUMIII=y +# CONFIG_MPENTIUMIII is not set # CONFIG_MPENTIUMM is not set -# CONFIG_MCORE2 is not set # CONFIG_MPENTIUM4 is not set # CONFIG_MK6 is not set # CONFIG_MK7 is not set @@ -147,14 +212,14 @@ CONFIG_MPENTIUMIII=y # CONFIG_MCYRIXIII is not set # CONFIG_MVIAC3_2 is not set # CONFIG_MVIAC7 is not set -CONFIG_X86_GENERIC=y +# CONFIG_MPSC is not set +CONFIG_MCORE2=y +# CONFIG_GENERIC_CPU is not set +# CONFIG_X86_GENERIC is not set +CONFIG_X86_CPU=y CONFIG_X86_CMPXCHG=y -CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_XADD=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_X86_WP_WORKS_OK=y CONFIG_X86_INVLPG=y CONFIG_X86_BSWAP=y @@ -162,106 +227,120 @@ CONFIG_X86_POPAD_OK=y CONFIG_X86_GOOD_APIC=y CONFIG_X86_INTEL_USERCOPY=y CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_P6_NOP=y CONFIG_X86_TSC=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=4 +CONFIG_X86_MINIMUM_CPU_FAMILY=6 +CONFIG_X86_DEBUGCTLMSR=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y -CONFIG_NR_CPUS=32 -CONFIG_SCHED_SMT=y +CONFIG_DMI=y +# CONFIG_IOMMU_HELPER is not set +CONFIG_NR_CPUS=4 +# CONFIG_SCHED_SMT is not set CONFIG_SCHED_MC=y # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set -CONFIG_PREEMPT_BKL=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y -CONFIG_X86_MCE=y -CONFIG_X86_MCE_NONFATAL=y -CONFIG_X86_MCE_P4THERMAL=y +# CONFIG_X86_MCE is not set CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set # CONFIG_X86_REBOOTFIXUPS is not set -CONFIG_MICROCODE=y -CONFIG_MICROCODE_OLD_INTERFACE=y +# CONFIG_MICROCODE is not set CONFIG_X86_MSR=y CONFIG_X86_CPUID=y - -# -# Firmware Drivers -# -# CONFIG_EDD is not set -# CONFIG_DELL_RBU is not set -# CONFIG_DCDBAS is not set -CONFIG_DMIID=y # CONFIG_NOHIGHMEM is not set CONFIG_HIGHMEM4G=y # CONFIG_HIGHMEM64G is not set CONFIG_PAGE_OFFSET=0xC0000000 CONFIG_HIGHMEM=y -CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_NEED_NODE_MEMMAP_SIZE=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y +# CONFIG_FLATMEM_MANUAL is not set # CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y +CONFIG_SPARSEMEM_STATIC=y +# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set + +# +# Memory hotplug is currently incompatible with Software Suspend +# +CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y -CONFIG_NR_QUICK=1 CONFIG_VIRT_TO_BUS=y # CONFIG_HIGHPTE is not set # CONFIG_MATH_EMULATION is not set CONFIG_MTRR=y -# CONFIG_EFI is not set +# CONFIG_X86_PAT is not set +CONFIG_EFI=y # CONFIG_IRQBALANCE is not set CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -# CONFIG_KEXEC is not set -# CONFIG_CRASH_DUMP is not set -CONFIG_PHYSICAL_START=0x100000 -# CONFIG_RELOCATABLE is not set -CONFIG_PHYSICAL_ALIGN=0x100000 -# CONFIG_HOTPLUG_CPU is not set -CONFIG_COMPAT_VDSO=y +CONFIG_HZ_1000=y +CONFIG_HZ=1000 +CONFIG_SCHED_HRTICK=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_PHYSICAL_START=0x1000000 +CONFIG_RELOCATABLE=y +CONFIG_PHYSICAL_ALIGN=0x200000 +CONFIG_HOTPLUG_CPU=y +# CONFIG_COMPAT_VDSO is not set CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # -# Power management options (ACPI, APM) +# Power management options # CONFIG_PM=y -CONFIG_PM_LEGACY=y -# CONFIG_PM_DEBUG is not set - -# -# ACPI (Advanced Configuration and Power Interface) Support -# +CONFIG_PM_DEBUG=y +# CONFIG_PM_VERBOSE is not set +CONFIG_CAN_PM_TRACE=y +CONFIG_PM_TRACE=y +CONFIG_PM_TRACE_RTC=y +CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +CONFIG_HIBERNATION=y +CONFIG_PM_STD_PARTITION="" CONFIG_ACPI=y +CONFIG_ACPI_SLEEP=y CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_PROCFS_POWER=y +CONFIG_ACPI_SYSFS_POWER=y +CONFIG_ACPI_PROC_EVENT=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y CONFIG_ACPI_FAN=y -# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_DOCK=y +# CONFIG_ACPI_BAY is not set CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y +# CONFIG_ACPI_WMI is not set # CONFIG_ACPI_ASUS is not set # CONFIG_ACPI_TOSHIBA is not set -CONFIG_ACPI_BLACKLIST_YEAR=2001 -CONFIG_ACPI_DEBUG=y +# CONFIG_ACPI_CUSTOM_DSDT is not set +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y -# CONFIG_ACPI_CONTAINER is not set +CONFIG_ACPI_CONTAINER=y # CONFIG_ACPI_SBS is not set # CONFIG_APM is not set @@ -271,15 +350,17 @@ CONFIG_X86_PM_TIMER=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_TABLE=y CONFIG_CPU_FREQ_DEBUG=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_STAT_DETAILS is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_STAT is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set # # CPUFreq processor drivers @@ -287,8 +368,7 @@ CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_X86_ACPI_CPUFREQ=y # CONFIG_X86_POWERNOW_K6 is not set # CONFIG_X86_POWERNOW_K7 is not set -CONFIG_X86_POWERNOW_K8=y -CONFIG_X86_POWERNOW_K8_ACPI=y +# CONFIG_X86_POWERNOW_K8 is not set # CONFIG_X86_GX_SUSPMOD is not set # CONFIG_X86_SPEEDSTEP_CENTRINO is not set # CONFIG_X86_SPEEDSTEP_ICH is not set @@ -302,43 +382,72 @@ CONFIG_X86_POWERNOW_K8_ACPI=y # # shared options # -CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y +# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set # CONFIG_X86_SPEEDSTEP_LIB is not set +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y # -# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# Bus options (PCI etc.) # CONFIG_PCI=y # CONFIG_PCI_GOBIOS is not set # CONFIG_PCI_GOMMCONFIG is not set # CONFIG_PCI_GODIRECT is not set CONFIG_PCI_GOANY=y +# CONFIG_PCI_GOOLPC is not set CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y -# CONFIG_PCIEPORTBUS is not set +CONFIG_PCI_DOMAINS=y +CONFIG_PCIEPORTBUS=y +# CONFIG_HOTPLUG_PCI_PCIE is not set +CONFIG_PCIEAER=y +# CONFIG_PCIEASPM is not set CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y +# CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set -# CONFIG_HT_IRQ is not set +CONFIG_HT_IRQ=y CONFIG_ISA_DMA_API=y # CONFIG_ISA is not set # CONFIG_MCA is not set # CONFIG_SCx200 is not set +# CONFIG_OLPC is not set CONFIG_K8_NB=y - -# -# PCCARD (PCMCIA/CardBus) support -# -# CONFIG_PCCARD is not set -# CONFIG_HOTPLUG_PCI is not set - -# -# Executable file formats +CONFIG_PCCARD=y +# CONFIG_PCMCIA_DEBUG is not set +CONFIG_PCMCIA=y +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y +CONFIG_CARDBUS=y + +# +# PC-card bridges +# +CONFIG_YENTA=y +CONFIG_YENTA_O2=y +CONFIG_YENTA_RICOH=y +CONFIG_YENTA_TI=y +CONFIG_YENTA_ENE_TUNE=y +CONFIG_YENTA_TOSHIBA=y +# CONFIG_PD6729 is not set +# CONFIG_I82092 is not set +CONFIG_PCCARD_NONSTATIC=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_HOTPLUG_PCI_FAKE is not set +# CONFIG_HOTPLUG_PCI_IBM is not set +# CONFIG_HOTPLUG_PCI_ACPI is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set + +# +# Executable file formats / Emulations # CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_AOUT is not set -# CONFIG_BINFMT_MISC is not set +CONFIG_BINFMT_MISC=y # # Networking @@ -349,59 +458,142 @@ CONFIG_NET=y # Networking options # CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set +CONFIG_PACKET_MMAP=y CONFIG_UNIX=y CONFIG_XFRM=y -# CONFIG_XFRM_USER is not set +CONFIG_XFRM_USER=y # CONFIG_XFRM_SUB_POLICY is not set # CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set # CONFIG_NET_KEY is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set CONFIG_IP_FIB_HASH=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y # CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set +CONFIG_SYN_COOKIES=y # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set # CONFIG_INET_XFRM_TUNNEL is not set CONFIG_INET_TUNNEL=y -CONFIG_INET_XFRM_MODE_TRANSPORT=y -CONFIG_INET_XFRM_MODE_TUNNEL=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -CONFIG_INET_DIAG=y -CONFIG_INET_TCP_DIAG=y -# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_INET_LRO=y +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set CONFIG_TCP_CONG_CUBIC=y +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set +# CONFIG_TCP_CONG_LP is not set +# CONFIG_TCP_CONG_VENO is not set +# CONFIG_TCP_CONG_YEAH is not set +# CONFIG_TCP_CONG_ILLINOIS is not set +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set # CONFIG_IPV6_OPTIMISTIC_DAD is not set -# CONFIG_INET6_AH is not set -# CONFIG_INET6_ESP is not set +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y # CONFIG_INET6_IPCOMP is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_INET6_XFRM_TUNNEL is not set # CONFIG_INET6_TUNNEL is not set CONFIG_INET6_XFRM_MODE_TRANSPORT=y CONFIG_INET6_XFRM_MODE_TUNNEL=y -# CONFIG_INET6_XFRM_MODE_BEET is not set +CONFIG_INET6_XFRM_MODE_BEET=y # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_SIT=y +CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_IPV6_TUNNEL is not set # CONFIG_IPV6_MULTIPLE_TABLES is not set -# CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set +# CONFIG_IPV6_MROUTE is not set +CONFIG_NETLABEL=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +# CONFIG_NETFILTER_ADVANCED is not set + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_SIP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_LOG=y +CONFIG_IP_NF_TARGET_ULOG=y +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_NF_NAT_FTP=y +CONFIG_NF_NAT_IRC=y +# CONFIG_NF_NAT_TFTP is not set +# CONFIG_NF_NAT_AMANDA is not set +# CONFIG_NF_NAT_PPTP is not set +# CONFIG_NF_NAT_H323 is not set +CONFIG_NF_NAT_SIP=y +CONFIG_IP_NF_MANGLE=y + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_TIPC is not set @@ -409,6 +601,7 @@ CONFIG_IPV6_SIT=y # CONFIG_BRIDGE is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set +CONFIG_LLC=y # CONFIG_LLC2 is not set # CONFIG_IPX is not set # CONFIG_ATALK is not set @@ -416,28 +609,99 @@ CONFIG_IPV6_SIT=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +# CONFIG_NET_SCH_HTB is not set +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_RR is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_INGRESS is not set + +# +# Classification +# +CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set +# CONFIG_NET_CLS_U32 is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +# CONFIG_NET_EMATCH_CMP is not set +# CONFIG_NET_EMATCH_NBYTE is not set +# CONFIG_NET_EMATCH_U32 is not set +# CONFIG_NET_EMATCH_META is not set +# CONFIG_NET_EMATCH_TEXT is not set +CONFIG_NET_CLS_ACT=y +# CONFIG_NET_ACT_POLICE is not set +# CONFIG_NET_ACT_GACT is not set +# CONFIG_NET_ACT_MIRRED is not set +# CONFIG_NET_ACT_IPT is not set +# CONFIG_NET_ACT_NAT is not set +# CONFIG_NET_ACT_PEDIT is not set +# CONFIG_NET_ACT_SIMP is not set +CONFIG_NET_SCH_FIFO=y # # Network testing # # CONFIG_NET_PKTGEN is not set # CONFIG_NET_TCPPROBE is not set -# CONFIG_HAMRADIO is not set +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +# CONFIG_AX25 is not set +# CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y # # Wireless # -# CONFIG_CFG80211 is not set -# CONFIG_WIRELESS_EXT is not set -# CONFIG_MAC80211 is not set +CONFIG_CFG80211=y +CONFIG_NL80211=y +CONFIG_WIRELESS_EXT=y +CONFIG_MAC80211=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. +# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set +CONFIG_MAC80211_LEDS=y +# CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set +# CONFIG_MAC80211_DEBUG is not set # CONFIG_IEEE80211 is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -449,13 +713,15 @@ CONFIG_IPV6_SIT=y # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set +CONFIG_DEBUG_DEVRES=y # CONFIG_SYS_HYPERVISOR is not set -# CONFIG_CONNECTOR is not set +CONFIG_CONNECTOR=y +CONFIG_PROC_EVENTS=y # CONFIG_MTD is not set # CONFIG_PARPORT is not set CONFIG_PNP=y @@ -466,7 +732,7 @@ CONFIG_PNP=y # CONFIG_PNPACPI=y CONFIG_BLK_DEV=y -CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -479,8 +745,8 @@ CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_UB is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set CONFIG_MISC_DEVICES=y @@ -489,73 +755,17 @@ CONFIG_MISC_DEVICES=y # CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set +# CONFIG_ACER_WMI is not set +# CONFIG_ASUS_LAPTOP is not set +# CONFIG_FUJITSU_LAPTOP is not set +# CONFIG_TC1100_WMI is not set +# CONFIG_MSI_LAPTOP is not set # CONFIG_SONY_LAPTOP is not set # CONFIG_THINKPAD_ACPI is not set -CONFIG_IDE=y -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_IDE_SATA is not set -# CONFIG_BLK_DEV_HD_IDE is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_BLK_DEV_IDECD=y -# CONFIG_BLK_DEV_IDETAPE is not set -# CONFIG_BLK_DEV_IDEFLOPPY is not set -# CONFIG_BLK_DEV_IDESCSI is not set -CONFIG_BLK_DEV_IDEACPI=y -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_IDE_PROC_FS=y - -# -# IDE chipset support/bugfixes -# -CONFIG_IDE_GENERIC=y -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_IDEPNP is not set -CONFIG_BLK_DEV_IDEPCI=y -# CONFIG_IDEPCI_SHARE_IRQ is not set -CONFIG_IDEPCI_PCIBUS_ORDER=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_GENERIC is not set -# CONFIG_BLK_DEV_OPTI621 is not set -# CONFIG_BLK_DEV_RZ1000 is not set -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -# CONFIG_IDEDMA_ONLYDISK is not set -# CONFIG_BLK_DEV_AEC62XX is not set -# CONFIG_BLK_DEV_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_BLK_DEV_ATIIXP is not set -# CONFIG_BLK_DEV_CMD64X is not set -# CONFIG_BLK_DEV_TRIFLEX is not set -# CONFIG_BLK_DEV_CY82C693 is not set -# CONFIG_BLK_DEV_CS5520 is not set -# CONFIG_BLK_DEV_CS5530 is not set -# CONFIG_BLK_DEV_CS5535 is not set -# CONFIG_BLK_DEV_HPT34X is not set -# CONFIG_BLK_DEV_HPT366 is not set -# CONFIG_BLK_DEV_JMICRON is not set -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_IT8213 is not set -# CONFIG_BLK_DEV_IT821X is not set -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_PDC202XX_OLD is not set -# CONFIG_BLK_DEV_PDC202XX_NEW is not set -# CONFIG_BLK_DEV_SVWKS is not set -# CONFIG_BLK_DEV_SIIMAGE is not set -# CONFIG_BLK_DEV_SIS5513 is not set -# CONFIG_BLK_DEV_SLC90E66 is not set -# CONFIG_BLK_DEV_TRM290 is not set -# CONFIG_BLK_DEV_VIA82CXXX is not set -# CONFIG_BLK_DEV_TC86C001 is not set -# CONFIG_IDE_ARM is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_BLK_DEV_HD is not set +# CONFIG_INTEL_MENLOW is not set +# CONFIG_ENCLOSURE_SERVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set # # SCSI device support @@ -564,8 +774,8 @@ CONFIG_BLK_DEV_IDEDMA=y CONFIG_SCSI=y CONFIG_SCSI_DMA=y # CONFIG_SCSI_TGT is not set -CONFIG_SCSI_NETLINK=y -# CONFIG_SCSI_PROC_FS is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) @@ -574,7 +784,7 @@ CONFIG_BLK_DEV_SD=y # CONFIG_CHR_DEV_ST is not set # CONFIG_CHR_DEV_OSST is not set CONFIG_BLK_DEV_SR=y -# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y # CONFIG_CHR_DEV_SCH is not set @@ -582,7 +792,7 @@ CONFIG_CHR_DEV_SG=y # Some SCSI devices (e.g. CD jukebox) support multiple LUNs # # CONFIG_SCSI_MULTI_LUN is not set -# CONFIG_SCSI_CONSTANTS is not set +CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set CONFIG_SCSI_WAIT_SCAN=m @@ -591,81 +801,37 @@ CONFIG_SCSI_WAIT_SCAN=m # SCSI Transports # CONFIG_SCSI_SPI_ATTRS=y -CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set - -# -# SCSI low-level drivers -# -# CONFIG_ISCSI_TCP is not set -CONFIG_BLK_DEV_3W_XXXX_RAID=y -# CONFIG_SCSI_3W_9XXX is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AACRAID is not set -CONFIG_SCSI_AIC7XXX=y -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=5000 -CONFIG_AIC7XXX_DEBUG_ENABLE=y -CONFIG_AIC7XXX_DEBUG_MASK=0 -CONFIG_AIC7XXX_REG_PRETTY_PRINT=y -# CONFIG_SCSI_AIC7XXX_OLD is not set -CONFIG_SCSI_AIC79XX=y -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=4000 -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -# CONFIG_SCSI_AIC94XX is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_ARCMSR is not set -# CONFIG_MEGARAID_NEWGEN is not set -# CONFIG_MEGARAID_LEGACY is not set -# CONFIG_MEGARAID_SAS is not set -# CONFIG_SCSI_HPTIOP is not set -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_STEX is not set -# CONFIG_SCSI_SYM53C8XX_2 is not set -# CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -# CONFIG_SCSI_QLA_FC is not set -# CONFIG_SCSI_QLA_ISCSI is not set -# CONFIG_SCSI_LPFC is not set -# CONFIG_SCSI_DC395x is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_NSP32 is not set -# CONFIG_SCSI_DEBUG is not set -# CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_SRP_ATTRS is not set +# CONFIG_SCSI_LOWLEVEL is not set +# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=y -CONFIG_SATA_SVW=y +# CONFIG_SATA_SIL24 is not set +CONFIG_ATA_SFF=y +# CONFIG_SATA_SVW is not set CONFIG_ATA_PIIX=y # CONFIG_SATA_MV is not set -CONFIG_SATA_NV=y +# CONFIG_SATA_NV is not set # CONFIG_PDC_ADMA is not set # CONFIG_SATA_QSTOR is not set # CONFIG_SATA_PROMISE is not set # CONFIG_SATA_SX4 is not set -CONFIG_SATA_SIL=y -# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIL is not set # CONFIG_SATA_SIS is not set # CONFIG_SATA_ULI is not set -CONFIG_SATA_VIA=y +# CONFIG_SATA_VIA is not set # CONFIG_SATA_VITESSE is not set # CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ACPI is not set # CONFIG_PATA_ALI is not set -# CONFIG_PATA_AMD is not set +CONFIG_PATA_AMD=y # CONFIG_PATA_ARTOP is not set # CONFIG_PATA_ATIIXP is not set # CONFIG_PATA_CMD640_PCI is not set @@ -673,6 +839,7 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_CS5520 is not set # CONFIG_PATA_CS5530 is not set # CONFIG_PATA_CS5535 is not set +# CONFIG_PATA_CS5536 is not set # CONFIG_PATA_CYPRESS is not set # CONFIG_PATA_EFAR is not set # CONFIG_ATA_GENERIC is not set @@ -686,11 +853,14 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_TRIFLEX is not set # CONFIG_PATA_MARVELL is not set # CONFIG_PATA_MPIIX is not set -# CONFIG_PATA_OLDPIIX is not set +CONFIG_PATA_OLDPIIX=y # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_NS87415 is not set # CONFIG_PATA_OPTI is not set # CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PCMCIA is not set # CONFIG_PATA_PDC_OLD is not set # CONFIG_PATA_RADISYS is not set # CONFIG_PATA_RZ1000 is not set @@ -702,65 +872,42 @@ CONFIG_SATA_VIA=y # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set CONFIG_MD=y -# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_MD=y +# CONFIG_MD_LINEAR is not set +# CONFIG_MD_RAID0 is not set +# CONFIG_MD_RAID1 is not set +# CONFIG_MD_RAID10 is not set +# CONFIG_MD_RAID456 is not set +# CONFIG_MD_MULTIPATH is not set +# CONFIG_MD_FAULTY is not set CONFIG_BLK_DEV_DM=y # CONFIG_DM_DEBUG is not set # CONFIG_DM_CRYPT is not set # CONFIG_DM_SNAPSHOT is not set -# CONFIG_DM_MIRROR is not set -# CONFIG_DM_ZERO is not set +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y # CONFIG_DM_MULTIPATH is not set # CONFIG_DM_DELAY is not set - -# -# Fusion MPT device support -# -CONFIG_FUSION=y -CONFIG_FUSION_SPI=y -# CONFIG_FUSION_FC is not set -# CONFIG_FUSION_SAS is not set -CONFIG_FUSION_MAX_SGE=128 -# CONFIG_FUSION_CTL is not set +# CONFIG_DM_UEVENT is not set +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support # # CONFIG_FIREWIRE is not set -CONFIG_IEEE1394=y - -# -# Subsystem Options -# -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# Controllers -# - -# -# Texas Instruments PCILynx requires I2C -# -CONFIG_IEEE1394_OHCI1394=y - -# -# Protocols -# -# CONFIG_IEEE1394_VIDEO1394 is not set -# CONFIG_IEEE1394_SBP2 is not set -# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set -# CONFIG_IEEE1394_ETH1394 is not set -# CONFIG_IEEE1394_DV1394 is not set -CONFIG_IEEE1394_RAWIO=y +# CONFIG_IEEE1394 is not set # CONFIG_I2O is not set CONFIG_MACINTOSH_DRIVERS=y -# CONFIG_MAC_EMUMOUSEBTN is not set +CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -CONFIG_NETDEVICES_MULTIQUEUE=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_IFB is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set +# CONFIG_VETH is not set # CONFIG_NET_SB1000 is not set # CONFIG_ARCNET is not set # CONFIG_PHYLIB is not set @@ -770,38 +917,40 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_CASSINI is not set CONFIG_NET_VENDOR_3COM=y -CONFIG_VORTEX=y +# CONFIG_VORTEX is not set # CONFIG_TYPHOON is not set CONFIG_NET_TULIP=y # CONFIG_DE2104X is not set -CONFIG_TULIP=y -# CONFIG_TULIP_MWI is not set -# CONFIG_TULIP_MMIO is not set -# CONFIG_TULIP_NAPI is not set +# CONFIG_TULIP is not set # CONFIG_DE4X5 is not set # CONFIG_WINBOND_840 is not set # CONFIG_DM9102 is not set # CONFIG_ULI526X is not set +# CONFIG_PCMCIA_XIRCOM is not set # CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set # CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set -CONFIG_B44=y +# CONFIG_B44 is not set CONFIG_FORCEDETH=y # CONFIG_FORCEDETH_NAPI is not set -# CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set -CONFIG_8139CP=y +# CONFIG_8139CP is not set CONFIG_8139TOO=y -# CONFIG_8139TOO_PIO is not set +CONFIG_8139TOO_PIO=y # CONFIG_8139TOO_TUNE_TWISTER is not set # CONFIG_8139TOO_8129 is not set # CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set @@ -814,38 +963,75 @@ CONFIG_NETDEV_1000=y CONFIG_E1000=y # CONFIG_E1000_NAPI is not set # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set -CONFIG_E1000E=y -CONFIG_E1000E_ENABLED=y +# CONFIG_E1000E is not set +# CONFIG_E1000E_ENABLED is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set -CONFIG_R8169=y -# CONFIG_R8169_NAPI is not set +# CONFIG_R8169 is not set # CONFIG_SIS190 is not set # CONFIG_SKGE is not set CONFIG_SKY2=y +# CONFIG_SKY2_DEBUG is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y -CONFIG_BNX2=y +# CONFIG_BNX2 is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set +# CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set +# CONFIG_NIU is not set # CONFIG_MLX4_CORE is not set -# CONFIG_TR is not set +# CONFIG_TEHUTI is not set +# CONFIG_BNX2X is not set +# CONFIG_SFC is not set +CONFIG_TR=y +# CONFIG_IBMOL is not set +# CONFIG_IBMLS is not set +# CONFIG_3C359 is not set +# CONFIG_TMS380TR is not set # # Wireless LAN # # CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set +CONFIG_WLAN_80211=y +# CONFIG_PCMCIA_RAYCS is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_LIBERTAS is not set +# CONFIG_AIRO is not set +# CONFIG_HERMES is not set +# CONFIG_ATMEL is not set +# CONFIG_AIRO_CS is not set +# CONFIG_PCMCIA_WL3501 is not set +# CONFIG_PRISM54 is not set +# CONFIG_USB_ZD1201 is not set +# CONFIG_USB_NET_RNDIS_WLAN is not set +# CONFIG_RTL8180 is not set +# CONFIG_RTL8187 is not set +# CONFIG_ADM8211 is not set +# CONFIG_P54_COMMON is not set +CONFIG_ATH5K=y +# CONFIG_ATH5K_DEBUG is not set +# CONFIG_IWLWIFI is not set +# CONFIG_IWLCORE is not set +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWL4965 is not set +# CONFIG_IWL3945 is not set +# CONFIG_HOSTAP is not set +# CONFIG_B43 is not set +# CONFIG_B43LEGACY is not set +# CONFIG_ZD1211RW is not set +# CONFIG_RT2X00 is not set # # USB Network Adapters @@ -854,16 +1040,27 @@ CONFIG_NETDEV_10000=y # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set -# CONFIG_USB_USBNET_MII is not set # CONFIG_USB_USBNET is not set +CONFIG_NET_PCMCIA=y +# CONFIG_PCMCIA_3C589 is not set +# CONFIG_PCMCIA_3C574 is not set +# CONFIG_PCMCIA_FMVJ18X is not set +# CONFIG_PCMCIA_PCNET is not set +# CONFIG_PCMCIA_NMCLAN is not set +# CONFIG_PCMCIA_SMC91C92 is not set +# CONFIG_PCMCIA_XIRC2PS is not set +# CONFIG_PCMCIA_AXNET is not set +# CONFIG_PCMCIA_IBMTR is not set # CONFIG_WAN is not set -# CONFIG_FDDI is not set +CONFIG_FDDI=y +# CONFIG_DEFXX is not set +# CONFIG_SKFP is not set # CONFIG_HIPPI is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set # CONFIG_NET_FC is not set -# CONFIG_SHAPER is not set CONFIG_NETCONSOLE=y +# CONFIG_NETCONSOLE_DYNAMIC is not set CONFIG_NETPOLL=y # CONFIG_NETPOLL_TRAP is not set CONFIG_NET_POLL_CONTROLLER=y @@ -874,18 +1071,17 @@ CONFIG_NET_POLL_CONTROLLER=y # Input device support # CONFIG_INPUT=y -# CONFIG_INPUT_FF_MEMLESS is not set -# CONFIG_INPUT_POLLDEV is not set +CONFIG_INPUT_FF_MEMLESS=y +CONFIG_INPUT_POLLDEV=y # # Userland interfaces # CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_PSAUX=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set @@ -910,17 +1106,63 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set # CONFIG_MOUSE_VSXXXAA is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TABLET is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set +CONFIG_INPUT_JOYSTICK=y +# CONFIG_JOYSTICK_ANALOG is not set +# CONFIG_JOYSTICK_A3D is not set +# CONFIG_JOYSTICK_ADI is not set +# CONFIG_JOYSTICK_COBRA is not set +# CONFIG_JOYSTICK_GF2K is not set +# CONFIG_JOYSTICK_GRIP is not set +# CONFIG_JOYSTICK_GRIP_MP is not set +# CONFIG_JOYSTICK_GUILLEMOT is not set +# CONFIG_JOYSTICK_INTERACT is not set +# CONFIG_JOYSTICK_SIDEWINDER is not set +# CONFIG_JOYSTICK_TMDC is not set +# CONFIG_JOYSTICK_IFORCE is not set +# CONFIG_JOYSTICK_WARRIOR is not set +# CONFIG_JOYSTICK_MAGELLAN is not set +# CONFIG_JOYSTICK_SPACEORB is not set +# CONFIG_JOYSTICK_SPACEBALL is not set +# CONFIG_JOYSTICK_STINGER is not set +# CONFIG_JOYSTICK_TWIDJOY is not set +# CONFIG_JOYSTICK_ZHENHUA is not set +# CONFIG_JOYSTICK_JOYDUMP is not set +# CONFIG_JOYSTICK_XPAD is not set +CONFIG_INPUT_TABLET=y +# CONFIG_TABLET_USB_ACECAD is not set +# CONFIG_TABLET_USB_AIPTEK is not set +# CONFIG_TABLET_USB_GTCO is not set +# CONFIG_TABLET_USB_KBTAB is not set +# CONFIG_TABLET_USB_WACOM is not set +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_UCB1400 is not set +# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_PCSPKR is not set +# CONFIG_INPUT_APANEL is not set +# CONFIG_INPUT_WISTRON_BTNS is not set +# CONFIG_INPUT_ATLAS_BTNS is not set +# CONFIG_INPUT_ATI_REMOTE is not set +# CONFIG_INPUT_ATI_REMOTE2 is not set +# CONFIG_INPUT_KEYSPAN_REMOTE is not set +# CONFIG_INPUT_POWERMATE is not set +# CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_UINPUT is not set # # Hardware I/O ports # CONFIG_SERIO=y CONFIG_SERIO_I8042=y -# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_SERPORT=y # CONFIG_SERIO_CT82C710 is not set # CONFIG_SERIO_PCIPS2 is not set CONFIG_SERIO_LIBPS2=y @@ -933,8 +1175,26 @@ CONFIG_SERIO_LIBPS2=y CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y -# CONFIG_VT_HW_CONSOLE_BINDING is not set -# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_DEVKMEM=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_COMPUTONE is not set +# CONFIG_ROCKETPORT is not set +# CONFIG_CYCLADES is not set +# CONFIG_DIGIEPCA is not set +# CONFIG_MOXA_INTELLIO is not set +# CONFIG_MOXA_SMARTIO is not set +# CONFIG_ISI is not set +# CONFIG_SYNCLINK is not set +# CONFIG_SYNCLINKMP is not set +# CONFIG_SYNCLINK_GT is not set +# CONFIG_N_HDLC is not set +# CONFIG_RISCOM8 is not set +# CONFIG_SPECIALIX is not set +# CONFIG_SX is not set +# CONFIG_RIO is not set +# CONFIG_STALDRV is not set +# CONFIG_NOZOMI is not set # # Serial drivers @@ -944,9 +1204,14 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_SERIAL_8250_PCI=y CONFIG_SERIAL_8250_PNP=y -CONFIG_SERIAL_8250_NR_UARTS=4 +# CONFIG_SERIAL_8250_CS is not set +CONFIG_SERIAL_8250_NR_UARTS=32 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y # # Non-8250 serial port support @@ -955,89 +1220,275 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set -# CONFIG_WATCHDOG is not set CONFIG_HW_RANDOM=y -CONFIG_HW_RANDOM_INTEL=y -CONFIG_HW_RANDOM_AMD=y +# CONFIG_HW_RANDOM_INTEL is not set +# CONFIG_HW_RANDOM_AMD is not set CONFIG_HW_RANDOM_GEODE=y CONFIG_HW_RANDOM_VIA=y -# CONFIG_NVRAM is not set -CONFIG_RTC=y +CONFIG_NVRAM=y # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set # CONFIG_SONYPI is not set -CONFIG_AGP=y -# CONFIG_AGP_ALI is not set -# CONFIG_AGP_ATI is not set -# CONFIG_AGP_AMD is not set -CONFIG_AGP_AMD64=y -CONFIG_AGP_INTEL=y -# CONFIG_AGP_NVIDIA is not set -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_SWORKS is not set -# CONFIG_AGP_VIA is not set -# CONFIG_AGP_EFFICEON is not set -# CONFIG_DRM is not set + +# +# PCMCIA character devices +# +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set +# CONFIG_IPWIRELESS is not set # CONFIG_MWAVE is not set # CONFIG_PC8736x_GPIO is not set # CONFIG_NSC_GPIO is not set # CONFIG_CS5535_GPIO is not set -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=256 +# CONFIG_RAW_DRIVER is not set CONFIG_HPET=y # CONFIG_HPET_RTC_IRQ is not set -CONFIG_HPET_MMAP=y +# CONFIG_HPET_MMAP is not set # CONFIG_HANGCHECK_TIMER is not set # CONFIG_TCG_TPM is not set # CONFIG_TELCLOCK is not set CONFIG_DEVPORT=y -# CONFIG_I2C is not set - -# -# SPI support -# +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_CHARDEV is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +CONFIG_I2C_I801=y +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_I2C_SIMTEC is not set +# CONFIG_SCx200_ACB is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_TINY_USB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_PLATFORM is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set # CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set # CONFIG_W1 is not set -# CONFIG_POWER_SUPPLY is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_BATTERY_DS2760 is not set # CONFIG_HWMON is not set +CONFIG_THERMAL=y +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +# CONFIG_ACQUIRE_WDT is not set +# CONFIG_ADVANTECH_WDT is not set +# CONFIG_ALIM1535_WDT is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_SC520_WDT is not set +# CONFIG_EUROTECH_WDT is not set +# CONFIG_IB700_WDT is not set +# CONFIG_IBMASR is not set +# CONFIG_WAFER_WDT is not set +# CONFIG_I6300ESB_WDT is not set +# CONFIG_ITCO_WDT is not set +# CONFIG_IT8712F_WDT is not set +# CONFIG_HP_WATCHDOG is not set +# CONFIG_SC1200_WDT is not set +# CONFIG_PC87413_WDT is not set +# CONFIG_60XX_WDT is not set +# CONFIG_SBC8360_WDT is not set +# CONFIG_SBC7240_WDT is not set +# CONFIG_CPU5_WDT is not set +# CONFIG_SMSC37B787_WDT is not set +# CONFIG_W83627HF_WDT is not set +# CONFIG_W83697HF_WDT is not set +# CONFIG_W83877F_WDT is not set +# CONFIG_W83977F_WDT is not set +# CONFIG_MACHZ_WDT is not set +# CONFIG_SBC_EPX_C3_WATCHDOG is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +# CONFIG_SSB is not set # # Multifunction device drivers # # CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set # # Multimedia devices # + +# +# Multimedia core support +# # CONFIG_VIDEO_DEV is not set # CONFIG_DVB_CORE is not set + +# +# Multimedia drivers +# CONFIG_DAB=y # CONFIG_USB_DABUSB is not set # # Graphics support # -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set +CONFIG_AGP=y +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_ATI is not set +# CONFIG_AGP_AMD is not set +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +# CONFIG_AGP_NVIDIA is not set +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_SWORKS is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_EFFICEON is not set +CONFIG_DRM=y +# CONFIG_DRM_TDFX is not set +# CONFIG_DRM_R128 is not set +# CONFIG_DRM_RADEON is not set +# CONFIG_DRM_I810 is not set +# CONFIG_DRM_I830 is not set +CONFIG_DRM_I915=y +# CONFIG_DRM_MGA is not set +# CONFIG_DRM_SIS is not set +# CONFIG_DRM_VIA is not set +# CONFIG_DRM_SAVAGE is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +CONFIG_FB_DEFERRED_IO=y +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ARC is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_VGA16 is not set +# CONFIG_FB_UVESA is not set +# CONFIG_FB_VESA is not set +CONFIG_FB_EFI=y +# CONFIG_FB_IMAC is not set +# CONFIG_FB_N411 is not set +# CONFIG_FB_HGA is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_I810 is not set +# CONFIG_FB_LE80578 is not set +# CONFIG_FB_INTEL is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_CYBLA is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_GEODE is not set +# CONFIG_FB_VIRTUAL is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_CORGI is not set +# CONFIG_BACKLIGHT_PROGEAR is not set # # Display device support # # CONFIG_DISPLAY_SUPPORT is not set -# CONFIG_VGASTATE is not set -# CONFIG_FB is not set # # Console display driver support # CONFIG_VGA_CONSOLE=y CONFIG_VGACON_SOFT_SCROLLBACK=y -CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128 +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 CONFIG_VIDEO_SELECT=y CONFIG_DUMMY_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE is not set +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_LOGO_LINUX_CLUT224=y # # Sound @@ -1047,33 +1498,167 @@ CONFIG_SOUND=y # # Advanced Linux Sound Architecture # -# CONFIG_SND is not set +CONFIG_SND=y +CONFIG_SND_TIMER=y +CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQ_DUMMY=y +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +CONFIG_SND_PCM_OSS_PLUGINS=y +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SUPPORT_OLD_API=y +CONFIG_SND_VERBOSE_PROCFS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y + +# +# Generic devices +# +# CONFIG_SND_PCSP is not set +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set + +# +# PCI devices +# +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS300 is not set +# CONFIG_SND_ALS4000 is not set +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS5530 is not set +# CONFIG_SND_CS5535AUDIO is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_FM801 is not set +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_ANALOG=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_VIA=y +CONFIG_SND_HDA_CODEC_ATIHDMI=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_HDA_CODEC_CMEDIA=y +CONFIG_SND_HDA_CODEC_SI3054=y +CONFIG_SND_HDA_GENERIC=y +# CONFIG_SND_HDA_POWER_SAVE is not set +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_HIFIER is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SIS7019 is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set +# CONFIG_SND_VX222 is not set +# CONFIG_SND_YMFPCI is not set + +# +# USB devices +# +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_USX2Y is not set +# CONFIG_SND_USB_CAIAQ is not set + +# +# PCMCIA devices +# +# CONFIG_SND_VXPOCKET is not set +# CONFIG_SND_PDAUDIOCF is not set + +# +# System on Chip audio support +# +# CONFIG_SND_SOC is not set + +# +# ALSA SoC audio for Freescale SOCs +# + +# +# SoC Audio for the Texas Instruments OMAP +# # # Open Sound System # -CONFIG_SOUND_PRIME=y -# CONFIG_SOUND_TRIDENT is not set -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -# CONFIG_SOUND_OSS is not set +# CONFIG_SOUND_PRIME is not set CONFIG_HID_SUPPORT=y CONFIG_HID=y -# CONFIG_HID_DEBUG is not set +CONFIG_HID_DEBUG=y +CONFIG_HIDRAW=y # # USB Input Devices # CONFIG_USB_HID=y -# CONFIG_USB_HIDINPUT_POWERBOOK is not set -# CONFIG_HID_FF is not set -# CONFIG_USB_HIDDEV is not set +CONFIG_USB_HIDINPUT_POWERBOOK=y +CONFIG_HID_FF=y +CONFIG_HID_PID=y +CONFIG_LOGITECH_FF=y +# CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_PANTHERLORD_FF=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_ZEROPLUS_FF=y +CONFIG_USB_HIDDEV=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=y -# CONFIG_USB_DEBUG is not set +CONFIG_USB_DEBUG=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y # # Miscellaneous USB options @@ -1081,18 +1666,18 @@ CONFIG_USB=y CONFIG_USB_DEVICEFS=y # CONFIG_USB_DEVICE_CLASS is not set # CONFIG_USB_DYNAMIC_MINORS is not set -# CONFIG_USB_SUSPEND is not set -# CONFIG_USB_PERSIST is not set +CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set # # USB Host Controller Drivers # +# CONFIG_USB_C67X00_HCD is not set CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set # CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set @@ -1125,8 +1710,10 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set # CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_LIBUSUAL is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +CONFIG_USB_LIBUSUAL=y # # USB Imaging devices @@ -1138,10 +1725,6 @@ CONFIG_USB_MON=y # # USB port drivers # - -# -# USB Serial Converter support -# # CONFIG_USB_SERIAL is not set # @@ -1167,90 +1750,125 @@ CONFIG_USB_MON=y # CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y # -# USB DSL modem support +# LED drivers # +# CONFIG_LEDS_CLEVO_MAIL is not set # -# USB Gadget Support +# LED Triggers # -# CONFIG_USB_GADGET is not set -# CONFIG_MMC is not set +CONFIG_LEDS_TRIGGERS=y +# CONFIG_LEDS_TRIGGER_TIMER is not set +# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_INFINIBAND is not set +CONFIG_EDAC=y # -# LED devices +# Reporting subsystems # -# CONFIG_NEW_LEDS is not set +# CONFIG_EDAC_DEBUG is not set +# CONFIG_EDAC_MM_EDAC is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +# CONFIG_RTC_DEBUG is not set # -# LED drivers +# RTC interfaces # +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set # -# LED Triggers +# I2C RTC drivers # -# CONFIG_INFINIBAND is not set -# CONFIG_EDAC is not set +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set # -# Real Time Clock +# SPI RTC drivers # -# CONFIG_RTC_CLASS is not set # -# DMA Engine support +# Platform RTC drivers # -# CONFIG_DMA_ENGINE is not set +CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_V3020 is not set # -# DMA Clients +# on-CPU RTC drivers # +CONFIG_DMADEVICES=y # # DMA Devices # -CONFIG_VIRTUALIZATION=y -# CONFIG_KVM is not set +# CONFIG_INTEL_IOATDMA is not set +# CONFIG_UIO is not set # -# Userspace I/O +# Firmware Drivers # -# CONFIG_UIO is not set +# CONFIG_EDD is not set +CONFIG_EFI_VARS=y +# CONFIG_DELL_RBU is not set +# CONFIG_DCDBAS is not set +CONFIG_DMIID=y +# CONFIG_ISCSI_IBFT_FIND is not set # # File systems # -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -# CONFIG_EXT2_FS_SECURITY is not set -# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT2_FS is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y -# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_EXT3_FS_SECURITY=y # CONFIG_EXT4DEV_FS is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=y -CONFIG_REISERFS_FS=y -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -# CONFIG_REISERFS_FS_SECURITY is not set +# CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set +CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y -# CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +# CONFIG_QFMT_V1 is not set +CONFIG_QFMT_V2=y +CONFIG_QUOTACTL=y # CONFIG_AUTOFS_FS is not set CONFIG_AUTOFS4_FS=y # CONFIG_FUSE_FS is not set @@ -1260,8 +1878,8 @@ CONFIG_GENERIC_ACL=y # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=y -# CONFIG_JOLIET is not set -# CONFIG_ZISOFS is not set +CONFIG_JOLIET=y +CONFIG_ZISOFS=y # CONFIG_UDF_FS is not set # @@ -1279,13 +1897,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +CONFIG_PROC_VMCORE=y CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y -CONFIG_RAMFS=y # CONFIG_CONFIGFS_FS is not set # @@ -1293,6 +1911,7 @@ CONFIG_RAMFS=y # # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set @@ -1300,33 +1919,15 @@ CONFIG_RAMFS=y # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_V3_ACL is not set -# CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_V3_ACL is not set -# CONFIG_NFSD_V4 is not set -CONFIG_NFSD_TCP=y -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -# CONFIG_SUNRPC_BIND34 is not set -# CONFIG_RPCSEC_GSS_KRB5 is not set -# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_NETWORK_FILESYSTEMS=y +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set # CONFIG_SMB_FS is not set # CONFIG_CIFS is not set # CONFIG_NCP_FS is not set @@ -1336,14 +1937,26 @@ CONFIG_SUNRPC=y # # Partition Types # -# CONFIG_PARTITION_ADVANCED is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set CONFIG_NLS=y -CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set @@ -1378,37 +1991,33 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_ISO8859_9 is not set # CONFIG_NLS_ISO8859_13 is not set # CONFIG_NLS_ISO8859_14 is not set -CONFIG_NLS_ISO8859_15=y +# CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set CONFIG_NLS_UTF8=y - -# -# Distributed Lock Manager -# # CONFIG_DLM is not set -CONFIG_INSTRUMENTATION=y -CONFIG_PROFILING=y -CONFIG_OPROFILE=y -CONFIG_KPROBES=y # # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y -CONFIG_UNUSED_SYMBOLS=y -# CONFIG_DEBUG_FS is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set -CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_DETECT_SOFTLOCKUP is not set # CONFIG_SCHED_DEBUG is not set -# CONFIG_SCHEDSTATS is not set +CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y +# CONFIG_DEBUG_OBJECTS is not set # CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_RT_MUTEX_TESTER is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1423,48 +2032,174 @@ CONFIG_TIMER_STATS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_LIST is not set -# CONFIG_FRAME_POINTER is not set -CONFIG_OPTIMIZE_INLINING=y +# CONFIG_DEBUG_SG is not set +CONFIG_FRAME_POINTER=y +# CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_KPROBES_SANITY_TEST is not set +# CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +# CONFIG_SAMPLES is not set +# CONFIG_KGDB is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_NONPROMISC_DEVMEM is not set CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_DEBUG_RODATA is not set +CONFIG_DEBUG_STACK_USAGE=y +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_X86_PTDUMP is not set +CONFIG_DEBUG_RODATA=y +# CONFIG_DEBUG_RODATA_TEST is not set +CONFIG_DEBUG_NX_TEST=m # CONFIG_4KSTACKS is not set CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y CONFIG_DOUBLEFAULT=y +CONFIG_IO_DELAY_TYPE_0X80=0 +CONFIG_IO_DELAY_TYPE_0XED=1 +CONFIG_IO_DELAY_TYPE_UDELAY=2 +CONFIG_IO_DELAY_TYPE_NONE=3 +CONFIG_IO_DELAY_0X80=y +# CONFIG_IO_DELAY_0XED is not set +# CONFIG_IO_DELAY_UDELAY is not set +# CONFIG_IO_DELAY_NONE is not set +CONFIG_DEFAULT_IO_DELAY_TYPE=0 +CONFIG_DEBUG_BOOT_PARAMS=y +# CONFIG_CPA_DEBUG is not set # # Security options # -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set -# CONFIG_CRYPTO is not set +CONFIG_KEYS=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +# CONFIG_SECURITY_NETWORK_XFRM is not set +CONFIG_SECURITY_CAPABILITIES=y +CONFIG_SECURITY_FILE_CAPABILITIES=y +# CONFIG_SECURITY_ROOTPLUG is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_AVC_STATS=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set +# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set +# CONFIG_SECURITY_SMACK is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +CONFIG_CRYPTO_SHA1=y +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=y +# CONFIG_CRYPTO_AES_586 is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_ARC4=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SALSA20_586 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_586 is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_PADLOCK is not set +# CONFIG_CRYPTO_DEV_GEODE is not set +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_HAVE_KVM=y +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set +# CONFIG_LGUEST is not set +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set # # Library routines # CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_FIRST_BIT=y +CONFIG_GENERIC_FIND_NEXT_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set +CONFIG_AUDIT_GENERIC=y CONFIG_ZLIB_INFLATE=y CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_X86_SMP=y -CONFIG_X86_HT=y -CONFIG_X86_BIOS_REBOOT=y -CONFIG_X86_TRAMPOLINE=y -CONFIG_KTIME_SCALAR=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 2f1b356..ae5124e 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.26-rc1 -# Sun May 4 19:27:29 2008 +# Sun May 4 19:59:57 2008 # CONFIG_64BIT=y # CONFIG_X86_32 is not set @@ -61,27 +61,42 @@ CONFIG_EXPERIMENTAL=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y +# CONFIG_LOCALVERSION_AUTO is not set CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_AUDIT is not set -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=18 -# CONFIG_CGROUPS is not set -# CONFIG_GROUP_SCHED is not set +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_AUDITSYSCALL=y +CONFIG_AUDIT_TREE=y +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_NS=y +# CONFIG_CGROUP_DEVICE is not set +CONFIG_CPUSETS=y +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +# CONFIG_RT_GROUP_SCHED is not set # CONFIG_USER_SCHED is not set -# CONFIG_CGROUP_SCHED is not set +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +# CONFIG_CGROUP_MEM_RES_CTLR is not set # CONFIG_SYSFS_DEPRECATED_V2 is not set +CONFIG_PROC_PID_CPUSET=y CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_IPC_NS=y -# CONFIG_USER_NS is not set +CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" @@ -93,7 +108,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y -# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -108,12 +123,13 @@ CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLAB=y -# CONFIG_SLUB is not set +CONFIG_SLUB_DEBUG=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y # CONFIG_SLOB is not set CONFIG_PROFILING=y -# CONFIG_MARKERS is not set -CONFIG_OPROFILE=y +CONFIG_MARKERS=y +# CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y CONFIG_KPROBES=y CONFIG_KRETPROBES=y @@ -133,15 +149,15 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_KMOD is not set CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y -# CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_BLK_DEV_BSG is not set +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_BSG=y CONFIG_BLOCK_COMPAT=y # # IO Schedulers # CONFIG_IOSCHED_NOOP=y -# CONFIG_IOSCHED_AS is not set +CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y # CONFIG_DEFAULT_AS is not set @@ -155,7 +171,7 @@ CONFIG_CLASSIC_RCU=y # Processor type and features # CONFIG_TICK_ONESHOT=y -# CONFIG_NO_HZ is not set +CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y @@ -171,7 +187,8 @@ CONFIG_X86_PC=y # CONFIG_X86_RDC321X is not set # CONFIG_X86_VSMP is not set # CONFIG_PARAVIRT_GUEST is not set -# CONFIG_MEMTEST_BOOTPARAM is not set +CONFIG_MEMTEST_BOOTPARAM=y +CONFIG_MEMTEST_BOOTPARAM_VALUE=0 # CONFIG_M386 is not set # CONFIG_M486 is not set # CONFIG_M586 is not set @@ -196,14 +213,17 @@ CONFIG_X86_PC=y # CONFIG_MVIAC3_2 is not set # CONFIG_MVIAC7 is not set # CONFIG_MPSC is not set -# CONFIG_MCORE2 is not set -CONFIG_GENERIC_CPU=y +CONFIG_MCORE2=y +# CONFIG_GENERIC_CPU is not set CONFIG_X86_CPU=y -CONFIG_X86_L1_CACHE_BYTES=128 -CONFIG_X86_INTERNODE_CACHE_BYTES=128 +CONFIG_X86_L1_CACHE_BYTES=64 +CONFIG_X86_INTERNODE_CACHE_BYTES=64 CONFIG_X86_CMPXCHG=y -CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_GOOD_APIC=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_P6_NOP=y CONFIG_X86_TSC=y CONFIG_X86_CMOV=y CONFIG_X86_MINIMUM_CPU_FAMILY=64 @@ -212,20 +232,19 @@ CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y CONFIG_DMI=y CONFIG_GART_IOMMU=y -# CONFIG_CALGARY_IOMMU is not set +CONFIG_CALGARY_IOMMU=y +CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y CONFIG_SWIOTLB=y CONFIG_IOMMU_HELPER=y -CONFIG_NR_CPUS=32 -CONFIG_SCHED_SMT=y +CONFIG_NR_CPUS=4 +# CONFIG_SCHED_SMT is not set CONFIG_SCHED_MC=y # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y -CONFIG_X86_MCE=y -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y +# CONFIG_X86_MCE is not set # CONFIG_I8K is not set # CONFIG_MICROCODE is not set CONFIG_X86_MSR=y @@ -234,7 +253,7 @@ CONFIG_NUMA=y CONFIG_K8_NUMA=y CONFIG_X86_64_ACPI_NUMA=y CONFIG_NODES_SPAN_OTHER_NODES=y -CONFIG_NUMA_EMU=y +# CONFIG_NUMA_EMU is not set CONFIG_NODES_SHIFT=6 CONFIG_ARCH_SPARSEMEM_DEFAULT=y CONFIG_ARCH_SPARSEMEM_ENABLE=y @@ -249,7 +268,7 @@ CONFIG_HAVE_MEMORY_PRESENT=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPARSEMEM_EXTREME=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y -# CONFIG_SPARSEMEM_VMEMMAP is not set +CONFIG_SPARSEMEM_VMEMMAP=y # # Memory hotplug is currently incompatible with Software Suspend @@ -263,18 +282,18 @@ CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_MTRR=y # CONFIG_X86_PAT is not set -# CONFIG_EFI is not set +CONFIG_EFI=y CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set -CONFIG_HZ_250=y +# CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 +CONFIG_HZ_1000=y +CONFIG_HZ=1000 CONFIG_SCHED_HRTICK=y -# CONFIG_KEXEC is not set -# CONFIG_CRASH_DUMP is not set -CONFIG_PHYSICAL_START=0x200000 -# CONFIG_RELOCATABLE is not set +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_PHYSICAL_START=0x1000000 +CONFIG_RELOCATABLE=y CONFIG_PHYSICAL_ALIGN=0x200000 CONFIG_HOTPLUG_CPU=y # CONFIG_COMPAT_VDSO is not set @@ -286,10 +305,15 @@ CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y # CONFIG_ARCH_HIBERNATION_HEADER=y CONFIG_PM=y -# CONFIG_PM_DEBUG is not set +CONFIG_PM_DEBUG=y +# CONFIG_PM_VERBOSE is not set +CONFIG_CAN_PM_TRACE=y +CONFIG_PM_TRACE=y +CONFIG_PM_TRACE_RTC=y CONFIG_PM_SLEEP_SMP=y CONFIG_PM_SLEEP=y -# CONFIG_SUSPEND is not set +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y CONFIG_HIBERNATION=y CONFIG_PM_STD_PARTITION="" CONFIG_ACPI=y @@ -327,35 +351,34 @@ CONFIG_ACPI_CONTAINER=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_TABLE=y CONFIG_CPU_FREQ_DEBUG=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_STAT_DETAILS is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_STAT is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y # CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set # # CPUFreq processor drivers # CONFIG_X86_ACPI_CPUFREQ=y -CONFIG_X86_POWERNOW_K8=y -CONFIG_X86_POWERNOW_K8_ACPI=y +# CONFIG_X86_POWERNOW_K8 is not set # CONFIG_X86_SPEEDSTEP_CENTRINO is not set -CONFIG_X86_P4_CLOCKMOD=y +# CONFIG_X86_P4_CLOCKMOD is not set # # shared options # -CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y -CONFIG_X86_SPEEDSTEP_LIB=y +# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set +# CONFIG_X86_SPEEDSTEP_LIB is not set CONFIG_CPU_IDLE=y CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y # # Bus options (PCI etc.) @@ -364,28 +387,53 @@ CONFIG_PCI=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y CONFIG_PCI_DOMAINS=y -# CONFIG_DMAR is not set +CONFIG_DMAR=y +CONFIG_DMAR_GFX_WA=y +CONFIG_DMAR_FLOPPY_WA=y CONFIG_PCIEPORTBUS=y +# CONFIG_HOTPLUG_PCI_PCIE is not set CONFIG_PCIEAER=y # CONFIG_PCIEASPM is not set CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y -CONFIG_PCI_LEGACY=y +# CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set -# CONFIG_HT_IRQ is not set +CONFIG_HT_IRQ=y CONFIG_ISA_DMA_API=y CONFIG_K8_NB=y -# CONFIG_PCCARD is not set -# CONFIG_HOTPLUG_PCI is not set +CONFIG_PCCARD=y +# CONFIG_PCMCIA_DEBUG is not set +CONFIG_PCMCIA=y +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y +CONFIG_CARDBUS=y + +# +# PC-card bridges +# +CONFIG_YENTA=y +CONFIG_YENTA_O2=y +CONFIG_YENTA_RICOH=y +CONFIG_YENTA_TI=y +CONFIG_YENTA_ENE_TUNE=y +CONFIG_YENTA_TOSHIBA=y +# CONFIG_PD6729 is not set +# CONFIG_I82092 is not set +CONFIG_PCCARD_NONSTATIC=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_HOTPLUG_PCI_FAKE is not set +# CONFIG_HOTPLUG_PCI_ACPI is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set # # Executable file formats / Emulations # CONFIG_BINFMT_ELF=y CONFIG_COMPAT_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set +CONFIG_BINFMT_MISC=y CONFIG_IA32_EMULATION=y -CONFIG_IA32_AOUT=y +# CONFIG_IA32_AOUT is not set CONFIG_COMPAT=y CONFIG_COMPAT_FOR_U64_ALIGNMENT=y CONFIG_SYSVIPC_COMPAT=y @@ -399,22 +447,31 @@ CONFIG_NET=y # Networking options # CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set +CONFIG_PACKET_MMAP=y CONFIG_UNIX=y +CONFIG_XFRM=y +CONFIG_XFRM_USER=y +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set # CONFIG_NET_KEY is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set CONFIG_IP_FIB_HASH=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y # CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set +CONFIG_SYN_COOKIES=y # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set @@ -423,34 +480,109 @@ CONFIG_INET_TUNNEL=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set -CONFIG_INET_DIAG=y -CONFIG_INET_TCP_DIAG=y -# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_INET_LRO=y +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set CONFIG_TCP_CONG_CUBIC=y +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set +# CONFIG_TCP_CONG_LP is not set +# CONFIG_TCP_CONG_VENO is not set +# CONFIG_TCP_CONG_YEAH is not set +# CONFIG_TCP_CONG_ILLINOIS is not set +# CONFIG_DEFAULT_BIC is not set +CONFIG_DEFAULT_CUBIC=y +# CONFIG_DEFAULT_HTCP is not set +# CONFIG_DEFAULT_VEGAS is not set +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set +CONFIG_TCP_MD5SIG=y +# CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set # CONFIG_IPV6_OPTIMISTIC_DAD is not set -# CONFIG_INET6_AH is not set -# CONFIG_INET6_ESP is not set +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y # CONFIG_INET6_IPCOMP is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_INET6_XFRM_TUNNEL is not set # CONFIG_INET6_TUNNEL is not set -# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET6_XFRM_MODE_TUNNEL is not set -# CONFIG_INET6_XFRM_MODE_BEET is not set +CONFIG_INET6_XFRM_MODE_TRANSPORT=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_INET6_XFRM_MODE_BEET=y # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_SIT=y CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_IPV6_TUNNEL is not set # CONFIG_IPV6_MULTIPLE_TABLES is not set # CONFIG_IPV6_MROUTE is not set -# CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set +CONFIG_NETLABEL=y +CONFIG_NETWORK_SECMARK=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +# CONFIG_NETFILTER_ADVANCED is not set + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_SIP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_LOG=y +CONFIG_IP_NF_TARGET_ULOG=y +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_NF_NAT_FTP=y +CONFIG_NF_NAT_IRC=y +# CONFIG_NF_NAT_TFTP is not set +# CONFIG_NF_NAT_AMANDA is not set +# CONFIG_NF_NAT_PPTP is not set +# CONFIG_NF_NAT_H323 is not set +CONFIG_NF_NAT_SIP=y +CONFIG_IP_NF_MANGLE=y + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_TIPC is not set @@ -458,6 +590,7 @@ CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_BRIDGE is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set +CONFIG_LLC=y # CONFIG_LLC2 is not set # CONFIG_IPX is not set # CONFIG_ATALK is not set @@ -465,25 +598,99 @@ CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set -# CONFIG_NET_SCHED is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +# CONFIG_NET_SCH_HTB is not set +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_RR is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_INGRESS is not set + +# +# Classification +# +CONFIG_NET_CLS=y +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set +# CONFIG_NET_CLS_U32 is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +# CONFIG_NET_EMATCH_CMP is not set +# CONFIG_NET_EMATCH_NBYTE is not set +# CONFIG_NET_EMATCH_U32 is not set +# CONFIG_NET_EMATCH_META is not set +# CONFIG_NET_EMATCH_TEXT is not set +CONFIG_NET_CLS_ACT=y +# CONFIG_NET_ACT_POLICE is not set +# CONFIG_NET_ACT_GACT is not set +# CONFIG_NET_ACT_MIRRED is not set +# CONFIG_NET_ACT_IPT is not set +# CONFIG_NET_ACT_NAT is not set +# CONFIG_NET_ACT_PEDIT is not set +# CONFIG_NET_ACT_SIMP is not set +CONFIG_NET_SCH_FIFO=y # # Network testing # # CONFIG_NET_PKTGEN is not set # CONFIG_NET_TCPPROBE is not set -# CONFIG_HAMRADIO is not set +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +# CONFIG_AX25 is not set # CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y # # Wireless # -# CONFIG_CFG80211 is not set -# CONFIG_WIRELESS_EXT is not set -# CONFIG_MAC80211 is not set +CONFIG_CFG80211=y +CONFIG_NL80211=y +CONFIG_WIRELESS_EXT=y +CONFIG_MAC80211=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. +# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set +CONFIG_MAC80211_LEDS=y +# CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set +# CONFIG_MAC80211_DEBUG is not set # CONFIG_IEEE80211 is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -500,9 +707,10 @@ CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set +CONFIG_DEBUG_DEVRES=y # CONFIG_SYS_HYPERVISOR is not set -# CONFIG_CONNECTOR is not set +CONFIG_CONNECTOR=y +CONFIG_PROC_EVENTS=y # CONFIG_MTD is not set # CONFIG_PARPORT is not set CONFIG_PNP=y @@ -513,7 +721,7 @@ CONFIG_PNP=y # CONFIG_PNPACPI=y CONFIG_BLK_DEV=y -CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -526,7 +734,7 @@ CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_UB is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_SIZE=16384 # CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set @@ -536,77 +744,16 @@ CONFIG_MISC_DEVICES=y # CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set +# CONFIG_ACER_WMI is not set +# CONFIG_ASUS_LAPTOP is not set +# CONFIG_FUJITSU_LAPTOP is not set +# CONFIG_MSI_LAPTOP is not set # CONFIG_SONY_LAPTOP is not set # CONFIG_THINKPAD_ACPI is not set # CONFIG_INTEL_MENLOW is not set # CONFIG_ENCLOSURE_SERVICES is not set CONFIG_HAVE_IDE=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_IDE_SATA is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y -# CONFIG_BLK_DEV_IDETAPE is not set -# CONFIG_BLK_DEV_IDEFLOPPY is not set -# CONFIG_BLK_DEV_IDESCSI is not set -CONFIG_BLK_DEV_IDEACPI=y -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_IDE_PROC_FS=y - -# -# IDE chipset support/bugfixes -# -CONFIG_IDE_GENERIC=y -# CONFIG_BLK_DEV_PLATFORM is not set -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_IDEPNP is not set -CONFIG_BLK_DEV_IDEDMA_SFF=y - -# -# PCI IDE chipsets support -# -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_IDEPCI_PCIBUS_ORDER=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_GENERIC is not set -# CONFIG_BLK_DEV_OPTI621 is not set -# CONFIG_BLK_DEV_RZ1000 is not set -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_AEC62XX is not set -# CONFIG_BLK_DEV_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_BLK_DEV_ATIIXP=y -# CONFIG_BLK_DEV_CMD64X is not set -# CONFIG_BLK_DEV_TRIFLEX is not set -# CONFIG_BLK_DEV_CY82C693 is not set -# CONFIG_BLK_DEV_CS5520 is not set -# CONFIG_BLK_DEV_CS5530 is not set -# CONFIG_BLK_DEV_HPT34X is not set -# CONFIG_BLK_DEV_HPT366 is not set -# CONFIG_BLK_DEV_JMICRON is not set -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_IT8213 is not set -# CONFIG_BLK_DEV_IT821X is not set -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_PDC202XX_OLD is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -# CONFIG_BLK_DEV_SVWKS is not set -# CONFIG_BLK_DEV_SIIMAGE is not set -# CONFIG_BLK_DEV_SIS5513 is not set -# CONFIG_BLK_DEV_SLC90E66 is not set -# CONFIG_BLK_DEV_TRM290 is not set -# CONFIG_BLK_DEV_VIA82CXXX is not set -# CONFIG_BLK_DEV_TC86C001 is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_BLK_DEV_HD_ONLY is not set -# CONFIG_BLK_DEV_HD is not set +# CONFIG_IDE is not set # # SCSI device support @@ -615,8 +762,8 @@ CONFIG_BLK_DEV_IDEDMA=y CONFIG_SCSI=y CONFIG_SCSI_DMA=y # CONFIG_SCSI_TGT is not set -CONFIG_SCSI_NETLINK=y -# CONFIG_SCSI_PROC_FS is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) @@ -625,7 +772,7 @@ CONFIG_BLK_DEV_SD=y # CONFIG_CHR_DEV_ST is not set # CONFIG_CHR_DEV_OSST is not set CONFIG_BLK_DEV_SR=y -# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y # CONFIG_CHR_DEV_SCH is not set @@ -642,76 +789,110 @@ CONFIG_SCSI_WAIT_SCAN=m # SCSI Transports # CONFIG_SCSI_SPI_ATTRS=y -CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set # CONFIG_SCSI_LOWLEVEL is not set +# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y -# CONFIG_SATA_PMP is not set +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=y # CONFIG_SATA_SIL24 is not set -# CONFIG_ATA_SFF is not set +CONFIG_ATA_SFF=y +# CONFIG_SATA_SVW is not set +CONFIG_ATA_PIIX=y +# CONFIG_SATA_MV is not set +# CONFIG_SATA_NV is not set +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_PROMISE is not set +# CONFIG_SATA_SX4 is not set +# CONFIG_SATA_SIL is not set +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_ULI is not set +# CONFIG_SATA_VIA is not set +# CONFIG_SATA_VITESSE is not set +# CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ACPI is not set +# CONFIG_PATA_ALI is not set +CONFIG_PATA_AMD=y +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD640_PCI is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_IT8213 is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set +# CONFIG_PATA_MPIIX is not set +CONFIG_PATA_OLDPIIX=y +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_NS87415 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PCMCIA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_RADISYS is not set +# CONFIG_PATA_RZ1000 is not set +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set CONFIG_MD=y -# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_MD=y +# CONFIG_MD_LINEAR is not set +# CONFIG_MD_RAID0 is not set +# CONFIG_MD_RAID1 is not set +# CONFIG_MD_RAID10 is not set +# CONFIG_MD_RAID456 is not set +# CONFIG_MD_MULTIPATH is not set +# CONFIG_MD_FAULTY is not set CONFIG_BLK_DEV_DM=y # CONFIG_DM_DEBUG is not set # CONFIG_DM_CRYPT is not set # CONFIG_DM_SNAPSHOT is not set -# CONFIG_DM_MIRROR is not set -# CONFIG_DM_ZERO is not set +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y # CONFIG_DM_MULTIPATH is not set # CONFIG_DM_DELAY is not set # CONFIG_DM_UEVENT is not set -CONFIG_FUSION=y -CONFIG_FUSION_SPI=y -# CONFIG_FUSION_FC is not set -# CONFIG_FUSION_SAS is not set -CONFIG_FUSION_MAX_SGE=128 -# CONFIG_FUSION_CTL is not set -# CONFIG_FUSION_LOGGING is not set +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support # # CONFIG_FIREWIRE is not set -CONFIG_IEEE1394=y - -# -# Subsystem Options -# -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# Controllers -# - -# -# Texas Instruments PCILynx requires I2C -# -CONFIG_IEEE1394_OHCI1394=y - -# -# Protocols -# -# CONFIG_IEEE1394_VIDEO1394 is not set -# CONFIG_IEEE1394_SBP2 is not set -# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set -# CONFIG_IEEE1394_ETH1394 is not set -# CONFIG_IEEE1394_DV1394 is not set -CONFIG_IEEE1394_RAWIO=y +# CONFIG_IEEE1394 is not set # CONFIG_I2O is not set CONFIG_MACINTOSH_DRIVERS=y -# CONFIG_MAC_EMUMOUSEBTN is not set +CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -CONFIG_NETDEVICES_MULTIQUEUE=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +# CONFIG_IFB is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set -CONFIG_TUN=y +# CONFIG_TUN is not set # CONFIG_VETH is not set # CONFIG_NET_SB1000 is not set # CONFIG_ARCNET is not set @@ -722,18 +903,16 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_CASSINI is not set CONFIG_NET_VENDOR_3COM=y -CONFIG_VORTEX=y +# CONFIG_VORTEX is not set # CONFIG_TYPHOON is not set CONFIG_NET_TULIP=y # CONFIG_DE2104X is not set -CONFIG_TULIP=y -# CONFIG_TULIP_MWI is not set -# CONFIG_TULIP_MMIO is not set -# CONFIG_TULIP_NAPI is not set +# CONFIG_TULIP is not set # CONFIG_DE4X5 is not set # CONFIG_WINBOND_840 is not set # CONFIG_DM9102 is not set # CONFIG_ULI526X is not set +# CONFIG_PCMCIA_XIRCOM is not set # CONFIG_HP100 is not set # CONFIG_IBM_NEW_EMAC_ZMII is not set # CONFIG_IBM_NEW_EMAC_RGMII is not set @@ -741,13 +920,9 @@ CONFIG_TULIP=y # CONFIG_IBM_NEW_EMAC_EMAC4 is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set -CONFIG_AMD8111_ETH=y -# CONFIG_AMD8111E_NAPI is not set +# CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set -CONFIG_B44=y -CONFIG_B44_PCI_AUTOSELECT=y -CONFIG_B44_PCICORE_AUTOSELECT=y -CONFIG_B44_PCI=y +# CONFIG_B44 is not set CONFIG_FORCEDETH=y # CONFIG_FORCEDETH_NAPI is not set # CONFIG_EEPRO100 is not set @@ -755,9 +930,9 @@ CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set -CONFIG_8139CP=y +# CONFIG_8139CP is not set CONFIG_8139TOO=y -# CONFIG_8139TOO_PIO is not set +CONFIG_8139TOO_PIO=y # CONFIG_8139TOO_TUNE_TWISTER is not set # CONFIG_8139TOO_8129 is not set # CONFIG_8139_OLD_RX_RESET is not set @@ -783,10 +958,11 @@ CONFIG_E1000=y # CONFIG_R8169 is not set # CONFIG_SIS190 is not set # CONFIG_SKGE is not set -# CONFIG_SKY2 is not set +CONFIG_SKY2=y +# CONFIG_SKY2_DEBUG is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y -CONFIG_BNX2=y +# CONFIG_BNX2 is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set CONFIG_NETDEV_10000=y @@ -794,8 +970,7 @@ CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T3 is not set # CONFIG_IXGBE is not set # CONFIG_IXGB is not set -CONFIG_S2IO=m -# CONFIG_S2IO_NAPI is not set +# CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -803,15 +978,44 @@ CONFIG_S2IO=m # CONFIG_TEHUTI is not set # CONFIG_BNX2X is not set # CONFIG_SFC is not set -# CONFIG_TR is not set +CONFIG_TR=y +# CONFIG_IBMOL is not set +# CONFIG_3C359 is not set +# CONFIG_TMS380TR is not set # # Wireless LAN # # CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set +CONFIG_WLAN_80211=y +# CONFIG_PCMCIA_RAYCS is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_LIBERTAS is not set +# CONFIG_AIRO is not set +# CONFIG_HERMES is not set +# CONFIG_ATMEL is not set +# CONFIG_AIRO_CS is not set +# CONFIG_PCMCIA_WL3501 is not set +# CONFIG_PRISM54 is not set +# CONFIG_USB_ZD1201 is not set +# CONFIG_USB_NET_RNDIS_WLAN is not set +# CONFIG_RTL8180 is not set +# CONFIG_RTL8187 is not set +# CONFIG_ADM8211 is not set +# CONFIG_P54_COMMON is not set +CONFIG_ATH5K=y +# CONFIG_ATH5K_DEBUG is not set # CONFIG_IWLWIFI is not set +# CONFIG_IWLCORE is not set # CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWL4965 is not set +# CONFIG_IWL3945 is not set +# CONFIG_HOSTAP is not set +# CONFIG_B43 is not set +# CONFIG_B43LEGACY is not set +# CONFIG_ZD1211RW is not set +# CONFIG_RT2X00 is not set # # USB Network Adapters @@ -821,8 +1025,19 @@ CONFIG_S2IO=m # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set +CONFIG_NET_PCMCIA=y +# CONFIG_PCMCIA_3C589 is not set +# CONFIG_PCMCIA_3C574 is not set +# CONFIG_PCMCIA_FMVJ18X is not set +# CONFIG_PCMCIA_PCNET is not set +# CONFIG_PCMCIA_NMCLAN is not set +# CONFIG_PCMCIA_SMC91C92 is not set +# CONFIG_PCMCIA_XIRC2PS is not set +# CONFIG_PCMCIA_AXNET is not set # CONFIG_WAN is not set -# CONFIG_FDDI is not set +CONFIG_FDDI=y +# CONFIG_DEFXX is not set +# CONFIG_SKFP is not set # CONFIG_HIPPI is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set @@ -839,14 +1054,14 @@ CONFIG_NET_POLL_CONTROLLER=y # Input device support # CONFIG_INPUT=y -# CONFIG_INPUT_FF_MEMLESS is not set -# CONFIG_INPUT_POLLDEV is not set +CONFIG_INPUT_FF_MEMLESS=y +CONFIG_INPUT_POLLDEV=y # # Userland interfaces # CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_PSAUX=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set @@ -874,17 +1089,62 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set # CONFIG_MOUSE_VSXXXAA is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TABLET is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set +CONFIG_INPUT_JOYSTICK=y +# CONFIG_JOYSTICK_ANALOG is not set +# CONFIG_JOYSTICK_A3D is not set +# CONFIG_JOYSTICK_ADI is not set +# CONFIG_JOYSTICK_COBRA is not set +# CONFIG_JOYSTICK_GF2K is not set +# CONFIG_JOYSTICK_GRIP is not set +# CONFIG_JOYSTICK_GRIP_MP is not set +# CONFIG_JOYSTICK_GUILLEMOT is not set +# CONFIG_JOYSTICK_INTERACT is not set +# CONFIG_JOYSTICK_SIDEWINDER is not set +# CONFIG_JOYSTICK_TMDC is not set +# CONFIG_JOYSTICK_IFORCE is not set +# CONFIG_JOYSTICK_WARRIOR is not set +# CONFIG_JOYSTICK_MAGELLAN is not set +# CONFIG_JOYSTICK_SPACEORB is not set +# CONFIG_JOYSTICK_SPACEBALL is not set +# CONFIG_JOYSTICK_STINGER is not set +# CONFIG_JOYSTICK_TWIDJOY is not set +# CONFIG_JOYSTICK_ZHENHUA is not set +# CONFIG_JOYSTICK_JOYDUMP is not set +# CONFIG_JOYSTICK_XPAD is not set +CONFIG_INPUT_TABLET=y +# CONFIG_TABLET_USB_ACECAD is not set +# CONFIG_TABLET_USB_AIPTEK is not set +# CONFIG_TABLET_USB_GTCO is not set +# CONFIG_TABLET_USB_KBTAB is not set +# CONFIG_TABLET_USB_WACOM is not set +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_UCB1400 is not set +# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_PCSPKR is not set +# CONFIG_INPUT_APANEL is not set +# CONFIG_INPUT_ATLAS_BTNS is not set +# CONFIG_INPUT_ATI_REMOTE is not set +# CONFIG_INPUT_ATI_REMOTE2 is not set +# CONFIG_INPUT_KEYSPAN_REMOTE is not set +# CONFIG_INPUT_POWERMATE is not set +# CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_UINPUT is not set # # Hardware I/O ports # CONFIG_SERIO=y CONFIG_SERIO_I8042=y -# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_SERPORT=y # CONFIG_SERIO_CT82C710 is not set # CONFIG_SERIO_PCIPS2 is not set CONFIG_SERIO_LIBPS2=y @@ -897,9 +1157,25 @@ CONFIG_SERIO_LIBPS2=y CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y -# CONFIG_VT_HW_CONSOLE_BINDING is not set -# CONFIG_DEVKMEM is not set -# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_DEVKMEM=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_COMPUTONE is not set +# CONFIG_ROCKETPORT is not set +# CONFIG_CYCLADES is not set +# CONFIG_DIGIEPCA is not set +# CONFIG_MOXA_INTELLIO is not set +# CONFIG_MOXA_SMARTIO is not set +# CONFIG_ISI is not set +# CONFIG_SYNCLINK is not set +# CONFIG_SYNCLINKMP is not set +# CONFIG_SYNCLINK_GT is not set +# CONFIG_N_HDLC is not set +# CONFIG_RISCOM8 is not set +# CONFIG_SPECIALIX is not set +# CONFIG_SX is not set +# CONFIG_RIO is not set +# CONFIG_STALDRV is not set # CONFIG_NOZOMI is not set # @@ -910,9 +1186,14 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_SERIAL_8250_PCI=y CONFIG_SERIAL_8250_PNP=y -CONFIG_SERIAL_8250_NR_UARTS=4 +# CONFIG_SERIAL_8250_CS is not set +CONFIG_SERIAL_8250_NR_UARTS=32 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y # # Non-8250 serial port support @@ -921,28 +1202,78 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y -CONFIG_HW_RANDOM_INTEL=y -CONFIG_HW_RANDOM_AMD=y -# CONFIG_NVRAM is not set -CONFIG_RTC=y +# CONFIG_HW_RANDOM_INTEL is not set +# CONFIG_HW_RANDOM_AMD is not set +CONFIG_NVRAM=y # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set + +# +# PCMCIA character devices +# +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set +# CONFIG_IPWIRELESS is not set # CONFIG_MWAVE is not set # CONFIG_PC8736x_GPIO is not set -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=256 +# CONFIG_RAW_DRIVER is not set CONFIG_HPET=y # CONFIG_HPET_RTC_IRQ is not set -CONFIG_HPET_MMAP=y +# CONFIG_HPET_MMAP is not set # CONFIG_HANGCHECK_TIMER is not set # CONFIG_TCG_TPM is not set # CONFIG_TELCLOCK is not set CONFIG_DEVPORT=y -# CONFIG_I2C is not set +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +# CONFIG_I2C_CHARDEV is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +CONFIG_I2C_I801=y +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_I2C_SIMTEC is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_TINY_USB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_PLATFORM is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set # CONFIG_SPI is not set # CONFIG_W1 is not set CONFIG_POWER_SUPPLY=y @@ -951,20 +1282,55 @@ CONFIG_POWER_SUPPLY=y # CONFIG_BATTERY_DS2760 is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y -# CONFIG_WATCHDOG is not set +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +# CONFIG_ACQUIRE_WDT is not set +# CONFIG_ADVANTECH_WDT is not set +# CONFIG_ALIM1535_WDT is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_SC520_WDT is not set +# CONFIG_EUROTECH_WDT is not set +# CONFIG_IB700_WDT is not set +# CONFIG_IBMASR is not set +# CONFIG_WAFER_WDT is not set +# CONFIG_I6300ESB_WDT is not set +# CONFIG_ITCO_WDT is not set +# CONFIG_IT8712F_WDT is not set +# CONFIG_HP_WATCHDOG is not set +# CONFIG_SC1200_WDT is not set +# CONFIG_PC87413_WDT is not set +# CONFIG_60XX_WDT is not set +# CONFIG_SBC8360_WDT is not set +# CONFIG_CPU5_WDT is not set +# CONFIG_SMSC37B787_WDT is not set +# CONFIG_W83627HF_WDT is not set +# CONFIG_W83697HF_WDT is not set +# CONFIG_W83877F_WDT is not set +# CONFIG_W83977F_WDT is not set +# CONFIG_MACHZ_WDT is not set +# CONFIG_SBC_EPX_C3_WATCHDOG is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set # # Sonics Silicon Backplane # CONFIG_SSB_POSSIBLE=y -CONFIG_SSB=y -CONFIG_SSB_SPROM=y -CONFIG_SSB_PCIHOST_POSSIBLE=y -CONFIG_SSB_PCIHOST=y -# CONFIG_SSB_B43_PCI_BRIDGE is not set -# CONFIG_SSB_DEBUG is not set -CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y -CONFIG_SSB_DRIVER_PCICORE=y +# CONFIG_SSB is not set # # Multifunction device drivers @@ -996,11 +1362,81 @@ CONFIG_AGP_AMD64=y CONFIG_AGP_INTEL=y # CONFIG_AGP_SIS is not set # CONFIG_AGP_VIA is not set -# CONFIG_DRM is not set +CONFIG_DRM=y +# CONFIG_DRM_TDFX is not set +# CONFIG_DRM_R128 is not set +# CONFIG_DRM_RADEON is not set +# CONFIG_DRM_I810 is not set +# CONFIG_DRM_I830 is not set +CONFIG_DRM_I915=y +# CONFIG_DRM_MGA is not set +# CONFIG_DRM_SIS is not set +# CONFIG_DRM_VIA is not set +# CONFIG_DRM_SAVAGE is not set # CONFIG_VGASTATE is not set # CONFIG_VIDEO_OUTPUT_CONTROL is not set -# CONFIG_FB is not set -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +CONFIG_FB_CFB_FILLRECT=y +CONFIG_FB_CFB_COPYAREA=y +CONFIG_FB_CFB_IMAGEBLIT=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +CONFIG_FB_DEFERRED_IO=y +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_CIRRUS is not set +# CONFIG_FB_PM2 is not set +# CONFIG_FB_CYBER2000 is not set +# CONFIG_FB_ARC is not set +# CONFIG_FB_ASILIANT is not set +# CONFIG_FB_IMSTT is not set +# CONFIG_FB_VGA16 is not set +# CONFIG_FB_UVESA is not set +# CONFIG_FB_VESA is not set +CONFIG_FB_EFI=y +# CONFIG_FB_IMAC is not set +# CONFIG_FB_N411 is not set +# CONFIG_FB_HGA is not set +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_NVIDIA is not set +# CONFIG_FB_RIVA is not set +# CONFIG_FB_LE80578 is not set +# CONFIG_FB_INTEL is not set +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON is not set +# CONFIG_FB_ATY128 is not set +# CONFIG_FB_ATY is not set +# CONFIG_FB_S3 is not set +# CONFIG_FB_SAVAGE is not set +# CONFIG_FB_SIS is not set +# CONFIG_FB_NEOMAGIC is not set +# CONFIG_FB_KYRO is not set +# CONFIG_FB_3DFX is not set +# CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_VT8623 is not set +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_ARK is not set +# CONFIG_FB_PM3 is not set +# CONFIG_FB_GEODE is not set +# CONFIG_FB_VIRTUAL is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_CORGI is not set +# CONFIG_BACKLIGHT_PROGEAR is not set # # Display device support @@ -1012,9 +1448,14 @@ CONFIG_AGP_INTEL=y # CONFIG_VGA_CONSOLE=y CONFIG_VGACON_SOFT_SCROLLBACK=y -CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=256 +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 CONFIG_VIDEO_SELECT=y CONFIG_DUMMY_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE is not set +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_LOGO_LINUX_CLUT224=y # # Sound @@ -1024,35 +1465,165 @@ CONFIG_SOUND=y # # Advanced Linux Sound Architecture # -# CONFIG_SND is not set +CONFIG_SND=y +CONFIG_SND_TIMER=y +CONFIG_SND_PCM=y +CONFIG_SND_HWDEP=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQ_DUMMY=y +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +CONFIG_SND_PCM_OSS_PLUGINS=y +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_DYNAMIC_MINORS=y +CONFIG_SND_SUPPORT_OLD_API=y +CONFIG_SND_VERBOSE_PROCFS=y +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y + +# +# Generic devices +# +# CONFIG_SND_PCSP is not set +# CONFIG_SND_DUMMY is not set +# CONFIG_SND_VIRMIDI is not set +# CONFIG_SND_MTPAV is not set +# CONFIG_SND_SERIAL_U16550 is not set +# CONFIG_SND_MPU401 is not set + +# +# PCI devices +# +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ALS300 is not set +# CONFIG_SND_ALS4000 is not set +# CONFIG_SND_ALI5451 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_AZT3328 is not set +# CONFIG_SND_BT87X is not set +# CONFIG_SND_CA0106 is not set +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set +# CONFIG_SND_CS4281 is not set +# CONFIG_SND_CS46XX is not set +# CONFIG_SND_CS5530 is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_EMU10K1 is not set +# CONFIG_SND_EMU10K1X is not set +# CONFIG_SND_ENS1370 is not set +# CONFIG_SND_ENS1371 is not set +# CONFIG_SND_ES1938 is not set +# CONFIG_SND_ES1968 is not set +# CONFIG_SND_FM801 is not set +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_ANALOG=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_VIA=y +CONFIG_SND_HDA_CODEC_ATIHDMI=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_HDA_CODEC_CMEDIA=y +CONFIG_SND_HDA_CODEC_SI3054=y +CONFIG_SND_HDA_GENERIC=y +# CONFIG_SND_HDA_POWER_SAVE is not set +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_HIFIER is not set +# CONFIG_SND_ICE1712 is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set +# CONFIG_SND_KORG1212 is not set +# CONFIG_SND_MAESTRO3 is not set +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SONICVIBES is not set +# CONFIG_SND_TRIDENT is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set +# CONFIG_SND_VX222 is not set +# CONFIG_SND_YMFPCI is not set + +# +# USB devices +# +# CONFIG_SND_USB_AUDIO is not set +# CONFIG_SND_USB_USX2Y is not set +# CONFIG_SND_USB_CAIAQ is not set + +# +# PCMCIA devices +# +# CONFIG_SND_VXPOCKET is not set +# CONFIG_SND_PDAUDIOCF is not set + +# +# System on Chip audio support +# +# CONFIG_SND_SOC is not set + +# +# ALSA SoC audio for Freescale SOCs +# + +# +# SoC Audio for the Texas Instruments OMAP +# # # Open Sound System # -CONFIG_SOUND_PRIME=y -# CONFIG_SOUND_TRIDENT is not set -# CONFIG_SOUND_MSNDCLAS is not set -# CONFIG_SOUND_MSNDPIN is not set -# CONFIG_SOUND_OSS is not set +# CONFIG_SOUND_PRIME is not set CONFIG_HID_SUPPORT=y CONFIG_HID=y -# CONFIG_HID_DEBUG is not set +CONFIG_HID_DEBUG=y CONFIG_HIDRAW=y # # USB Input Devices # CONFIG_USB_HID=y -# CONFIG_USB_HIDINPUT_POWERBOOK is not set -# CONFIG_HID_FF is not set -# CONFIG_USB_HIDDEV is not set +CONFIG_USB_HIDINPUT_POWERBOOK=y +CONFIG_HID_FF=y +CONFIG_HID_PID=y +CONFIG_LOGITECH_FF=y +# CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_PANTHERLORD_FF=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_ZEROPLUS_FF=y +CONFIG_USB_HIDDEV=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=y -# CONFIG_USB_DEBUG is not set -# CONFIG_USB_ANNOUNCE_NEW_DEVICES is not set +CONFIG_USB_DEBUG=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y # # Miscellaneous USB options @@ -1060,7 +1631,7 @@ CONFIG_USB=y CONFIG_USB_DEVICEFS=y # CONFIG_USB_DEVICE_CLASS is not set # CONFIG_USB_DYNAMIC_MINORS is not set -# CONFIG_USB_SUSPEND is not set +CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set # @@ -1073,7 +1644,6 @@ CONFIG_USB_EHCI_HCD=y # CONFIG_USB_ISP116X_HCD is not set # CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y -# CONFIG_USB_OHCI_HCD_SSB is not set # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set # CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set CONFIG_USB_OHCI_LITTLE_ENDIAN=y @@ -1108,7 +1678,7 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set -# CONFIG_USB_LIBUSUAL is not set +CONFIG_USB_LIBUSUAL=y # # USB Imaging devices @@ -1148,11 +1718,78 @@ CONFIG_USB_MON=y # CONFIG_USB_GADGET is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set -# CONFIG_NEW_LEDS is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y + +# +# LED drivers +# +# CONFIG_LEDS_CLEVO_MAIL is not set + +# +# LED Triggers +# +CONFIG_LEDS_TRIGGERS=y +# CONFIG_LEDS_TRIGGER_TIMER is not set +# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set -# CONFIG_EDAC is not set -# CONFIG_RTC_CLASS is not set +CONFIG_EDAC=y + +# +# Reporting subsystems +# +# CONFIG_EDAC_DEBUG is not set +# CONFIG_EDAC_MM_EDAC is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +# CONFIG_RTC_DEBUG is not set + +# +# RTC interfaces +# +CONFIG_RTC_INTF_SYSFS=y +CONFIG_RTC_INTF_PROC=y +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set + +# +# I2C RTC drivers +# +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set + +# +# SPI RTC drivers +# + +# +# Platform RTC drivers +# +CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_V3020 is not set + +# +# on-CPU RTC drivers +# CONFIG_DMADEVICES=y # @@ -1165,6 +1802,7 @@ CONFIG_DMADEVICES=y # Firmware Drivers # # CONFIG_EDD is not set +CONFIG_EFI_VARS=y # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set CONFIG_DMIID=y @@ -1173,25 +1811,16 @@ CONFIG_DMIID=y # # File systems # -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -# CONFIG_EXT2_FS_SECURITY is not set -# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT2_FS is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y -# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_EXT3_FS_SECURITY=y # CONFIG_EXT4DEV_FS is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=y -CONFIG_REISERFS_FS=y -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -# CONFIG_REISERFS_FS_SECURITY is not set +# CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set @@ -1200,7 +1829,12 @@ CONFIG_FS_POSIX_ACL=y CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y -# CONFIG_QUOTA is not set +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +# CONFIG_QFMT_V1 is not set +CONFIG_QFMT_V2=y +CONFIG_QUOTACTL=y # CONFIG_AUTOFS_FS is not set CONFIG_AUTOFS4_FS=y # CONFIG_FUSE_FS is not set @@ -1211,7 +1845,7 @@ CONFIG_GENERIC_ACL=y # CONFIG_ISO9660_FS=y CONFIG_JOLIET=y -# CONFIG_ZISOFS is not set +CONFIG_ZISOFS=y # CONFIG_UDF_FS is not set # @@ -1229,6 +1863,7 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +CONFIG_PROC_VMCORE=y CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y @@ -1242,6 +1877,7 @@ CONFIG_HUGETLB_PAGE=y # # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set @@ -1255,15 +1891,38 @@ CONFIG_HUGETLB_PAGE=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set -# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NETWORK_FILESYSTEMS=y +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set # # Partition Types # -# CONFIG_PARTITION_ADVANCED is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y CONFIG_MSDOS_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set CONFIG_NLS=y -CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set @@ -1298,7 +1957,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_ISO8859_9 is not set # CONFIG_NLS_ISO8859_13 is not set # CONFIG_NLS_ISO8859_14 is not set -CONFIG_NLS_ISO8859_15=y +# CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set CONFIG_NLS_UTF8=y @@ -1309,21 +1968,22 @@ CONFIG_NLS_UTF8=y # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set -CONFIG_ENABLE_WARN_DEPRECATED=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y -CONFIG_UNUSED_SYMBOLS=y +# CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_FS=y # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set -CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_DETECT_SOFTLOCKUP is not set # CONFIG_SCHED_DEBUG is not set -# CONFIG_SCHEDSTATS is not set +CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y # CONFIG_DEBUG_OBJECTS is not set -# CONFIG_DEBUG_SLAB is not set +# CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_RT_MUTEX_TESTER is not set # CONFIG_DEBUG_SPINLOCK is not set @@ -1340,7 +2000,7 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set -# CONFIG_FRAME_POINTER is not set +CONFIG_FRAME_POINTER=y # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_KPROBES_SANITY_TEST is not set @@ -1348,20 +2008,21 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set -# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y # CONFIG_SAMPLES is not set # CONFIG_KGDB is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_NONPROMISC_DEVMEM is not set CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_STACK_USAGE is not set +CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_DEBUG_PER_CPU_MAPS is not set # CONFIG_X86_PTDUMP is not set -# CONFIG_DEBUG_RODATA is not set +CONFIG_DEBUG_RODATA=y # CONFIG_DIRECT_GBPAGES is not set -# CONFIG_DEBUG_NX_TEST is not set +# CONFIG_DEBUG_RODATA_TEST is not set +CONFIG_DEBUG_NX_TEST=m CONFIG_X86_MPPARSE=y # CONFIG_IOMMU_DEBUG is not set CONFIG_IO_DELAY_TYPE_0X80=0 @@ -1373,18 +2034,115 @@ CONFIG_IO_DELAY_0X80=y # CONFIG_IO_DELAY_UDELAY is not set # CONFIG_IO_DELAY_NONE is not set CONFIG_DEFAULT_IO_DELAY_TYPE=0 -# CONFIG_DEBUG_BOOT_PARAMS is not set +CONFIG_DEBUG_BOOT_PARAMS=y # CONFIG_CPA_DEBUG is not set # # Security options # -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set -# CONFIG_SECURITY_FILE_CAPABILITIES is not set -# CONFIG_CRYPTO is not set -# CONFIG_HAVE_KVM is not set -# CONFIG_VIRTUALIZATION is not set +CONFIG_KEYS=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +# CONFIG_SECURITY_NETWORK_XFRM is not set +CONFIG_SECURITY_CAPABILITIES=y +CONFIG_SECURITY_FILE_CAPABILITIES=y +# CONFIG_SECURITY_ROOTPLUG is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_AVC_STATS=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set +# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set +# CONFIG_SECURITY_SMACK is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +CONFIG_CRYPTO_SHA1=y +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=y +# CONFIG_CRYPTO_AES_X86_64 is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_ARC4=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SALSA20_X86_64 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_X86_64 is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_HAVE_KVM=y +CONFIG_VIRTUALIZATION=y # CONFIG_KVM is not set # CONFIG_VIRTIO_PCI is not set # CONFIG_VIRTIO_BALLOON is not set -- cgit v0.10.2 From 205f93288093df69f9ab5f6981aef27b91088b28 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 5 May 2008 17:52:52 -0400 Subject: x86: add new cache descriptor The latest rev of Intel doc AP-485 details a new cache descriptor that we don't yet support. A 6MB 24-way assoc L2 cache. Signed-off-by: Dave Jones Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 26d615d..2c8afaf 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -62,6 +62,7 @@ static struct _cache_table cache_table[] __cpuinitdata = { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ + { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */ { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ -- cgit v0.10.2 From 0327318445d55808991a63137cfb698a90ab6adf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 18 Apr 2008 17:49:15 -0700 Subject: x86_64: simplify the memtest parameter setting use CONFIG_MEMTEST only. if it is set, will have memtest=0 (disabled) need to have memtest=4 in command line to test more patterns. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae..a1a9021 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -419,35 +419,19 @@ config PARAVIRT endif -config MEMTEST_BOOTPARAM - bool "Memtest boot parameter" +config MEMTEST + bool "Memtest" depends on X86_64 default y help This option adds a kernel parameter 'memtest', which allows memtest - to be disabled at boot. If this option is selected, memtest - functionality can be disabled with memtest=0 on the kernel - command line. The purpose of this option is to allow a single - kernel image to be distributed with memtest built in, but not - necessarily enabled. - + to be set. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=4, mean do 4 test patterns. If you are unsure how to answer this question, answer Y. -config MEMTEST_BOOTPARAM_VALUE - int "Memtest boot parameter default value (0-4)" - depends on MEMTEST_BOOTPARAM - range 0 4 - default 0 - help - This option sets the default value for the kernel parameter - 'memtest', which allows memtest to be disabled at boot. If this - option is set to 0 (zero), the memtest kernel parameter will - default to 0, disabling memtest at bootup. If this option is - set to 4, the memtest kernel parameter will default to 4, - enabling memtest at bootup, and use that as pattern number. - - If you are unsure how to answer this question, answer 0. - config ACPI_SRAT def_bool y depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 32ba13b..c6d8131 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -431,7 +431,7 @@ static void __init init_gbpages(void) direct_gbpages = 0; } -#ifdef CONFIG_MEMTEST_BOOTPARAM +#ifdef CONFIG_MEMTEST static void __init memtest(unsigned long start_phys, unsigned long size, unsigned pattern) @@ -493,7 +493,8 @@ static void __init memtest(unsigned long start_phys, unsigned long size, } -static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE; +/* default is disabled */ +static int memtest_pattern __initdata; static int __init parse_memtest(char *arg) { -- cgit v0.10.2 From 88f85a55c0645c2b7e03bf34d2a90eddf6de34fa Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 12 May 2008 16:42:43 -0500 Subject: JFS: 0 is not valid errno value so return NULL from jfs_lookup Signed-off-by: Marcin Slusarz Signed-off-by: Dave Kleikamp Cc: jfs-discussion@lists.sourceforge.net Cc: Alexander Viro diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 0ba6778..2aba823 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1455,7 +1455,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc free_UCSname(&key); if (rc == -ENOENT) { d_add(dentry, NULL); - return ERR_PTR(0); + return NULL; } else if (rc) { jfs_err("jfs_lookup: dtSearch returned %d", rc); return ERR_PTR(rc); -- cgit v0.10.2 From b2e03ca7485cac033a0667d9e45e28d32fdee9a5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 13 May 2008 08:22:10 -0500 Subject: JFS: switch to seq_files Signed-off-by: Alexey Dobriyan Signed-off-by: Dave Kleikamp diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index bf6ab19..6a73de8 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "jfs_incore.h" #include "jfs_filsys.h" @@ -30,29 +31,19 @@ static struct proc_dir_entry *base; #ifdef CONFIG_JFS_DEBUG -static int loglevel_read(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int jfs_loglevel_proc_show(struct seq_file *m, void *v) { - int len; - - len = sprintf(page, "%d\n", jfsloglevel); - - len -= off; - *start = page + off; - - if (len > count) - len = count; - else - *eof = 1; - - if (len < 0) - len = 0; + seq_printf(m, "%d\n", jfsloglevel); + return 0; +} - return len; +static int jfs_loglevel_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_loglevel_proc_show, NULL); } -static int loglevel_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t jfs_loglevel_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) { char c; @@ -65,22 +56,30 @@ static int loglevel_write(struct file *file, const char __user *buffer, jfsloglevel = c - '0'; return count; } + +static const struct file_operations jfs_loglevel_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_loglevel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = jfs_loglevel_proc_write, +}; #endif static struct { const char *name; - read_proc_t *read_fn; - write_proc_t *write_fn; + const struct file_operations *proc_fops; } Entries[] = { #ifdef CONFIG_JFS_STATISTICS - { "lmstats", jfs_lmstats_read, }, - { "txstats", jfs_txstats_read, }, - { "xtstat", jfs_xtstat_read, }, - { "mpstat", jfs_mpstat_read, }, + { "lmstats", &jfs_lmstats_proc_fops, }, + { "txstats", &jfs_txstats_proc_fops, }, + { "xtstat", &jfs_xtstat_proc_fops, }, + { "mpstat", &jfs_mpstat_proc_fops, }, #endif #ifdef CONFIG_JFS_DEBUG - { "TxAnchor", jfs_txanchor_read, }, - { "loglevel", loglevel_read, loglevel_write } + { "TxAnchor", &jfs_txanchor_proc_fops, }, + { "loglevel", &jfs_loglevel_proc_fops } #endif }; #define NPROCENT ARRAY_SIZE(Entries) @@ -93,13 +92,8 @@ void jfs_proc_init(void) return; base->owner = THIS_MODULE; - for (i = 0; i < NPROCENT; i++) { - struct proc_dir_entry *p; - if ((p = create_proc_entry(Entries[i].name, 0, base))) { - p->read_proc = Entries[i].read_fn; - p->write_proc = Entries[i].write_fn; - } - } + for (i = 0; i < NPROCENT; i++) + proc_create(Entries[i].name, 0, base, Entries[i].proc_fops); } void jfs_proc_clean(void) diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index 044c1e6..eafd130 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h @@ -62,7 +62,7 @@ extern void jfs_proc_clean(void); extern int jfsloglevel; -extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); +extern const struct file_operations jfs_txanchor_proc_fops; /* information message: e.g., configuration, major event */ #define jfs_info(fmt, arg...) do { \ @@ -105,10 +105,10 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); * ---------- */ #ifdef CONFIG_JFS_STATISTICS -extern int jfs_lmstats_read(char *, char **, off_t, int, int *, void *); -extern int jfs_txstats_read(char *, char **, off_t, int, int *, void *); -extern int jfs_mpstat_read(char *, char **, off_t, int, int *, void *); -extern int jfs_xtstat_read(char *, char **, off_t, int, int *, void *); +extern const struct file_operations jfs_lmstats_proc_fops; +extern const struct file_operations jfs_txstats_proc_fops; +extern const struct file_operations jfs_mpstat_proc_fops; +extern const struct file_operations jfs_xtstat_proc_fops; #define INCREMENT(x) ((x)++) #define DECREMENT(x) ((x)--) diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 325a967..cd2ec29 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -69,6 +69,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" @@ -2503,13 +2504,9 @@ exit: } #ifdef CONFIG_JFS_STATISTICS -int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, - int *eof, void *data) +static int jfs_lmstats_proc_show(struct seq_file *m, void *v) { - int len = 0; - off_t begin; - - len += sprintf(buffer, + seq_printf(m, "JFS Logmgr stats\n" "================\n" "commits = %d\n" @@ -2522,19 +2519,19 @@ int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, lmStat.pagedone, lmStat.full_page, lmStat.partial_page); + return 0; +} - begin = offset; - *start = buffer + begin; - len -= begin; - - if (len > length) - len = length; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; +static int jfs_lmstats_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_lmstats_proc_show, NULL); } + +const struct file_operations jfs_lmstats_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_lmstats_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* CONFIG_JFS_STATISTICS */ diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index d1e64f2..854ff0e 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -19,10 +19,12 @@ #include #include +#include #include #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" @@ -804,13 +806,9 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len) } #ifdef CONFIG_JFS_STATISTICS -int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, - int *eof, void *data) +static int jfs_mpstat_proc_show(struct seq_file *m, void *v) { - int len = 0; - off_t begin; - - len += sprintf(buffer, + seq_printf(m, "JFS Metapage statistics\n" "=======================\n" "page allocations = %d\n" @@ -819,19 +817,19 @@ int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, mpStat.pagealloc, mpStat.pagefree, mpStat.lockwait); + return 0; +} - begin = offset; - *start = buffer + begin; - len -= begin; - - if (len > length) - len = length; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; +static int jfs_mpstat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_mpstat_proc_show, NULL); } + +const struct file_operations jfs_mpstat_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_mpstat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index e7c60ae..f26e4d0 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -3009,11 +3010,8 @@ int jfs_sync(void *arg) } #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) -int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, - int *eof, void *data) +static int jfs_txanchor_proc_show(struct seq_file *m, void *v) { - int len = 0; - off_t begin; char *freewait; char *freelockwait; char *lowlockwait; @@ -3025,7 +3023,7 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, lowlockwait = waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; - len += sprintf(buffer, + seq_printf(m, "JFS TxAnchor\n" "============\n" "freetid = %d\n" @@ -3044,31 +3042,27 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, TxAnchor.tlocksInUse, jfs_tlocks_low, list_empty(&TxAnchor.unlock_queue) ? "" : "not "); + return 0; +} - begin = offset; - *start = buffer + begin; - len -= begin; - - if (len > length) - len = length; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; +static int jfs_txanchor_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_txanchor_proc_show, NULL); } + +const struct file_operations jfs_txanchor_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_txanchor_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) -int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, - int *eof, void *data) +static int jfs_txstats_proc_show(struct seq_file *m, void *v) { - int len = 0; - off_t begin; - - len += sprintf(buffer, + seq_printf(m, "JFS TxStats\n" "===========\n" "calls to txBegin = %d\n" @@ -3089,19 +3083,19 @@ int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, TxStat.txBeginAnon_lockslow, TxStat.txLockAlloc, TxStat.txLockAlloc_freelock); + return 0; +} - begin = offset; - *start = buffer + begin; - len -= begin; - - if (len > length) - len = length; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; +static int jfs_txstats_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_txstats_proc_show, NULL); } + +const struct file_operations jfs_txstats_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_txstats_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 5a61ebf..ae3acaf 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -20,7 +20,9 @@ */ #include +#include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" @@ -4134,13 +4136,9 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) } #ifdef CONFIG_JFS_STATISTICS -int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, - int *eof, void *data) +static int jfs_xtstat_proc_show(struct seq_file *m, void *v) { - int len = 0; - off_t begin; - - len += sprintf(buffer, + seq_printf(m, "JFS Xtree statistics\n" "====================\n" "searches = %d\n" @@ -4149,19 +4147,19 @@ int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, xtStat.search, xtStat.fastSearch, xtStat.split); + return 0; +} - begin = offset; - *start = buffer + begin; - len -= begin; - - if (len > length) - len = length; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; +static int jfs_xtstat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_xtstat_proc_show, NULL); } + +const struct file_operations jfs_xtstat_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_xtstat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif -- cgit v0.10.2 From 493d35863dbb692c38c1415fd83d88dfb902ae37 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 14 May 2008 16:22:58 -0700 Subject: mutex-debug: check mutex magic before owner Currently, the mutex debug code checks the lock->owner before lock->magic, so a corrupt mutex will most likely result in failing the owner check, rather than the magic check. This change to debug_mutex_unlock does the magic check first, so we have a better idea of what breaks. Signed-off-by: Jeremy Kerr Acked-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index 3aaa06c..1d94160 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -79,8 +79,8 @@ void debug_mutex_unlock(struct mutex *lock) if (unlikely(!debug_locks)) return; - DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); DEBUG_LOCKS_WARN_ON(lock->magic != lock); + DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); } -- cgit v0.10.2 From dedd4915af40cff6614707c50dcae43d17beadec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 17 May 2008 08:28:33 +0200 Subject: bitops: fix build in struct thread_info we can move flags from u32 to natural size - assembly code uses offsetof. Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 74481b7..25d7105 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -24,10 +24,10 @@ struct exec_domain; struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ - __u32 flags; /* low level flags */ + unsigned long flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, + int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; struct restart_block restart_block; -- cgit v0.10.2 From 75bd2ef1457998791cfc89cd59927574488fc22a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 15 May 2008 09:09:23 -0600 Subject: bsg: cdev lock_kernel() pushdown Push the cdev lock_kernel call into bsg_open(). Signed-off-by: Jonathan Corbet diff --git a/block/bsg.c b/block/bsg.c index f0b7cd3..dbe3ffd 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -834,7 +835,11 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file) static int bsg_open(struct inode *inode, struct file *file) { - struct bsg_device *bd = bsg_get_device(inode, file); + struct bsg_device *bd; + + lock_kernel(); + bd = bsg_get_device(inode, file); + unlock_kernel(); if (IS_ERR(bd)) return PTR_ERR(bd); -- cgit v0.10.2 From 0c401df37ef9f45f35390a5574e24cbf3f916acf Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 15 May 2008 09:10:18 -0600 Subject: cris: cdev lock_kernel() pushdown Push the cdev lock_kernel() call into cris drivers. Signed-off-by: Jonathan Corbet diff --git a/arch/cris/arch-v10/drivers/gpio.c b/arch/cris/arch-v10/drivers/gpio.c index 68a998b..86048e6 100644 --- a/arch/cris/arch-v10/drivers/gpio.c +++ b/arch/cris/arch-v10/drivers/gpio.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -323,6 +324,7 @@ gpio_open(struct inode *inode, struct file *filp) if (!priv) return -ENOMEM; + lock_kernel(); priv->minor = p; /* initialize the io/alarm struct */ @@ -357,6 +359,7 @@ gpio_open(struct inode *inode, struct file *filp) alarmlist = priv; spin_unlock_irqrestore(&gpio_lock, flags); + unlock_kernel(); return 0; } diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index 069546e..91fea62 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -443,18 +444,21 @@ static int sync_serial_open(struct inode *inode, struct file *file) int dev = MINOR(inode->i_rdev); struct sync_port *port; int mode; + int err = -EBUSY; + lock_kernel(); DEBUG(printk(KERN_DEBUG "Open sync serial port %d\n", dev)); if (dev < 0 || dev >= NUMBER_OF_PORTS || !ports[dev].enabled) { DEBUG(printk(KERN_DEBUG "Invalid minor %d\n", dev)); - return -ENODEV; + err = -ENODEV; + goto out; } port = &ports[dev]; /* Allow open this device twice (assuming one reader and one writer) */ if (port->busy == 2) { DEBUG(printk(KERN_DEBUG "Device is busy.. \n")); - return -EBUSY; + goto out; } if (port->init_irqs) { if (port->use_dma) { @@ -465,14 +469,14 @@ static int sync_serial_open(struct inode *inode, struct file *file) &ports[0])) { printk(KERN_CRIT "Can't alloc " "sync serial port 1 IRQ"); - return -EBUSY; + goto out; } else if (request_irq(25, rx_interrupt, 0, "synchronous serial 1 dma rx", &ports[0])) { free_irq(24, &port[0]); printk(KERN_CRIT "Can't alloc " "sync serial port 1 IRQ"); - return -EBUSY; + goto out; } else if (cris_request_dma(8, "synchronous serial 1 dma tr", DMA_VERBOSE_ON_ERROR, @@ -482,7 +486,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) printk(KERN_CRIT "Can't alloc " "sync serial port 1 " "TX DMA channel"); - return -EBUSY; + goto out; } else if (cris_request_dma(9, "synchronous serial 1 dma rec", DMA_VERBOSE_ON_ERROR, @@ -493,7 +497,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) printk(KERN_CRIT "Can't alloc " "sync serial port 1 " "RX DMA channel"); - return -EBUSY; + goto out; } #endif RESET_DMA(8); WAIT_DMA(8); @@ -520,14 +524,14 @@ static int sync_serial_open(struct inode *inode, struct file *file) &ports[1])) { printk(KERN_CRIT "Can't alloc " "sync serial port 3 IRQ"); - return -EBUSY; + goto out; } else if (request_irq(21, rx_interrupt, 0, "synchronous serial 3 dma rx", &ports[1])) { free_irq(20, &ports[1]); printk(KERN_CRIT "Can't alloc " "sync serial port 3 IRQ"); - return -EBUSY; + goto out; } else if (cris_request_dma(4, "synchronous serial 3 dma tr", DMA_VERBOSE_ON_ERROR, @@ -537,7 +541,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) printk(KERN_CRIT "Can't alloc " "sync serial port 3 " "TX DMA channel"); - return -EBUSY; + goto out; } else if (cris_request_dma(5, "synchronous serial 3 dma rec", DMA_VERBOSE_ON_ERROR, @@ -548,7 +552,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) printk(KERN_CRIT "Can't alloc " "sync serial port 3 " "RX DMA channel"); - return -EBUSY; + goto out; } #endif RESET_DMA(4); WAIT_DMA(4); @@ -581,7 +585,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) &ports[0])) { printk(KERN_CRIT "Can't alloc " "sync serial manual irq"); - return -EBUSY; + goto out; } } else if (port == &ports[1]) { if (request_irq(8, @@ -591,7 +595,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) &ports[1])) { printk(KERN_CRIT "Can't alloc " "sync serial manual irq"); - return -EBUSY; + goto out; } } port->init_irqs = 0; @@ -620,7 +624,11 @@ static int sync_serial_open(struct inode *inode, struct file *file) *R_IRQ_MASK1_SET = 1 << port->data_avail_bit; DEBUG(printk(KERN_DEBUG "sser%d rec started\n", dev)); } - return 0; + ret = 0; + +out: + unlock_kernel(); + return ret; } static int sync_serial_release(struct inode *inode, struct file *file) diff --git a/arch/cris/arch-v32/drivers/mach-a3/gpio.c b/arch/cris/arch-v32/drivers/mach-a3/gpio.c index de107da..ef98608 100644 --- a/arch/cris/arch-v32/drivers/mach-a3/gpio.c +++ b/arch/cris/arch-v32/drivers/mach-a3/gpio.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -390,6 +391,8 @@ static int gpio_open(struct inode *inode, struct file *filp) if (!priv) return -ENOMEM; + + lock_kernel(); memset(priv, 0, sizeof(*priv)); priv->minor = p; @@ -412,6 +415,7 @@ static int gpio_open(struct inode *inode, struct file *filp) spin_unlock_irq(&gpio_lock); } + unlock_kernel(); return 0; } diff --git a/arch/cris/arch-v32/drivers/mach-fs/gpio.c b/arch/cris/arch-v32/drivers/mach-fs/gpio.c index 7863fd4..fe1fde8 100644 --- a/arch/cris/arch-v32/drivers/mach-fs/gpio.c +++ b/arch/cris/arch-v32/drivers/mach-fs/gpio.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -426,9 +427,10 @@ gpio_open(struct inode *inode, struct file *filp) return -EINVAL; priv = kmalloc(sizeof(struct gpio_private), GFP_KERNEL); - if (!priv) return -ENOMEM; + + lock_kernel(); memset(priv, 0, sizeof(*priv)); priv->minor = p; @@ -449,6 +451,7 @@ gpio_open(struct inode *inode, struct file *filp) alarmlist = priv; spin_unlock_irq(&alarm_lock); + unlock_kernel(); return 0; } diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index 47c377d..d2a0fbf 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -429,23 +430,26 @@ static inline int sync_data_avail_to_end(struct sync_port *port) static int sync_serial_open(struct inode *inode, struct file *file) { int dev = iminor(inode); + int ret = -EBUSY; sync_port *port; reg_dma_rw_cfg cfg = {.en = regk_dma_yes}; reg_dma_rw_intr_mask intr_mask = {.data = regk_dma_yes}; + lock_kernel(); DEBUG(printk(KERN_DEBUG "Open sync serial port %d\n", dev)); if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) { DEBUG(printk(KERN_DEBUG "Invalid minor %d\n", dev)); - return -ENODEV; + ret = -ENODEV; + goto out; } port = &ports[dev]; /* Allow open this device twice (assuming one reader and one writer) */ if (port->busy == 2) { DEBUG(printk(KERN_DEBUG "Device is busy.. \n")); - return -EBUSY; + goto out; } @@ -459,7 +463,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) "synchronous serial 0 dma tr", &ports[0])) { printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ"); - return -EBUSY; + goto out; } else if (request_irq(DMA_IN_INTR_VECT, rx_interrupt, 0, @@ -467,7 +471,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) &ports[0])) { free_irq(DMA_OUT_INTR_VECT, &port[0]); printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ"); - return -EBUSY; + goto out; } else if (crisv32_request_dma(OUT_DMA_NBR, "synchronous serial 0 dma tr", DMA_VERBOSE_ON_ERROR, @@ -476,7 +480,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) free_irq(DMA_OUT_INTR_VECT, &port[0]); free_irq(DMA_IN_INTR_VECT, &port[0]); printk(KERN_CRIT "Can't allocate sync serial port 0 TX DMA channel"); - return -EBUSY; + goto out; } else if (crisv32_request_dma(IN_DMA_NBR, "synchronous serial 0 dma rec", DMA_VERBOSE_ON_ERROR, @@ -486,7 +490,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) free_irq(DMA_OUT_INTR_VECT, &port[0]); free_irq(DMA_IN_INTR_VECT, &port[0]); printk(KERN_CRIT "Can't allocate sync serial port 1 RX DMA channel"); - return -EBUSY; + goto out; } #endif } @@ -499,7 +503,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) "synchronous serial 1 dma tr", &ports[1])) { printk(KERN_CRIT "Can't allocate sync serial port 1 IRQ"); - return -EBUSY; + goto out; } else if (request_irq(DMA7_INTR_VECT, rx_interrupt, 0, @@ -507,7 +511,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) &ports[1])) { free_irq(DMA6_INTR_VECT, &ports[1]); printk(KERN_CRIT "Can't allocate sync serial port 3 IRQ"); - return -EBUSY; + goto out; } else if (crisv32_request_dma( SYNC_SER1_TX_DMA_NBR, "synchronous serial 1 dma tr", @@ -517,7 +521,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) free_irq(DMA6_INTR_VECT, &ports[1]); free_irq(DMA7_INTR_VECT, &ports[1]); printk(KERN_CRIT "Can't allocate sync serial port 3 TX DMA channel"); - return -EBUSY; + goto out; } else if (crisv32_request_dma( SYNC_SER1_RX_DMA_NBR, "synchronous serial 3 dma rec", @@ -528,7 +532,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) free_irq(DMA6_INTR_VECT, &ports[1]); free_irq(DMA7_INTR_VECT, &ports[1]); printk(KERN_CRIT "Can't allocate sync serial port 3 RX DMA channel"); - return -EBUSY; + goto out; } #endif } @@ -554,7 +558,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) "synchronous serial manual irq", &ports[0])) { printk("Can't allocate sync serial manual irq"); - return -EBUSY; + goto out; } } #ifdef CONFIG_ETRAXFS @@ -565,7 +569,7 @@ static int sync_serial_open(struct inode *inode, struct file *file) "synchronous serial manual irq", &ports[1])) { printk(KERN_CRIT "Can't allocate sync serial manual irq"); - return -EBUSY; + goto out; } } #endif @@ -578,7 +582,10 @@ static int sync_serial_open(struct inode *inode, struct file *file) } /* port->init_irqs */ port->busy++; - return 0; + ret = 0; +out: + unlock_kernel(); + return ret; } static int sync_serial_release(struct inode *inode, struct file *file) -- cgit v0.10.2 From 7558da942e51933b5e6aa5e851d4da1df0cd6752 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 15 May 2008 09:10:50 -0600 Subject: mips: cdev lock_kernel() pushdown Push the cdev lock_kernel() call into MIPS-specific drivers. Signed-off-by: Jonathan Corbet diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c index b88f1c1..b556419 100644 --- a/arch/mips/kernel/rtlx.c +++ b/arch/mips/kernel/rtlx.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -392,8 +393,12 @@ out: static int file_open(struct inode *inode, struct file *filp) { int minor = iminor(inode); + int err; - return rtlx_open(minor, (filp->f_flags & O_NONBLOCK) ? 0 : 1); + lock_kernel(); + err = rtlx_open(minor, (filp->f_flags & O_NONBLOCK) ? 0 : 1); + unlock_kernel(); + return err; } static int file_release(struct inode *inode, struct file *filp) diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 2794501..972b2d2 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -1050,17 +1051,20 @@ static int vpe_open(struct inode *inode, struct file *filp) enum vpe_state state; struct vpe_notifications *not; struct vpe *v; - int ret; + int ret, err = 0; + lock_kernel(); if (minor != iminor(inode)) { /* assume only 1 device at the moment. */ printk(KERN_WARNING "VPE loader: only vpe1 is supported\n"); - return -ENODEV; + err = -ENODEV; + goto out; } if ((v = get_vpe(tclimit)) == NULL) { printk(KERN_WARNING "VPE loader: unable to get vpe\n"); - return -ENODEV; + err = -ENODEV; + goto out; } state = xchg(&v->state, VPE_STATE_INUSE); @@ -1100,6 +1104,8 @@ static int vpe_open(struct inode *inode, struct file *filp) v->shared_ptr = NULL; v->__start = 0; +out: + unlock_kernel(); return 0; } diff --git a/arch/mips/sibyte/common/sb_tbprof.c b/arch/mips/sibyte/common/sb_tbprof.c index 63b444e..28b012a 100644 --- a/arch/mips/sibyte/common/sb_tbprof.c +++ b/arch/mips/sibyte/common/sb_tbprof.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -402,18 +403,26 @@ static int sbprof_zbprof_stop(void) static int sbprof_tb_open(struct inode *inode, struct file *filp) { int minor; + int err = 0; + lock_kernel(); minor = iminor(inode); - if (minor != 0) - return -ENODEV; + if (minor != 0) { + err = -ENODEV; + goto out; + } - if (xchg(&sbp.open, SB_OPENING) != SB_CLOSED) - return -EBUSY; + if (xchg(&sbp.open, SB_OPENING) != SB_CLOSED) { + err = -EBUSY; + goto out; + } memset(&sbp, 0, sizeof(struct sbprof_tb)); sbp.sbprof_tbbuf = vmalloc(MAX_TBSAMPLE_BYTES); - if (!sbp.sbprof_tbbuf) - return -ENOMEM; + if (!sbp.sbprof_tbbuf) { + err = -ENOMEM; + goto out; + } memset(sbp.sbprof_tbbuf, 0, MAX_TBSAMPLE_BYTES); init_waitqueue_head(&sbp.tb_sync); init_waitqueue_head(&sbp.tb_read); @@ -421,7 +430,9 @@ static int sbprof_tb_open(struct inode *inode, struct file *filp) sbp.open = SB_OPEN; - return 0; + out: + unlock_kernel(); + return err; } static int sbprof_tb_release(struct inode *inode, struct file *filp) -- cgit v0.10.2 From 1fa984b583a809423ddb1d88fa46484071f85f80 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 15 May 2008 09:11:26 -0600 Subject: sh: cdev lock_kernel() pushdown Push the cdev lock_kernel() call down into the sh gio driver. Signed-off-by: Jonathan Corbet diff --git a/arch/sh/boards/landisk/gio.c b/arch/sh/boards/landisk/gio.c index 1702508..0c15b0a 100644 --- a/arch/sh/boards/landisk/gio.c +++ b/arch/sh/boards/landisk/gio.c @@ -14,6 +14,7 @@ */ #include #include +#include #include #include #include @@ -32,17 +33,20 @@ static int openCnt; static int gio_open(struct inode *inode, struct file *filp) { int minor; + int ret = -ENOENT; + lock_kernel(); minor = MINOR(inode->i_rdev); if (minor < DEVCOUNT) { if (openCnt > 0) { - return -EALREADY; + ret = -EALREADY; } else { openCnt++; - return 0; + ret = 0; } } - return -ENOENT; + unlock_kernel(); + return ret; } static int gio_close(struct inode *inode, struct file *filp) -- cgit v0.10.2 From 5119e92efc733d730b34f9605a5ae61fdc4bf649 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 15 May 2008 09:12:01 -0600 Subject: x86: cdev lock_kernel() pushdown Push the cdev lock_kernel() call down into the x86 msr and cpuid drivers. Signed-off-by: Jonathan Corbet diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index daff52a..71f1c26 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -107,15 +108,23 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, static int cpuid_open(struct inode *inode, struct file *file) { - unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &cpu_data(cpu); - - if (cpu >= NR_CPUS || !cpu_online(cpu)) - return -ENXIO; /* No such CPU */ + unsigned int cpu; + struct cpuinfo_x86 *c; + int ret = 0; + + lock_kernel(); + + cpu = iminor(file->f_path.dentry->d_inode); + if (cpu >= NR_CPUS || !cpu_online(cpu)) { + ret = -ENXIO; /* No such CPU */ + goto out; + } + c = &cpu_data(cpu); if (c->cpuid_level < 0) - return -EIO; /* CPUID not supported */ - - return 0; + ret = -EIO; /* CPUID not supported */ +out: + unlock_kernel(); + return ret; } /* diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 1f3abe0..a153b39 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -117,12 +117,20 @@ static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); struct cpuinfo_x86 *c = &cpu_data(cpu); + int ret = 0; - if (cpu >= NR_CPUS || !cpu_online(cpu)) - return -ENXIO; /* No such CPU */ - if (!cpu_has(c, X86_FEATURE_MSR)) - return -EIO; /* MSR not supported */ + lock_kernel(); + cpu = iminor(file->f_path.dentry->d_inode); + if (cpu >= NR_CPUS || !cpu_online(cpu)) { + ret = -ENXIO; /* No such CPU */ + goto out; + } + c = &cpu_data(cpu); + if (!cpu_has(c, X86_FEATURE_MSR)) + ret = -EIO; /* MSR not supported */ +out: + unlock_kernel(); return 0; } -- cgit v0.10.2 From 3db633ee352bfe20d4a2b0c3c8a46ce31a6c7149 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 15 May 2008 09:21:02 -0600 Subject: i2c: cdev lock_kernel() pushdown Signed-off-by: Jonathan Corbet diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index d34c14c..006a585 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -34,6 +34,7 @@ #include #include #include +#include #include static struct i2c_driver i2cdev_driver; @@ -441,14 +442,20 @@ static int i2cdev_open(struct inode *inode, struct file *file) struct i2c_client *client; struct i2c_adapter *adap; struct i2c_dev *i2c_dev; + int ret = 0; + lock_kernel(); i2c_dev = i2c_dev_get_by_minor(minor); - if (!i2c_dev) - return -ENODEV; + if (!i2c_dev) { + ret = -ENODEV; + goto out; + } adap = i2c_get_adapter(i2c_dev->adap->nr); - if (!adap) - return -ENODEV; + if (!adap) { + ret = -ENODEV; + goto out; + } /* This creates an anonymous i2c_client, which may later be * pointed to some address using I2C_SLAVE or I2C_SLAVE_FORCE. @@ -460,7 +467,8 @@ static int i2cdev_open(struct inode *inode, struct file *file) client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) { i2c_put_adapter(adap); - return -ENOMEM; + ret = -ENOMEM; + goto out; } snprintf(client->name, I2C_NAME_SIZE, "i2c-dev %d", adap->nr); client->driver = &i2cdev_driver; @@ -468,7 +476,9 @@ static int i2cdev_open(struct inode *inode, struct file *file) client->adapter = adap; file->private_data = client; - return 0; +out: + unlock_kernel(); + return ret; } static int i2cdev_release(struct inode *inode, struct file *file) -- cgit v0.10.2 From 0911810755fc9f15659cc3cb43912633b90027a0 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 15 May 2008 09:21:33 -0600 Subject: cosa: cdev lock_kernel() pushdown Signed-off-by: Jonathan Corbet diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index b0fce13..5827324 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -92,6 +92,7 @@ #include #include #include +#include #undef COSA_SLOW_IO /* for testing purposes only */ @@ -970,15 +971,21 @@ static int cosa_open(struct inode *inode, struct file *file) struct channel_data *chan; unsigned long flags; int n; + int ret = 0; + lock_kernel(); if ((n=iminor(file->f_path.dentry->d_inode)>>CARD_MINOR_BITS) - >= nr_cards) - return -ENODEV; + >= nr_cards) { + ret = -ENODEV; + goto out; + } cosa = cosa_cards+n; if ((n=iminor(file->f_path.dentry->d_inode) - & ((1<= cosa->nchannels) - return -ENODEV; + & ((1<= cosa->nchannels) { + ret = -ENODEV; + goto out; + } chan = cosa->chan + n; file->private_data = chan; @@ -987,7 +994,8 @@ static int cosa_open(struct inode *inode, struct file *file) if (chan->usage < 0) { /* in netdev mode */ spin_unlock_irqrestore(&cosa->lock, flags); - return -EBUSY; + ret = -EBUSY; + goto out; } cosa->usage++; chan->usage++; @@ -996,7 +1004,9 @@ static int cosa_open(struct inode *inode, struct file *file) chan->setup_rx = chrdev_setup_rx; chan->rx_done = chrdev_rx_done; spin_unlock_irqrestore(&cosa->lock, flags); - return 0; +out: + unlock_kernel(); + return ret; } static int cosa_release(struct inode *inode, struct file *file) -- cgit v0.10.2 From 0bec0bba7a507bdaf07312fcabdc00b5576abb32 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 15 May 2008 09:25:03 -0600 Subject: pcmcia: cdev lock_kernel() pushdown Signed-off-by: Jonathan Corbet diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c index 5f186ab..138396e 100644 --- a/drivers/pcmcia/pcmcia_ioctl.c +++ b/drivers/pcmcia/pcmcia_ioctl.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #define IN_CARD_SERVICES @@ -397,20 +398,27 @@ static int ds_open(struct inode *inode, struct file *file) struct pcmcia_socket *s; user_info_t *user; static int warning_printed = 0; + int ret = 0; ds_dbg(0, "ds_open(socket %d)\n", i); + lock_kernel(); s = pcmcia_get_socket_by_nr(i); - if (!s) - return -ENODEV; + if (!s) { + ret = -ENODEV; + goto out; + } s = pcmcia_get_socket(s); - if (!s) - return -ENODEV; + if (!s) { + ret = -ENODEV; + goto out; + } if ((file->f_flags & O_ACCMODE) != O_RDONLY) { if (s->pcmcia_state.busy) { pcmcia_put_socket(s); - return -EBUSY; + ret = -EBUSY; + goto out; } else s->pcmcia_state.busy = 1; @@ -419,7 +427,8 @@ static int ds_open(struct inode *inode, struct file *file) user = kmalloc(sizeof(user_info_t), GFP_KERNEL); if (!user) { pcmcia_put_socket(s); - return -ENOMEM; + ret = -ENOMEM; + goto out; } user->event_tail = user->event_head = 0; user->next = s->user; @@ -441,7 +450,9 @@ static int ds_open(struct inode *inode, struct file *file) if (s->pcmcia_state.present) queue_event(user, CS_EVENT_CARD_INSERTION); - return 0; +out: + unlock_kernel(); + return ret; } /* ds_open */ /*====================================================================*/ -- cgit v0.10.2 From 8b09dee67f484e9b42114b1a1f068e080fd7aa56 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcupreempt: remove duplicate prototypes rcu_batches_completed and rcu_patches_completed_bh are both declared in rcuclassic.h and rcupreempt.h. This patch removes the extra prototypes for them from rcupdate.h. rcu_batches_completed_bh is defined as a static inline in the rcupreempt.h header file. Trying to export this as EXPORT_SYMBOL_GPL causes linking problems with the powerpc linker. There's no need to export a static inlined function. Modules must be compiled with the same type of RCU implementation as the kernel they are for. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d42dbec..ec2fc5b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -224,8 +224,6 @@ extern void call_rcu_bh(struct rcu_head *head, /* Exported common interfaces */ extern void synchronize_rcu(void); extern void rcu_barrier(void); -extern long rcu_batches_completed(void); -extern long rcu_batches_completed_bh(void); /* Internal to kernel */ extern void rcu_init(void); diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index e1cdf19..5e02b77 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -217,8 +217,6 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); -EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); - void __rcu_read_lock(void) { int idx; -- cgit v0.10.2 From 4446a36ff8c74ac3b32feb009b651048e129c6af Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: add call_rcu_sched() Fourth cut of patch to provide the call_rcu_sched(). This is again to synchronize_sched() as call_rcu() is to synchronize_rcu(). Should be fine for experimental and -rt use, but not ready for inclusion. With some luck, I will be able to tell Andrew to come out of hiding on the next round. Passes multi-day rcutorture sessions with concurrent CPU hotplugging. Fixes since the first version include a bug that could result in indefinite blocking (spotted by Gautham Shenoy), better resiliency against CPU-hotplug operations, and other minor fixes. Fixes since the second version include reworking grace-period detection to avoid deadlocks that could happen when running concurrently with CPU hotplug, adding Mathieu's fix to avoid the softlockup messages, as well as Mathieu's fix to allow use earlier in boot. Fixes since the third version include a wrong-CPU bug spotted by Andrew, getting rid of the obsolete synchronize_kernel API that somehow snuck back in, merging spin_unlock() and local_irq_restore() in a few places, commenting the code that checks for quiescent states based on interrupting from user-mode execution or the idle loop, removing some inline attributes, and some code-style changes. Known/suspected shortcomings: o I still do not entirely trust the sleep/wakeup logic. Next step will be to use a private snapshot of the CPU online mask in rcu_sched_grace_period() -- if the CPU wasn't there at the start of the grace period, we don't need to hear from it. And the bit about accounting for changes in online CPUs inside of rcu_sched_grace_period() is ugly anyway. o It might be good for rcu_sched_grace_period() to invoke resched_cpu() when a given CPU wasn't responding quickly, but resched_cpu() is declared static... This patch also fixes a long-standing bug in the earlier preemptable-RCU implementation of synchronize_rcu() that could result in loss of concurrent external changes to a task's CPU affinity mask. I still cannot remember who reported this... Signed-off-by: Paul E. McKenney Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index b3aa05b..8c77490 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -151,7 +151,10 @@ extern struct lockdep_map rcu_lock_map; #define __synchronize_sched() synchronize_rcu() +#define call_rcu_sched(head, func) call_rcu(head, func) + extern void __rcu_init(void); +#define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ec2fc5b..411969c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,6 +40,7 @@ #include #include #include +#include /** * struct rcu_head - callback structure for use with RCU @@ -168,6 +169,27 @@ struct rcu_head { (p) = (v); \ }) +/* Infrastructure to implement the synchronize_() primitives. */ + +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; + +extern void wakeme_after_rcu(struct rcu_head *head); + +#define synchronize_rcu_xxx(name, func) \ +void name(void) \ +{ \ + struct rcu_synchronize rcu; \ + \ + init_completion(&rcu.completion); \ + /* Will wake me after RCU finished. */ \ + func(&rcu.head, wakeme_after_rcu); \ + /* Wait for it. */ \ + wait_for_completion(&rcu.completion); \ +} + /** * synchronize_sched - block until all CPUs have exited any non-preemptive * kernel code sequences. diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 8a05c7e..f04b64e 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -40,10 +40,39 @@ #include #include -#define rcu_qsctr_inc(cpu) +struct rcu_dyntick_sched { + int dynticks; + int dynticks_snap; + int sched_qs; + int sched_qs_snap; + int sched_dynticks_snap; +}; + +DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); + +static inline void rcu_qsctr_inc(int cpu) +{ + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + rdssp->sched_qs++; +} #define rcu_bh_qsctr_inc(cpu) #define call_rcu_bh(head, rcu) call_rcu(head, rcu) +/** + * call_rcu_sched - Queue RCU callback for invocation after sched grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full + * synchronize_sched()-style grace period elapses, in other words after + * all currently executing preempt-disabled sections of code (including + * hardirq handlers, NMI handlers, and local_irq_save() blocks) have + * completed. + */ +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + extern void __rcu_read_lock(void) __acquires(RCU); extern void __rcu_read_unlock(void) __releases(RCU); extern int rcu_pending(int cpu); @@ -55,6 +84,7 @@ extern int rcu_needs_cpu(int cpu); extern void __synchronize_sched(void); extern void __rcu_init(void); +extern void rcu_init_sched(void); extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); @@ -81,20 +111,20 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); struct softirq_action; #ifdef CONFIG_NO_HZ -DECLARE_PER_CPU(long, dynticks_progress_counter); +DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - __get_cpu_var(dynticks_progress_counter)++; - WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1); + __get_cpu_var(rcu_dyntick_sched).dynticks++; + WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1); } static inline void rcu_exit_nohz(void) { - __get_cpu_var(dynticks_progress_counter)++; smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ - WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1)); + __get_cpu_var(rcu_dyntick_sched).dynticks++; + WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1)); } #else /* CONFIG_NO_HZ */ diff --git a/init/main.c b/init/main.c index f7fb200..a9cc3e0 100644 --- a/init/main.c +++ b/init/main.c @@ -758,6 +758,7 @@ static void __init do_initcalls(void) */ static void __init do_basic_setup(void) { + rcu_init_sched(); /* needed by module_init stage. */ /* drivers will send hotplug events */ init_workqueues(); usermodehelper_init(); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index c09605f..a4e329d 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -39,18 +39,12 @@ #include #include #include -#include #include #include #include #include #include -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; - static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); @@ -60,7 +54,7 @@ static struct completion rcu_barrier_completion; * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. */ -static void wakeme_after_rcu(struct rcu_head *head) +void wakeme_after_rcu(struct rcu_head *head) { struct rcu_synchronize *rcu; @@ -77,17 +71,7 @@ static void wakeme_after_rcu(struct rcu_head *head) * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. */ -void synchronize_rcu(void) -{ - struct rcu_synchronize rcu; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished */ - call_rcu(&rcu.head, wakeme_after_rcu); - - /* Wait for it */ - wait_for_completion(&rcu.completion); -} +synchronize_rcu_xxx(synchronize_rcu, call_rcu) EXPORT_SYMBOL_GPL(synchronize_rcu); static void rcu_barrier_callback(struct rcu_head *notused) diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 5e02b77..aaa7976 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -87,9 +88,14 @@ struct rcu_data { struct rcu_head **nexttail; struct rcu_head *waitlist[GP_STAGES]; struct rcu_head **waittail[GP_STAGES]; - struct rcu_head *donelist; + struct rcu_head *donelist; /* from waitlist & waitschedlist */ struct rcu_head **donetail; long rcu_flipctr[2]; + struct rcu_head *nextschedlist; + struct rcu_head **nextschedtail; + struct rcu_head *waitschedlist; + struct rcu_head **waitschedtail; + int rcu_sched_sleeping; #ifdef CONFIG_RCU_TRACE struct rcupreempt_trace trace; #endif /* #ifdef CONFIG_RCU_TRACE */ @@ -131,11 +137,24 @@ enum rcu_try_flip_states { rcu_try_flip_waitmb_state, }; +/* + * States for rcu_ctrlblk.rcu_sched_sleep. + */ + +enum rcu_sched_sleep_states { + rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */ + rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */ + rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */ +}; + struct rcu_ctrlblk { spinlock_t fliplock; /* Protect state-machine transitions. */ long completed; /* Number of last completed batch. */ enum rcu_try_flip_states rcu_try_flip_state; /* The current state of the rcu state machine */ + spinlock_t schedlock; /* Protect rcu_sched sleep state. */ + enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */ + wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */ }; static DEFINE_PER_CPU(struct rcu_data, rcu_data); @@ -143,8 +162,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = { .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), .completed = 0, .rcu_try_flip_state = rcu_try_flip_idle_state, + .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock), + .sched_sleep = rcu_sched_not_sleeping, + .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq), }; +static struct task_struct *rcu_sched_grace_period_task; #ifdef CONFIG_RCU_TRACE static char *rcu_try_flip_state_names[] = @@ -207,6 +230,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag) */ #define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); +#define RCU_SCHED_BATCH_TIME (HZ / 50) + /* * Return the number of RCU batches processed thus far. Useful * for debug and statistics. @@ -411,32 +436,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp) } } -#ifdef CONFIG_NO_HZ +DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = { + .dynticks = 1, +}; -DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; -static DEFINE_PER_CPU(long, rcu_dyntick_snapshot); +#ifdef CONFIG_NO_HZ static DEFINE_PER_CPU(int, rcu_update_flag); /** * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. * * If the CPU was idle with dynamic ticks active, this updates the - * dynticks_progress_counter to let the RCU handling know that the + * rcu_dyntick_sched.dynticks to let the RCU handling know that the * CPU is active. */ void rcu_irq_enter(void) { int cpu = smp_processor_id(); + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); if (per_cpu(rcu_update_flag, cpu)) per_cpu(rcu_update_flag, cpu)++; /* * Only update if we are coming from a stopped ticks mode - * (dynticks_progress_counter is even). + * (rcu_dyntick_sched.dynticks is even). */ if (!in_interrupt() && - (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { + (rdssp->dynticks & 0x1) == 0) { /* * The following might seem like we could have a race * with NMI/SMIs. But this really isn't a problem. @@ -459,12 +486,12 @@ void rcu_irq_enter(void) * RCU read-side critical sections on this CPU would * have already completed. */ - per_cpu(dynticks_progress_counter, cpu)++; + rdssp->dynticks++; /* * The following memory barrier ensures that any * rcu_read_lock() primitives in the irq handler * are seen by other CPUs to follow the above - * increment to dynticks_progress_counter. This is + * increment to rcu_dyntick_sched.dynticks. This is * required in order for other CPUs to correctly * determine when it is safe to advance the RCU * grace-period state machine. @@ -472,7 +499,7 @@ void rcu_irq_enter(void) smp_mb(); /* see above block comment. */ /* * Since we can't determine the dynamic tick mode from - * the dynticks_progress_counter after this routine, + * the rcu_dyntick_sched.dynticks after this routine, * we use a second flag to acknowledge that we came * from an idle state with ticks stopped. */ @@ -480,7 +507,7 @@ void rcu_irq_enter(void) /* * If we take an NMI/SMI now, they will also increment * the rcu_update_flag, and will not update the - * dynticks_progress_counter on exit. That is for + * rcu_dyntick_sched.dynticks on exit. That is for * this IRQ to do. */ } @@ -490,12 +517,13 @@ void rcu_irq_enter(void) * rcu_irq_exit - Called from exiting Hard irq context. * * If the CPU was idle with dynamic ticks active, update the - * dynticks_progress_counter to put let the RCU handling be + * rcu_dyntick_sched.dynticks to put let the RCU handling be * aware that the CPU is going back to idle with no ticks. */ void rcu_irq_exit(void) { int cpu = smp_processor_id(); + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); /* * rcu_update_flag is set if we interrupted the CPU @@ -503,7 +531,7 @@ void rcu_irq_exit(void) * Once this occurs, we keep track of interrupt nesting * because a NMI/SMI could also come in, and we still * only want the IRQ that started the increment of the - * dynticks_progress_counter to be the one that modifies + * rcu_dyntick_sched.dynticks to be the one that modifies * it on exit. */ if (per_cpu(rcu_update_flag, cpu)) { @@ -515,28 +543,29 @@ void rcu_irq_exit(void) /* * If an NMI/SMI happens now we are still - * protected by the dynticks_progress_counter being odd. + * protected by the rcu_dyntick_sched.dynticks being odd. */ /* * The following memory barrier ensures that any * rcu_read_unlock() primitives in the irq handler * are seen by other CPUs to preceed the following - * increment to dynticks_progress_counter. This + * increment to rcu_dyntick_sched.dynticks. This * is required in order for other CPUs to determine * when it is safe to advance the RCU grace-period * state machine. */ smp_mb(); /* see above block comment. */ - per_cpu(dynticks_progress_counter, cpu)++; - WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); + rdssp->dynticks++; + WARN_ON(rdssp->dynticks & 0x1); } } static void dyntick_save_progress_counter(int cpu) { - per_cpu(rcu_dyntick_snapshot, cpu) = - per_cpu(dynticks_progress_counter, cpu); + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + rdssp->dynticks_snap = rdssp->dynticks; } static inline int @@ -544,9 +573,10 @@ rcu_try_flip_waitack_needed(int cpu) { long curr; long snap; + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - curr = per_cpu(dynticks_progress_counter, cpu); - snap = per_cpu(rcu_dyntick_snapshot, cpu); + curr = rdssp->dynticks; + snap = rdssp->dynticks_snap; smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ /* @@ -580,9 +610,10 @@ rcu_try_flip_waitmb_needed(int cpu) { long curr; long snap; + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - curr = per_cpu(dynticks_progress_counter, cpu); - snap = per_cpu(rcu_dyntick_snapshot, cpu); + curr = rdssp->dynticks; + snap = rdssp->dynticks_snap; smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ /* @@ -609,14 +640,86 @@ rcu_try_flip_waitmb_needed(int cpu) return 1; } +static void dyntick_save_progress_counter_sched(int cpu) +{ + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + rdssp->sched_dynticks_snap = rdssp->dynticks; +} + +static int rcu_qsctr_inc_needed_dyntick(int cpu) +{ + long curr; + long snap; + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + curr = rdssp->dynticks; + snap = rdssp->sched_dynticks_snap; + smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + + /* + * If the CPU remained in dynticks mode for the entire time + * and didn't take any interrupts, NMIs, SMIs, or whatever, + * then it cannot be in the middle of an rcu_read_lock(), so + * the next rcu_read_lock() it executes must use the new value + * of the counter. Therefore, this CPU has been in a quiescent + * state the entire time, and we don't need to wait for it. + */ + + if ((curr == snap) && ((curr & 0x1) == 0)) + return 0; + + /* + * If the CPU passed through or entered a dynticks idle phase with + * no active irq handlers, then, as above, this CPU has already + * passed through a quiescent state. + */ + + if ((curr - snap) > 2 || (snap & 0x1) == 0) + return 0; + + /* We need this CPU to go through a quiescent state. */ + + return 1; +} + #else /* !CONFIG_NO_HZ */ -# define dyntick_save_progress_counter(cpu) do { } while (0) -# define rcu_try_flip_waitack_needed(cpu) (1) -# define rcu_try_flip_waitmb_needed(cpu) (1) +# define dyntick_save_progress_counter(cpu) do { } while (0) +# define rcu_try_flip_waitack_needed(cpu) (1) +# define rcu_try_flip_waitmb_needed(cpu) (1) + +# define dyntick_save_progress_counter_sched(cpu) do { } while (0) +# define rcu_qsctr_inc_needed_dyntick(cpu) (1) #endif /* CONFIG_NO_HZ */ +static void save_qsctr_sched(int cpu) +{ + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + rdssp->sched_qs_snap = rdssp->sched_qs; +} + +static inline int rcu_qsctr_inc_needed(int cpu) +{ + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + /* + * If there has been a quiescent state, no more need to wait + * on this CPU. + */ + + if (rdssp->sched_qs != rdssp->sched_qs_snap) { + smp_mb(); /* force ordering with cpu entering schedule(). */ + return 0; + } + + /* We need this CPU to go through a quiescent state. */ + + return 1; +} + /* * Get here when RCU is idle. Decide whether we need to * move out of idle state, and return non-zero if so. @@ -819,6 +922,26 @@ void rcu_check_callbacks(int cpu, int user) unsigned long flags; struct rcu_data *rdp = RCU_DATA_CPU(cpu); + /* + * If this CPU took its interrupt from user mode or from the + * idle loop, and this is not a nested interrupt, then + * this CPU has to have exited all prior preept-disable + * sections of code. So increment the counter to note this. + * + * The memory barrier is needed to handle the case where + * writes from a preempt-disable section of code get reordered + * into schedule() by this CPU's write buffer. So the memory + * barrier makes sure that the rcu_qsctr_inc() is seen by other + * CPUs to happen after any such write. + */ + + if (user || + (idle_cpu(cpu) && !in_softirq() && + hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + smp_mb(); /* Guard against aggressive schedule(). */ + rcu_qsctr_inc(cpu); + } + rcu_check_mb(cpu); if (rcu_ctrlblk.completed == rdp->completed) rcu_try_flip(); @@ -869,6 +992,8 @@ void rcu_offline_cpu(int cpu) struct rcu_head *list = NULL; unsigned long flags; struct rcu_data *rdp = RCU_DATA_CPU(cpu); + struct rcu_head *schedlist = NULL; + struct rcu_head **schedtail = &schedlist; struct rcu_head **tail = &list; /* @@ -882,6 +1007,11 @@ void rcu_offline_cpu(int cpu) rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], list, tail); rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); + rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail, + schedlist, schedtail); + rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail, + schedlist, schedtail); + rdp->rcu_sched_sleeping = 0; spin_unlock_irqrestore(&rdp->lock, flags); rdp->waitlistcount = 0; @@ -916,22 +1046,40 @@ void rcu_offline_cpu(int cpu) * fix. */ - local_irq_save(flags); + local_irq_save(flags); /* disable preempt till we know what lock. */ rdp = RCU_DATA_ME(); spin_lock(&rdp->lock); *rdp->nexttail = list; if (list) rdp->nexttail = tail; + *rdp->nextschedtail = schedlist; + if (schedlist) + rdp->nextschedtail = schedtail; spin_unlock_irqrestore(&rdp->lock, flags); } void __devinit rcu_online_cpu(int cpu) { unsigned long flags; + struct rcu_data *rdp; spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); cpu_set(cpu, rcu_cpu_online_map); spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); + + /* + * The rcu_sched grace-period processing might have bypassed + * this CPU, given that it was not in the rcu_cpu_online_map + * when the grace-period scan started. This means that the + * grace-period task might sleep. So make sure that if this + * should happen, the first callback posted to this CPU will + * wake up the grace-period task if need be. + */ + + rdp = RCU_DATA_CPU(cpu); + spin_lock_irqsave(&rdp->lock, flags); + rdp->rcu_sched_sleeping = 1; + spin_unlock_irqrestore(&rdp->lock, flags); } #else /* #ifdef CONFIG_HOTPLUG_CPU */ @@ -986,31 +1134,196 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) *rdp->nexttail = head; rdp->nexttail = &head->next; RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); - spin_unlock(&rdp->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&rdp->lock, flags); } EXPORT_SYMBOL_GPL(call_rcu); +void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + unsigned long flags; + struct rcu_data *rdp; + int wake_gp = 0; + + head->func = func; + head->next = NULL; + local_irq_save(flags); + rdp = RCU_DATA_ME(); + spin_lock(&rdp->lock); + *rdp->nextschedtail = head; + rdp->nextschedtail = &head->next; + if (rdp->rcu_sched_sleeping) { + + /* Grace-period processing might be sleeping... */ + + rdp->rcu_sched_sleeping = 0; + wake_gp = 1; + } + spin_unlock_irqrestore(&rdp->lock, flags); + if (wake_gp) { + + /* Wake up grace-period processing, unless someone beat us. */ + + spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); + if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping) + wake_gp = 0; + rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping; + spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); + if (wake_gp) + wake_up_interruptible(&rcu_ctrlblk.sched_wq); + } +} +EXPORT_SYMBOL_GPL(call_rcu_sched); + /* * Wait until all currently running preempt_disable() code segments * (including hardware-irq-disable segments) complete. Note that * in -rt this does -not- necessarily result in all currently executing * interrupt -handlers- having completed. */ -void __synchronize_sched(void) +synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched) +EXPORT_SYMBOL_GPL(__synchronize_sched); + +/* + * kthread function that manages call_rcu_sched grace periods. + */ +static int rcu_sched_grace_period(void *arg) { - cpumask_t oldmask; + int couldsleep; /* might sleep after current pass. */ + int couldsleepnext = 0; /* might sleep after next pass. */ int cpu; + unsigned long flags; + struct rcu_data *rdp; + int ret; - if (sched_getaffinity(0, &oldmask) < 0) - oldmask = cpu_possible_map; - for_each_online_cpu(cpu) { - sched_setaffinity(0, &cpumask_of_cpu(cpu)); - schedule(); - } - sched_setaffinity(0, &oldmask); + /* + * Each pass through the following loop handles one + * rcu_sched grace period cycle. + */ + do { + /* Save each CPU's current state. */ + + for_each_online_cpu(cpu) { + dyntick_save_progress_counter_sched(cpu); + save_qsctr_sched(cpu); + } + + /* + * Sleep for about an RCU grace-period's worth to + * allow better batching and to consume less CPU. + */ + schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME); + + /* + * If there was nothing to do last time, prepare to + * sleep at the end of the current grace period cycle. + */ + couldsleep = couldsleepnext; + couldsleepnext = 1; + if (couldsleep) { + spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); + rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep; + spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); + } + + /* + * Wait on each CPU in turn to have either visited + * a quiescent state or been in dynticks-idle mode. + */ + for_each_online_cpu(cpu) { + while (rcu_qsctr_inc_needed(cpu) && + rcu_qsctr_inc_needed_dyntick(cpu)) { + /* resched_cpu(cpu); @@@ */ + schedule_timeout_interruptible(1); + } + } + + /* Advance callbacks for each CPU. */ + + for_each_online_cpu(cpu) { + + rdp = RCU_DATA_CPU(cpu); + spin_lock_irqsave(&rdp->lock, flags); + + /* + * We are running on this CPU irq-disabled, so no + * CPU can go offline until we re-enable irqs. + * The current CPU might have already gone + * offline (between the for_each_offline_cpu and + * the spin_lock_irqsave), but in that case all its + * callback lists will be empty, so no harm done. + * + * Advance the callbacks! We share normal RCU's + * donelist, since callbacks are invoked the + * same way in either case. + */ + if (rdp->waitschedlist != NULL) { + *rdp->donetail = rdp->waitschedlist; + rdp->donetail = rdp->waitschedtail; + + /* + * Next rcu_check_callbacks() will + * do the required raise_softirq(). + */ + } + if (rdp->nextschedlist != NULL) { + rdp->waitschedlist = rdp->nextschedlist; + rdp->waitschedtail = rdp->nextschedtail; + couldsleep = 0; + couldsleepnext = 0; + } else { + rdp->waitschedlist = NULL; + rdp->waitschedtail = &rdp->waitschedlist; + } + rdp->nextschedlist = NULL; + rdp->nextschedtail = &rdp->nextschedlist; + + /* Mark sleep intention. */ + + rdp->rcu_sched_sleeping = couldsleep; + + spin_unlock_irqrestore(&rdp->lock, flags); + } + + /* If we saw callbacks on the last scan, go deal with them. */ + + if (!couldsleep) + continue; + + /* Attempt to block... */ + + spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); + if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) { + + /* + * Someone posted a callback after we scanned. + * Go take care of it. + */ + spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); + couldsleepnext = 0; + continue; + } + + /* Block until the next person posts a callback. */ + + rcu_ctrlblk.sched_sleep = rcu_sched_sleeping; + spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); + ret = 0; + __wait_event_interruptible(rcu_ctrlblk.sched_wq, + rcu_ctrlblk.sched_sleep != rcu_sched_sleeping, + ret); + + /* + * Signals would prevent us from sleeping, and we cannot + * do much with them in any case. So flush them. + */ + if (ret) + flush_signals(current); + couldsleepnext = 0; + + } while (!kthread_should_stop()); + + return (0); } -EXPORT_SYMBOL_GPL(__synchronize_sched); /* * Check to see if any future RCU-related work will need to be done @@ -1027,7 +1340,9 @@ int rcu_needs_cpu(int cpu) return (rdp->donelist != NULL || !!rdp->waitlistcount || - rdp->nextlist != NULL); + rdp->nextlist != NULL || + rdp->nextschedlist != NULL || + rdp->waitschedlist != NULL); } int rcu_pending(int cpu) @@ -1038,7 +1353,9 @@ int rcu_pending(int cpu) if (rdp->donelist != NULL || !!rdp->waitlistcount || - rdp->nextlist != NULL) + rdp->nextlist != NULL || + rdp->nextschedlist != NULL || + rdp->waitschedlist != NULL) return 1; /* The RCU core needs an acknowledgement from this CPU. */ @@ -1105,6 +1422,11 @@ void __init __rcu_init(void) rdp->donetail = &rdp->donelist; rdp->rcu_flipctr[0] = 0; rdp->rcu_flipctr[1] = 0; + rdp->nextschedlist = NULL; + rdp->nextschedtail = &rdp->nextschedlist; + rdp->waitschedlist = NULL; + rdp->waitschedtail = &rdp->waitschedlist; + rdp->rcu_sched_sleeping = 0; } register_cpu_notifier(&rcu_nb); @@ -1127,11 +1449,15 @@ void __init __rcu_init(void) } /* - * Deprecated, use synchronize_rcu() or synchronize_sched() instead. + * Late-boot-time RCU initialization that must wait until after scheduler + * has been initialized. */ -void synchronize_kernel(void) +void __init rcu_init_sched(void) { - synchronize_rcu(); + rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period, + NULL, + "rcu_sched_grace_period"); + WARN_ON(IS_ERR(rcu_sched_grace_period_task)); } #ifdef CONFIG_RCU_TRACE -- cgit v0.10.2 From 8db559b83009bed92e1b5dd13a651ff273d9ff62 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: add memory barriers and comments to rcu_check_callbacks() Add comments to the logic that infers quiescent states when interrupting from either user mode or the idle loop. Also add a memory barrier: it appears that James Huang was in fact onto something, as the scheduler is much less synchronization happy than it once was, so we can no longer rely on its memory barriers in all cases. Signed-off-by: Paul E. McKenney Reported-by: James Huang Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index f4ffbd0..d834879 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -502,10 +502,38 @@ void rcu_check_callbacks(int cpu, int user) if (user || (idle_cpu(cpu) && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + + /* + * Get here if this CPU took its interrupt from user + * mode or from the idle loop, and if this is not a + * nested interrupt. In this case, the CPU is in + * a quiescent state, so count it. + * + * Also do a memory barrier. This is needed to handle + * the case where writes from a preempt-disable section + * of code get reordered into schedule() by this CPU's + * write buffer. The memory barrier makes sure that + * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are see + * by other CPUs to happen after any such write. + */ + + smp_mb(); /* See above block comment. */ rcu_qsctr_inc(cpu); rcu_bh_qsctr_inc(cpu); - } else if (!in_softirq()) + + } else if (!in_softirq()) { + + /* + * Get here if this CPU did not take its interrupt from + * softirq, in other words, if it is not interrupting + * a rcu_bh read-side critical section. This is an _bh + * critical section, so count it. The memory barrier + * is needed for the same reason as is the above one. + */ + + smp_mb(); /* See above block comment. */ rcu_bh_qsctr_inc(cpu); + } raise_rcu_softirq(); } -- cgit v0.10.2 From 70f12f848d3e981479b4f6f751e73c14f7c13e5b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: add rcu_barrier_sched() and rcu_barrier_bh() Add rcu_barrier_sched() and rcu_barrier_bh(). With these in place, rcutorture no longer gives the occasional oops when repeatedly starting and stopping torturing rcu_bh. Also adds the API needed to flush out pre-existing call_rcu_sched() callbacks. Signed-off-by: Paul E. McKenney Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 411969c..e8b4039 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -246,6 +246,8 @@ extern void call_rcu_bh(struct rcu_head *head, /* Exported common interfaces */ extern void synchronize_rcu(void); extern void rcu_barrier(void); +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); /* Internal to kernel */ extern void rcu_init(void); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index a4e329d..4a74b8d 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -45,6 +45,12 @@ #include #include +enum rcu_barrier { + RCU_BARRIER_STD, + RCU_BARRIER_BH, + RCU_BARRIER_SCHED, +}; + static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); @@ -83,19 +89,30 @@ static void rcu_barrier_callback(struct rcu_head *notused) /* * Called with preemption disabled, and from cross-cpu IRQ context. */ -static void rcu_barrier_func(void *notused) +static void rcu_barrier_func(void *type) { int cpu = smp_processor_id(); struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); atomic_inc(&rcu_barrier_cpu_count); - call_rcu(head, rcu_barrier_callback); + switch ((enum rcu_barrier)type) { + case RCU_BARRIER_STD: + call_rcu(head, rcu_barrier_callback); + break; + case RCU_BARRIER_BH: + call_rcu_bh(head, rcu_barrier_callback); + break; + case RCU_BARRIER_SCHED: + call_rcu_sched(head, rcu_barrier_callback); + break; + } } -/** - * rcu_barrier - Wait until all the in-flight RCUs are complete. +/* + * Orchestrate the specified type of RCU barrier, waiting for all + * RCU callbacks of the specified type to complete. */ -void rcu_barrier(void) +static void _rcu_barrier(enum rcu_barrier type) { BUG_ON(in_interrupt()); /* Take cpucontrol mutex to protect against CPU hotplug */ @@ -111,13 +128,39 @@ void rcu_barrier(void) * until all the callbacks are queued. */ rcu_read_lock(); - on_each_cpu(rcu_barrier_func, NULL, 0, 1); + on_each_cpu(rcu_barrier_func, (void *)type, 0, 1); rcu_read_unlock(); wait_for_completion(&rcu_barrier_completion); mutex_unlock(&rcu_barrier_mutex); } + +/** + * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. + */ +void rcu_barrier(void) +{ + _rcu_barrier(RCU_BARRIER_STD); +} EXPORT_SYMBOL_GPL(rcu_barrier); +/** + * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. + */ +void rcu_barrier_bh(void) +{ + _rcu_barrier(RCU_BARRIER_BH); +} +EXPORT_SYMBOL_GPL(rcu_barrier_bh); + +/** + * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. + */ +void rcu_barrier_sched(void) +{ + _rcu_barrier(RCU_BARRIER_SCHED); +} +EXPORT_SYMBOL_GPL(rcu_barrier_sched); + void __init rcu_init(void) { __rcu_init(); -- cgit v0.10.2 From 2326974df29988181b6b69ed6fbf42b17adf916f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: add call_rcu_sched() and friends to rcutorture Add entry to rcu_torture_ops allowing the correct barrier function to be used upon exit from rcutorture. Also add torture options for the new call_rcu_sched() API. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 33acc424..0334b6a 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -192,6 +192,7 @@ struct rcu_torture_ops { int (*completed)(void); void (*deferredfree)(struct rcu_torture *p); void (*sync)(void); + void (*cb_barrier)(void); int (*stats)(char *page); char *name; }; @@ -265,6 +266,7 @@ static struct rcu_torture_ops rcu_ops = { .completed = rcu_torture_completed, .deferredfree = rcu_torture_deferred_free, .sync = synchronize_rcu, + .cb_barrier = rcu_barrier, .stats = NULL, .name = "rcu" }; @@ -304,6 +306,7 @@ static struct rcu_torture_ops rcu_sync_ops = { .completed = rcu_torture_completed, .deferredfree = rcu_sync_torture_deferred_free, .sync = synchronize_rcu, + .cb_barrier = NULL, .stats = NULL, .name = "rcu_sync" }; @@ -364,6 +367,7 @@ static struct rcu_torture_ops rcu_bh_ops = { .completed = rcu_bh_torture_completed, .deferredfree = rcu_bh_torture_deferred_free, .sync = rcu_bh_torture_synchronize, + .cb_barrier = rcu_barrier_bh, .stats = NULL, .name = "rcu_bh" }; @@ -377,6 +381,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { .completed = rcu_bh_torture_completed, .deferredfree = rcu_sync_torture_deferred_free, .sync = rcu_bh_torture_synchronize, + .cb_barrier = NULL, .stats = NULL, .name = "rcu_bh_sync" }; @@ -458,6 +463,7 @@ static struct rcu_torture_ops srcu_ops = { .completed = srcu_torture_completed, .deferredfree = rcu_sync_torture_deferred_free, .sync = srcu_torture_synchronize, + .cb_barrier = NULL, .stats = srcu_torture_stats, .name = "srcu" }; @@ -482,6 +488,11 @@ static int sched_torture_completed(void) return 0; } +static void rcu_sched_torture_deferred_free(struct rcu_torture *p) +{ + call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); +} + static void sched_torture_synchronize(void) { synchronize_sched(); @@ -494,12 +505,27 @@ static struct rcu_torture_ops sched_ops = { .readdelay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, .completed = sched_torture_completed, - .deferredfree = rcu_sync_torture_deferred_free, + .deferredfree = rcu_sched_torture_deferred_free, .sync = sched_torture_synchronize, + .cb_barrier = rcu_barrier_sched, .stats = NULL, .name = "sched" }; +static struct rcu_torture_ops sched_ops_sync = { + .init = rcu_sync_torture_init, + .cleanup = NULL, + .readlock = sched_torture_read_lock, + .readdelay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = sched_torture_read_unlock, + .completed = sched_torture_completed, + .deferredfree = rcu_sync_torture_deferred_free, + .sync = sched_torture_synchronize, + .cb_barrier = NULL, + .stats = NULL, + .name = "sched_sync" +}; + /* * RCU torture writer kthread. Repeatedly substitutes a new structure * for that pointed to by rcu_torture_current, freeing the old structure @@ -848,7 +874,9 @@ rcu_torture_cleanup(void) stats_task = NULL; /* Wait for all RCU callbacks to fire. */ - rcu_barrier(); + + if (cur_ops->cb_barrier != NULL) + cur_ops->cb_barrier(); rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ @@ -868,7 +896,7 @@ rcu_torture_init(void) int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, - &srcu_ops, &sched_ops, }; + &srcu_ops, &sched_ops, &sched_ops_sync, }; /* Process args and tell the world that the torturer is on the job. */ for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { -- cgit v0.10.2 From 32300751b4079cb5688453baa94711579d4285d5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: sched: 1Q08 RCU doc update, add call_rcu_sched() Long-delayed update to the RCU documentation, including adding the new call_rcu_sched() and rcu_barrier_sched() APIs. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt index c64158e..a6d32e6 100644 --- a/Documentation/RCU/NMI-RCU.txt +++ b/Documentation/RCU/NMI-RCU.txt @@ -93,6 +93,9 @@ Since NMI handlers disable preemption, synchronize_sched() is guaranteed not to return until all ongoing NMI handlers exit. It is therefore safe to free up the handler's data as soon as synchronize_sched() returns. +Important note: for this to work, the architecture in question must +invoke irq_enter() and irq_exit() on NMI entry and exit, respectively. + Answer to Quick Quiz diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt index 39ad8f5..9f711d2 100644 --- a/Documentation/RCU/RTFP.txt +++ b/Documentation/RCU/RTFP.txt @@ -52,6 +52,10 @@ of each iteration. Unfortunately, chaotic relaxation requires highly structured data, such as the matrices used in scientific programs, and is thus inapplicable to most data structures in operating-system kernels. +In 1992, Henry (now Alexia) Massalin completed a dissertation advising +parallel programmers to defer processing when feasible to simplify +synchronization. RCU makes extremely heavy use of this advice. + In 1993, Jacobson [Jacobson93] verbally described what is perhaps the simplest deferred-free technique: simply waiting a fixed amount of time before freeing blocks awaiting deferred free. Jacobson did not describe @@ -138,6 +142,13 @@ blocking in read-side critical sections appeared [PaulEMcKenney2006c], Robert Olsson described an RCU-protected trie-hash combination [RobertOlsson2006a]. +2007 saw the journal version of the award-winning RCU paper from 2006 +[ThomasEHart2007a], as well as a paper demonstrating use of Promela +and Spin to mechanically verify an optimization to Oleg Nesterov's +QRCU [PaulEMcKenney2007QRCUspin], a design document describing +preemptible RCU [PaulEMcKenney2007PreemptibleRCU], and the three-part +LWN "What is RCU?" series [PaulEMcKenney2007WhatIsRCUFundamentally, +PaulEMcKenney2008WhatIsRCUUsage, and PaulEMcKenney2008WhatIsRCUAPI]. Bibtex Entries @@ -202,6 +213,20 @@ Bibtex Entries ,Year="1991" } +@phdthesis{HMassalinPhD +,author="H. Massalin" +,title="Synthesis: An Efficient Implementation of Fundamental Operating +System Services" +,school="Columbia University" +,address="New York, NY" +,year="1992" +,annotation=" + Mondo optimizing compiler. + Wait-free stuff. + Good advice: defer work to avoid synchronization. +" +} + @unpublished{Jacobson93 ,author="Van Jacobson" ,title="Avoid Read-Side Locking Via Delayed Free" @@ -635,3 +660,86 @@ Revised: " } +@unpublished{PaulEMcKenney2007PreemptibleRCU +,Author="Paul E. McKenney" +,Title="The design of preemptible read-copy-update" +,month="October" +,day="8" +,year="2007" +,note="Available: +\url{http://lwn.net/Articles/253651/} +[Viewed October 25, 2007]" +,annotation=" + LWN article describing the design of preemptible RCU. +" +} + +######################################################################## +# +# "What is RCU?" LWN series. +# + +@unpublished{PaulEMcKenney2007WhatIsRCUFundamentally +,Author="Paul E. McKenney and Jonathan Walpole" +,Title="What is {RCU}, Fundamentally?" +,month="December" +,day="17" +,year="2007" +,note="Available: +\url{http://lwn.net/Articles/262464/} +[Viewed December 27, 2007]" +,annotation=" + Lays out the three basic components of RCU: (1) publish-subscribe, + (2) wait for pre-existing readers to complete, and (2) maintain + multiple versions. +" +} + +@unpublished{PaulEMcKenney2008WhatIsRCUUsage +,Author="Paul E. McKenney" +,Title="What is {RCU}? Part 2: Usage" +,month="January" +,day="4" +,year="2008" +,note="Available: +\url{http://lwn.net/Articles/263130/} +[Viewed January 4, 2008]" +,annotation=" + Lays out six uses of RCU: + 1. RCU is a Reader-Writer Lock Replacement + 2. RCU is a Restricted Reference-Counting Mechanism + 3. RCU is a Bulk Reference-Counting Mechanism + 4. RCU is a Poor Man's Garbage Collector + 5. RCU is a Way of Providing Existence Guarantees + 6. RCU is a Way of Waiting for Things to Finish +" +} + +@unpublished{PaulEMcKenney2008WhatIsRCUAPI +,Author="Paul E. McKenney" +,Title="{RCU} part 3: the {RCU} {API}" +,month="January" +,day="17" +,year="2008" +,note="Available: +\url{http://lwn.net/Articles/264090/} +[Viewed January 10, 2008]" +,annotation=" + Gives an overview of the Linux-kernel RCU API and a brief annotated RCU + bibliography. +" +} + +@article{DinakarGuniguntala2008IBMSysJ +,author="D. Guniguntala and P. E. McKenney and J. Triplett and J. Walpole" +,title="The read-copy-update mechanism for supporting real-time applications on shared-memory multiprocessor systems with {Linux}" +,Year="2008" +,Month="April" +,journal="IBM Systems Journal" +,volume="47" +,number="2" +,pages="@@-@@" +,annotation=" + RCU, realtime RCU, sleepable RCU, performance. +" +} diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 42b01bc..cf5562c 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -13,10 +13,13 @@ over a rather long period of time, but improvements are always welcome! detailed performance measurements show that RCU is nonetheless the right tool for the job. - The other exception would be where performance is not an issue, - and RCU provides a simpler implementation. An example of this - situation is the dynamic NMI code in the Linux 2.6 kernel, - at least on architectures where NMIs are rare. + Another exception is where performance is not an issue, and RCU + provides a simpler implementation. An example of this situation + is the dynamic NMI code in the Linux 2.6 kernel, at least on + architectures where NMIs are rare. + + Yet another exception is where the low real-time latency of RCU's + read-side primitives is critically important. 1. Does the update code have proper mutual exclusion? @@ -39,9 +42,10 @@ over a rather long period of time, but improvements are always welcome! 2. Do the RCU read-side critical sections make proper use of rcu_read_lock() and friends? These primitives are needed - to suppress preemption (or bottom halves, in the case of - rcu_read_lock_bh()) in the read-side critical sections, - and are also an excellent aid to readability. + to prevent grace periods from ending prematurely, which + could result in data being unceremoniously freed out from + under your read-side code, which can greatly increase the + actuarial risk of your kernel. As a rough rule of thumb, any dereference of an RCU-protected pointer must be covered by rcu_read_lock() or rcu_read_lock_bh() @@ -54,15 +58,30 @@ over a rather long period of time, but improvements are always welcome! be running while updates are in progress. There are a number of ways to handle this concurrency, depending on the situation: - a. Make updates appear atomic to readers. For example, + a. Use the RCU variants of the list and hlist update + primitives to add, remove, and replace elements on an + RCU-protected list. Alternatively, use the RCU-protected + trees that have been added to the Linux kernel. + + This is almost always the best approach. + + b. Proceed as in (a) above, but also maintain per-element + locks (that are acquired by both readers and writers) + that guard per-element state. Of course, fields that + the readers refrain from accessing can be guarded by the + update-side lock. + + This works quite well, also. + + c. Make updates appear atomic to readers. For example, pointer updates to properly aligned fields will appear atomic, as will individual atomic primitives. Operations performed under a lock and sequences of multiple atomic primitives will -not- appear to be atomic. - This is almost always the best approach. + This can work, but is starting to get a bit tricky. - b. Carefully order the updates and the reads so that + d. Carefully order the updates and the reads so that readers see valid data at all phases of the update. This is often more difficult than it sounds, especially given modern CPUs' tendency to reorder memory references. @@ -123,18 +142,22 @@ over a rather long period of time, but improvements are always welcome! when publicizing a pointer to a structure that can be traversed by an RCU read-side critical section. -5. If call_rcu(), or a related primitive such as call_rcu_bh(), - is used, the callback function must be written to be called - from softirq context. In particular, it cannot block. +5. If call_rcu(), or a related primitive such as call_rcu_bh() or + call_rcu_sched(), is used, the callback function must be + written to be called from softirq context. In particular, + it cannot block. 6. Since synchronize_rcu() can block, it cannot be called from - any sort of irq context. + any sort of irq context. Ditto for synchronize_sched() and + synchronize_srcu(). 7. If the updater uses call_rcu(), then the corresponding readers must use rcu_read_lock() and rcu_read_unlock(). If the updater uses call_rcu_bh(), then the corresponding readers must use - rcu_read_lock_bh() and rcu_read_unlock_bh(). Mixing things up - will result in confusion and broken kernels. + rcu_read_lock_bh() and rcu_read_unlock_bh(). If the updater + uses call_rcu_sched(), then the corresponding readers must + disable preemption. Mixing things up will result in confusion + and broken kernels. One exception to this rule: rcu_read_lock() and rcu_read_unlock() may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() @@ -143,9 +166,9 @@ over a rather long period of time, but improvements are always welcome! such cases is a must, of course! And the jury is still out on whether the increased speed is worth it. -8. Although synchronize_rcu() is a bit slower than is call_rcu(), - it usually results in simpler code. So, unless update - performance is critically important or the updaters cannot block, +8. Although synchronize_rcu() is slower than is call_rcu(), it + usually results in simpler code. So, unless update performance + is critically important or the updaters cannot block, synchronize_rcu() should be used in preference to call_rcu(). An especially important property of the synchronize_rcu() @@ -187,23 +210,23 @@ over a rather long period of time, but improvements are always welcome! number of updates per grace period. 9. All RCU list-traversal primitives, which include - list_for_each_rcu(), list_for_each_entry_rcu(), + rcu_dereference(), list_for_each_rcu(), list_for_each_entry_rcu(), list_for_each_continue_rcu(), and list_for_each_safe_rcu(), - must be within an RCU read-side critical section. RCU + must be either within an RCU read-side critical section or + must be protected by appropriate update-side locks. RCU read-side critical sections are delimited by rcu_read_lock() and rcu_read_unlock(), or by similar primitives such as rcu_read_lock_bh() and rcu_read_unlock_bh(). - Use of the _rcu() list-traversal primitives outside of an - RCU read-side critical section causes no harm other than - a slight performance degradation on Alpha CPUs. It can - also be quite helpful in reducing code bloat when common - code is shared between readers and updaters. + The reason that it is permissible to use RCU list-traversal + primitives when the update-side lock is held is that doing so + can be quite helpful in reducing code bloat when common code is + shared between readers and updaters. 10. Conversely, if you are in an RCU read-side critical section, - you -must- use the "_rcu()" variants of the list macros. - Failing to do so will break Alpha and confuse people reading - your code. + and you don't hold the appropriate update-side lock, you -must- + use the "_rcu()" variants of the list macros. Failing to do so + will break Alpha and confuse people reading your code. 11. Note that synchronize_rcu() -only- guarantees to wait until all currently executing rcu_read_lock()-protected RCU read-side @@ -230,6 +253,14 @@ over a rather long period of time, but improvements are always welcome! must use whatever locking or other synchronization is required to safely access and/or modify that data structure. + RCU callbacks are -usually- executed on the same CPU that executed + the corresponding call_rcu(), call_rcu_bh(), or call_rcu_sched(), + but are by -no- means guaranteed to be. For example, if a given + CPU goes offline while having an RCU callback pending, then that + RCU callback will execute on some surviving CPU. (If this was + not the case, a self-spawning RCU callback would prevent the + victim CPU from ever going offline.) + 14. SRCU (srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu()) may only be invoked from process context. Unlike other forms of RCU, it -is- permissible to block in an SRCU read-side critical diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index e0d6d99..e04d643 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -1,3 +1,11 @@ +Please note that the "What is RCU?" LWN series is an excellent place +to start learning about RCU: + +1. What is RCU, Fundamentally? http://lwn.net/Articles/262464/ +2. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/ +3. RCU part 3: the RCU API http://lwn.net/Articles/264090/ + + What is RCU? RCU is a synchronization mechanism that was added to the Linux kernel @@ -772,26 +780,18 @@ Linux-kernel source code, but it helps to have a full list of the APIs, since there does not appear to be a way to categorize them in docbook. Here is the list, by category. -Markers for RCU read-side critical sections: - - rcu_read_lock - rcu_read_unlock - rcu_read_lock_bh - rcu_read_unlock_bh - srcu_read_lock - srcu_read_unlock - RCU pointer/list traversal: rcu_dereference + list_for_each_entry_rcu + hlist_for_each_entry_rcu + list_for_each_rcu (to be deprecated in favor of list_for_each_entry_rcu) - list_for_each_entry_rcu list_for_each_continue_rcu (to be deprecated in favor of new list_for_each_entry_continue_rcu) - hlist_for_each_entry_rcu -RCU pointer update: +RCU pointer/list update: rcu_assign_pointer list_add_rcu @@ -799,16 +799,36 @@ RCU pointer update: list_del_rcu list_replace_rcu hlist_del_rcu + hlist_add_after_rcu + hlist_add_before_rcu hlist_add_head_rcu + hlist_replace_rcu + list_splice_init_rcu() -RCU grace period: +RCU: Critical sections Grace period Barrier + + rcu_read_lock synchronize_net rcu_barrier + rcu_read_unlock synchronize_rcu + call_rcu + + +bh: Critical sections Grace period Barrier + + rcu_read_lock_bh call_rcu_bh rcu_barrier_bh + rcu_read_unlock_bh + + +sched: Critical sections Grace period Barrier + + [preempt_disable] synchronize_sched rcu_barrier_sched + [and friends] call_rcu_sched + + +SRCU: Critical sections Grace period Barrier + + srcu_read_lock synchronize_srcu N/A + srcu_read_unlock - synchronize_net - synchronize_sched - synchronize_rcu - synchronize_srcu - call_rcu - call_rcu_bh See the comment headers in the source code (or the docbook generated from them) for more information. -- cgit v0.10.2 From 82524746c27fa418c250a56dd7606b9d3fc79826 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: split list.h and move rcu-protected lists into rculist.h Move rcu-protected lists from list.h into a new header file rculist.h. This is done because list are a very used primitive structure all over the kernel and it's currently impossible to include other header files in this list.h without creating some circular dependencies. For example, list.h implements rcu-protected list and uses rcu_dereference() without including rcupdate.h. It actually compiles because users of rcu_dereference() are macros. Others RCU functions could be used too but aren't probably because of this. Therefore this patch creates rculist.h which includes rcupdates without to many changes/troubles. Signed-off-by: Franck Bui-Huu Acked-by: Paul E. McKenney Acked-by: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 53351c3..96c31b4 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index c6e772f..095c798 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -23,6 +23,7 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. * */ +#include #include #include diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index e0ec540..5d830d8 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "ipath_kernel.h" #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index 9e5abf9..d73e322 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -31,8 +31,7 @@ * SOFTWARE. */ -#include -#include +#include #include "ipath_verbs.h" diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c36a03a..860d75d 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2a66394..1f5cebf 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/list.h b/include/linux/list.h index 08cf4f6..139ec41 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -85,65 +85,6 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head) } /* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add_rcu(struct list_head * new, - struct list_head * prev, struct list_head * next) -{ - new->next = next; - new->prev = prev; - smp_wmb(); - next->prev = new; - prev->next = new; -} - -/** - * list_add_rcu - add a new entry to rcu-protected list - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_add_rcu() - * or list_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - */ -static inline void list_add_rcu(struct list_head *new, struct list_head *head) -{ - __list_add_rcu(new, head, head->next); -} - -/** - * list_add_tail_rcu - add a new entry to rcu-protected list - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_add_tail_rcu() - * or list_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - */ -static inline void list_add_tail_rcu(struct list_head *new, - struct list_head *head) -{ - __list_add_rcu(new, head->prev, head); -} - -/* * Delete a list entry by making the prev/next entries * point to each other. * @@ -174,36 +115,6 @@ extern void list_del(struct list_head *entry); #endif /** - * list_del_rcu - deletes entry from list without re-initialization - * @entry: the element to delete from the list. - * - * Note: list_empty() on entry does not return true after this, - * the entry is in an undefined state. It is useful for RCU based - * lockfree traversal. - * - * In particular, it means that we can not poison the forward - * pointers that may still be used for walking the list. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_del_rcu() - * or list_add_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - * - * Note that the caller is not permitted to immediately free - * the newly deleted entry. Instead, either synchronize_rcu() - * or call_rcu() must be used to defer freeing until an RCU - * grace period has elapsed. - */ -static inline void list_del_rcu(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->prev = LIST_POISON2; -} - -/** * list_replace - replace old entry by new one * @old : the element to be replaced * @new : the new element to insert @@ -227,25 +138,6 @@ static inline void list_replace_init(struct list_head *old, } /** - * list_replace_rcu - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * - * The @old entry will be replaced with the @new entry atomically. - * Note: @old should not be empty. - */ -static inline void list_replace_rcu(struct list_head *old, - struct list_head *new) -{ - new->next = old->next; - new->prev = old->prev; - smp_wmb(); - new->next->prev = new; - new->prev->next = new; - old->prev = LIST_POISON2; -} - -/** * list_del_init - deletes entry from list and reinitialize it. * @entry: the element to delete from the list. */ @@ -369,62 +261,6 @@ static inline void list_splice_init(struct list_head *list, } /** - * list_splice_init_rcu - splice an RCU-protected list into an existing list. - * @list: the RCU-protected list to splice - * @head: the place in the list to splice the first list into - * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... - * - * @head can be RCU-read traversed concurrently with this function. - * - * Note that this function blocks. - * - * Important note: the caller must take whatever action is necessary to - * prevent any other updates to @head. In principle, it is possible - * to modify the list as soon as sync() begins execution. - * If this sort of thing becomes necessary, an alternative version - * based on call_rcu() could be created. But only if -really- - * needed -- there is no shortage of RCU API members. - */ -static inline void list_splice_init_rcu(struct list_head *list, - struct list_head *head, - void (*sync)(void)) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - if (list_empty(head)) - return; - - /* "first" and "last" tracking list, so initialize it. */ - - INIT_LIST_HEAD(list); - - /* - * At this point, the list body still points to the source list. - * Wait for any readers to finish using the list before splicing - * the list body into the new list. Any new readers will see - * an empty list. - */ - - sync(); - - /* - * Readers are finished with the source list, so perform splice. - * The order is important if the new list is global and accessible - * to concurrent RCU readers. Note that RCU readers are not - * permitted to traverse the prev pointers without excluding - * this function. - */ - - last->next = at; - smp_wmb(); - head->next = first; - first->prev = head; - at->prev = last; -} - -/** * list_entry - get the struct for this entry * @ptr: the &struct list_head pointer. * @type: the type of the struct this is embedded in. @@ -629,57 +465,6 @@ static inline void list_splice_init_rcu(struct list_head *list, &pos->member != (head); \ pos = n, n = list_entry(n->member.prev, typeof(*n), member)) -/** - * list_for_each_rcu - iterate over an rcu-protected list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) - -#define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - pos != (head); \ - pos = rcu_dereference(pos->next)) - -/** - * list_for_each_entry_rcu - iterate over rcu list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) - - -/** - * list_for_each_continue_rcu - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * Iterate over an rcu-protected list, continuing after current point. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference((pos)->next); \ - prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference((pos)->next)) - /* * Double linked lists with a single pointer list head. * Mostly useful for hash tables where the two pointer list head is @@ -730,31 +515,6 @@ static inline void hlist_del(struct hlist_node *n) n->pprev = LIST_POISON2; } -/** - * hlist_del_rcu - deletes entry from hash list without re-initialization - * @n: the element to delete from the hash list. - * - * Note: list_unhashed() on entry does not return true after this, - * the entry is in an undefined state. It is useful for RCU based - * lockfree traversal. - * - * In particular, it means that we can not poison the forward - * pointers that may still be used for walking the hash list. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry(). - */ -static inline void hlist_del_rcu(struct hlist_node *n) -{ - __hlist_del(n); - n->pprev = LIST_POISON2; -} - static inline void hlist_del_init(struct hlist_node *n) { if (!hlist_unhashed(n)) { @@ -763,27 +523,6 @@ static inline void hlist_del_init(struct hlist_node *n) } } -/** - * hlist_replace_rcu - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * - * The @old entry will be replaced with the @new entry atomically. - */ -static inline void hlist_replace_rcu(struct hlist_node *old, - struct hlist_node *new) -{ - struct hlist_node *next = old->next; - - new->next = next; - new->pprev = old->pprev; - smp_wmb(); - if (next) - new->next->pprev = &new->next; - *new->pprev = new; - old->pprev = LIST_POISON2; -} - static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; @@ -794,38 +533,6 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) n->pprev = &h->first; } - -/** - * hlist_add_head_rcu - * @n: the element to add to the hash list. - * @h: the list to add to. - * - * Description: - * Adds the specified element to the specified hlist, - * while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. Regardless of the type of CPU, the - * list-traversal primitive must be guarded by rcu_read_lock(). - */ -static inline void hlist_add_head_rcu(struct hlist_node *n, - struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - n->pprev = &h->first; - smp_wmb(); - if (first) - first->pprev = &n->next; - h->first = n; -} - /* next must be != NULL */ static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next) @@ -847,63 +554,6 @@ static inline void hlist_add_after(struct hlist_node *n, next->next->pprev = &next->next; } -/** - * hlist_add_before_rcu - * @n: the new element to add to the hash list. - * @next: the existing element to add the new element before. - * - * Description: - * Adds the specified element to the specified hlist - * before the specified node while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. - */ -static inline void hlist_add_before_rcu(struct hlist_node *n, - struct hlist_node *next) -{ - n->pprev = next->pprev; - n->next = next; - smp_wmb(); - next->pprev = &n->next; - *(n->pprev) = n; -} - -/** - * hlist_add_after_rcu - * @prev: the existing element to add the new element after. - * @n: the new element to add to the hash list. - * - * Description: - * Adds the specified element to the specified hlist - * after the specified node while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. - */ -static inline void hlist_add_after_rcu(struct hlist_node *prev, - struct hlist_node *n) -{ - n->next = prev->next; - n->pprev = &prev->next; - smp_wmb(); - prev->next = n; - if (n->next) - n->next->pprev = &n->next; -} - #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ @@ -964,21 +614,4 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = n) -/** - * hlist_for_each_entry_rcu - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as hlist_add_head_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = rcu_dereference(pos->next)) - #endif diff --git a/include/linux/rculist.h b/include/linux/rculist.h new file mode 100644 index 0000000..aa9b3eb --- /dev/null +++ b/include/linux/rculist.h @@ -0,0 +1,396 @@ +#ifndef _LINUX_RCULIST_H +#define _LINUX_RCULIST_H + +#ifdef __KERNEL__ + +/* + * RCU-protected list version + */ +#include + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next) +{ + new->next = next; + new->prev = prev; + smp_wmb(); + next->prev = new; + prev->next = new; +} + +/** + * list_add_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_rcu(struct list_head *new, struct list_head *head) +{ + __list_add_rcu(new, head, head->next); +} + +/** + * list_add_tail_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_tail_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_tail_rcu(struct list_head *new, + struct list_head *head) +{ + __list_add_rcu(new, head->prev, head); +} + +/** + * list_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * Note: list_empty() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_del_rcu() + * or list_add_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. + */ +static inline void list_del_rcu(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->prev = LIST_POISON2; +} + +/** + * list_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + * Note: @old should not be empty. + */ +static inline void list_replace_rcu(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->prev = old->prev; + smp_wmb(); + new->next->prev = new; + new->prev->next = new; + old->prev = LIST_POISON2; +} + +/** + * list_splice_init_rcu - splice an RCU-protected list into an existing list. + * @list: the RCU-protected list to splice + * @head: the place in the list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + * + * @head can be RCU-read traversed concurrently with this function. + * + * Note that this function blocks. + * + * Important note: the caller must take whatever action is necessary to + * prevent any other updates to @head. In principle, it is possible + * to modify the list as soon as sync() begins execution. + * If this sort of thing becomes necessary, an alternative version + * based on call_rcu() could be created. But only if -really- + * needed -- there is no shortage of RCU API members. + */ +static inline void list_splice_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + if (list_empty(head)) + return; + + /* "first" and "last" tracking list, so initialize it. */ + + INIT_LIST_HEAD(list); + + /* + * At this point, the list body still points to the source list. + * Wait for any readers to finish using the list before splicing + * the list body into the new list. Any new readers will see + * an empty list. + */ + + sync(); + + /* + * Readers are finished with the source list, so perform splice. + * The order is important if the new list is global and accessible + * to concurrent RCU readers. Note that RCU readers are not + * permitted to traverse the prev pointers without excluding + * this function. + */ + + last->next = at; + smp_wmb(); + head->next = first; + first->prev = head; + at->prev = last; +} + +/** + * list_for_each_rcu - iterate over an rcu-protected list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + prefetch(rcu_dereference(pos)->next), pos != (head); \ + pos = pos->next) + +#define __list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + rcu_dereference(pos) != (head); \ + pos = pos->next) + +/** + * list_for_each_safe_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + * + * Iterate over an rcu-protected list, safe against removal of list entry. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_safe_rcu(pos, n, head) \ + for (pos = (head)->next; \ + n = rcu_dereference(pos)->next, pos != (head); \ + pos = n) + +/** + * list_for_each_entry_rcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + prefetch(rcu_dereference(pos)->member.next), \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + + +/** + * list_for_each_continue_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * Iterate over an rcu-protected list, continuing after current point. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_continue_rcu(pos, head) \ + for ((pos) = (pos)->next; \ + prefetch(rcu_dereference((pos))->next), (pos) != (head); \ + (pos) = (pos)->next) + +/** + * hlist_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. + * + * Note: list_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry(). + */ +static inline void hlist_del_rcu(struct hlist_node *n) +{ + __hlist_del(n); + n->pprev = LIST_POISON2; +} + +/** + * hlist_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + */ +static inline void hlist_replace_rcu(struct hlist_node *old, + struct hlist_node *new) +{ + struct hlist_node *next = old->next; + + new->next = next; + new->pprev = old->pprev; + smp_wmb(); + if (next) + new->next->pprev = &new->next; + *new->pprev = new; + old->pprev = LIST_POISON2; +} + +/** + * hlist_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_add_head_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + n->pprev = &h->first; + smp_wmb(); + if (first) + first->pprev = &n->next; + h->first = n; +} + +/** + * hlist_add_before_rcu + * @n: the new element to add to the hash list. + * @next: the existing element to add the new element before. + * + * Description: + * Adds the specified element to the specified hlist + * before the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_before_rcu(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + smp_wmb(); + next->pprev = &n->next; + *(n->pprev) = n; +} + +/** + * hlist_add_after_rcu + * @prev: the existing element to add the new element after. + * @n: the new element to add to the hash list. + * + * Description: + * Adds the specified element to the specified hlist + * after the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_after_rcu(struct hlist_node *prev, + struct hlist_node *n) +{ + n->next = prev->next; + n->pprev = &prev->next; + smp_wmb(); + prev->next = n; + if (n->next) + n->next->pprev = &n->next; +} + +/** + * hlist_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = (head)->first; \ + rcu_dereference(pos) && ({ prefetch(pos->next); 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ + pos = pos->next) + +#endif /* __KERNEL__ */ +#endif diff --git a/kernel/pid.c b/kernel/pid.c index 20d59fa..30bd5d4 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/textsearch.c b/lib/textsearch.c index be8bda3..a3e500a 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #include #include diff --git a/net/802/psnap.c b/net/802/psnap.c index 31128cb..ea46439 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -20,6 +20,7 @@ #include #include #include +#include static LIST_HEAD(snap_list); static DEFINE_SPINLOCK(snap_lock); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2a739ad..e7ddbfa 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 72c5976..142060f 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index e38034a..9e96ffc 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -13,6 +13,7 @@ * 2 of the License, or (at your option) any later version. */ #include +#include #include "br_private.h" #include "br_private_stp.h" diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 02c2f7c..643c032 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -30,8 +30,7 @@ */ #include -#include -#include +#include #include #include #include -- cgit v0.10.2 From 10aa9d2cf9878757b003023d33ff90a37aa3044b Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 12 May 2008 21:21:06 +0200 Subject: rculist.h: use the rcu API Make almost all list mutation primitives use rcu_assign_pointer(). The main point of this being readability improvement. Signed-off-by: Franck Bui-Huu Cc: "Paul E. McKenney" Cc: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar diff --git a/include/linux/rculist.h b/include/linux/rculist.h index aa9b3eb..8d2c81f 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -7,6 +7,7 @@ * RCU-protected list version */ #include +#include /* * Insert a new entry between two known consecutive entries. @@ -19,9 +20,8 @@ static inline void __list_add_rcu(struct list_head *new, { new->next = next; new->prev = prev; - smp_wmb(); + rcu_assign_pointer(prev->next, new); next->prev = new; - prev->next = new; } /** @@ -110,9 +110,8 @@ static inline void list_replace_rcu(struct list_head *old, { new->next = old->next; new->prev = old->prev; - smp_wmb(); + rcu_assign_pointer(new->prev->next, new); new->next->prev = new; - new->prev->next = new; old->prev = LIST_POISON2; } @@ -166,8 +165,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ last->next = at; - smp_wmb(); - head->next = first; + rcu_assign_pointer(head->next, first); first->prev = head; at->prev = last; } @@ -280,10 +278,9 @@ static inline void hlist_replace_rcu(struct hlist_node *old, new->next = next; new->pprev = old->pprev; - smp_wmb(); + rcu_assign_pointer(*new->pprev, new); if (next) new->next->pprev = &new->next; - *new->pprev = new; old->pprev = LIST_POISON2; } @@ -310,12 +307,12 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; + n->next = first; n->pprev = &h->first; - smp_wmb(); + rcu_assign_pointer(h->first, n); if (first) first->pprev = &n->next; - h->first = n; } /** @@ -341,9 +338,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, { n->pprev = next->pprev; n->next = next; - smp_wmb(); + rcu_assign_pointer(*(n->pprev), n); next->pprev = &n->next; - *(n->pprev) = n; } /** @@ -369,8 +365,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, { n->next = prev->next; n->pprev = &prev->next; - smp_wmb(); - prev->next = n; + rcu_assign_pointer(prev->next, n); if (n->next) n->next->pprev = &n->next; } -- cgit v0.10.2 From d7c0651390b6a03ad53f99faec0ba88109d7191d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:06 +0200 Subject: rcu: fix rcu_try_flip_waitack_needed() to prevent grace-period stall The comment was correct -- need to make the code match the comment. Without this patch, if a CPU goes dynticks idle (and stays there forever) in just the right phase of preemptible-RCU grace-period processing, grace periods stall. The offending sequence of events (courtesy of Promela/spin, at least after I got the liveness criterion coded correctly...) is as follows: o CPU 0 is in dynticks-idle mode. Its dynticks_progress_counter is (say) 10. o CPU 0 takes an interrupt, so rcu_irq_enter() increments CPU 0's dynticks_progress_counter to 11. o CPU 1 is doing RCU grace-period processing in rcu_try_flip_idle(), sees rcu_pending(), so invokes dyntick_save_progress_counter(), which in turn takes a snapshot of CPU 0's dynticks_progress_counter into CPU 0's rcu_dyntick_snapshot -- now set to 11. CPU 1 then updates the RCU grace-period state to rcu_try_flip_waitack(). o CPU 0 returns from its interrupt, so rcu_irq_exit() increments CPU 0's dynticks_progress_counter to 12. o CPU 1 later invokes rcu_try_flip_waitack(), which notices that CPU 0 has not yet responded, and hence in turn invokes rcu_try_flip_waitack_needed(). This function examines the state of CPU 0's dynticks_progress_counter and rcu_dyntick_snapshot variables, which it copies to curr (== 12) and snap (== 11), respectively. Because curr!=snap, the first condition fails. Because curr-snap is only 1 and snap is odd, the second condition fails. rcu_try_flip_waitack_needed() therefore incorrectly concludes that it must wait for CPU 0 to explicitly acknowledge the counter flip. o CPU 0 remains forever in dynticks-idle mode, never taking any more hardware interrupts or any NMIs, and never running any more tasks. (Of course, -something- will usually eventually happen, which might be why we haven't seen this one in the wild. Still should be fixed!) Therefore the grace period never ends. Fix is to make the code match the comment, as shown below. With this fix, the above scenario would be satisfied with curr being even, and allow the grace period to proceed. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Josh Triplett Cc: Dipankar Sarma Cc: Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index aaa7976..27d0748 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -597,7 +597,7 @@ rcu_try_flip_waitack_needed(int cpu) * that this CPU already acknowledged the counter. */ - if ((curr - snap) > 2 || (snap & 0x1) == 0) + if ((curr - snap) > 2 || (curr & 0x1) == 0) return 0; /* We need this CPU to explicitly acknowledge the counter flip. */ -- cgit v0.10.2 From 78b0e0e9b27b62c4b22f05a147f7a80fa58b1ae3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:06 +0200 Subject: RCU, rculist.h: fix list iterators RCU list iterators: should prefetch ever be optimised out with no side-effects, the current version will lose the barrier completely. Pointed-out-by: Linus Torvalds Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 8d2c81f..b0f39be 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -180,31 +180,14 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_rcu(pos, head) \ - for (pos = (head)->next; \ - prefetch(rcu_dereference(pos)->next), pos != (head); \ - pos = pos->next) + for (pos = rcu_dereference((head)->next); \ + prefetch(pos->next), pos != (head); \ + pos = rcu_dereference(pos->next)) #define __list_for_each_rcu(pos, head) \ - for (pos = (head)->next; \ - rcu_dereference(pos) != (head); \ - pos = pos->next) - -/** - * list_for_each_safe_rcu - * @pos: the &struct list_head to use as a loop cursor. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - * - * Iterate over an rcu-protected list, safe against removal of list entry. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_safe_rcu(pos, n, head) \ - for (pos = (head)->next; \ - n = rcu_dereference(pos)->next, pos != (head); \ - pos = n) + for (pos = rcu_dereference((head)->next); \ + pos != (head); \ + pos = rcu_dereference(pos->next)) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -217,10 +200,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(rcu_dereference(pos)->member.next), \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) + for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ + prefetch(pos->member.next), &pos->member != (head); \ + pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) /** @@ -235,9 +217,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = (pos)->next; \ - prefetch(rcu_dereference((pos))->next), (pos) != (head); \ - (pos) = (pos)->next) + for ((pos) = rcu_dereference((pos)->next); \ + prefetch((pos)->next), (pos) != (head); \ + (pos) = rcu_dereference((pos)->next)) /** * hlist_del_rcu - deletes entry from hash list without re-initialization @@ -382,10 +364,10 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * as long as the traversal is guarded by rcu_read_lock(). */ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = (head)->first; \ - rcu_dereference(pos) && ({ prefetch(pos->next); 1; }) && \ + for (pos = rcu_dereference((head)->first); \ + pos && ({ prefetch(pos->next); 1; }) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = pos->next) + pos = rcu_dereference(pos->next)) #endif /* __KERNEL__ */ #endif -- cgit v0.10.2 From e19a98967f49cb63352b3db4818983ea2cec24ba Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 14 May 2008 16:23:00 -0700 Subject: rcu: remove duplicated include in kernel/rcupreempt_trace.c Removed duplicated include file in kernel/rcupreempt_trace.c Signed-off-by: Huang Weiyi Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c index 49ac4947..5edf82c 100644 --- a/kernel/rcupreempt_trace.c +++ b/kernel/rcupreempt_trace.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From 247ab1a80595100bd7ea61d174f8b7b7e5f8d4bb Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 14 May 2008 16:23:00 -0700 Subject: rcu: remove duplicated include in kernel/rcupreempt.c Removed duplicated include file in kernel/rcupreempt.c. Signed-off-by: Huang Weiyi Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 27d0748..b8e4cda 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From 711bbdd659b685b45d3f28b29a00f17be6484f38 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 17 May 2008 08:26:25 +0200 Subject: rculist.h: fix include in net/netfilter/nf_conntrack_netlink.c this file has rculist dependency but did not explicitly include it, which broke the build. Signed-off-by: Ingo Molnar diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0edefcf..077bcd2 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From 4f9c16ccfa26691dbb9a5d9e7d5098eb934ccdbe Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 30 Apr 2008 16:20:19 +0200 Subject: [ALSA] soc - tlv320aic3x - revisit clock setup This patch cleans up the clocking setup for aic3x codecs. It drops the dividers table and determines the PLL control values programatically. Under certain conditions, the PLL is disabled entirely which could save some power. Signed-off-by: Daniel Mack Acked-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 09b1661..738b3b6 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -49,7 +49,7 @@ #include "tlv320aic3x.h" #define AUDIO_NAME "aic3x" -#define AIC3X_VERSION "0.1" +#define AIC3X_VERSION "0.2" /* codec private data */ struct aic3x_priv { @@ -648,81 +648,6 @@ static int aic3x_add_widgets(struct snd_soc_codec *codec) return 0; } -struct aic3x_rate_divs { - u32 mclk; - u32 rate; - u32 fsref_reg; - u8 sr_reg:4; - u8 pllj_reg; - u16 plld_reg; -}; - -/* AIC3X codec mclk clock divider coefficients */ -static const struct aic3x_rate_divs aic3x_divs[] = { - /* 8k */ - {12000000, 8000, 48000, 0xa, 16, 3840}, - {19200000, 8000, 48000, 0xa, 10, 2400}, - {22579200, 8000, 48000, 0xa, 8, 7075}, - {33868800, 8000, 48000, 0xa, 5, 8049}, - /* 11.025k */ - {12000000, 11025, 44100, 0x6, 15, 528}, - {19200000, 11025, 44100, 0x6, 9, 4080}, - {22579200, 11025, 44100, 0x6, 8, 0}, - {33868800, 11025, 44100, 0x6, 5, 3333}, - /* 16k */ - {12000000, 16000, 48000, 0x4, 16, 3840}, - {19200000, 16000, 48000, 0x4, 10, 2400}, - {22579200, 16000, 48000, 0x4, 8, 7075}, - {33868800, 16000, 48000, 0x4, 5, 8049}, - /* 22.05k */ - {12000000, 22050, 44100, 0x2, 15, 528}, - {19200000, 22050, 44100, 0x2, 9, 4080}, - {22579200, 22050, 44100, 0x2, 8, 0}, - {33868800, 22050, 44100, 0x2, 5, 3333}, - /* 32k */ - {12000000, 32000, 48000, 0x1, 16, 3840}, - {19200000, 32000, 48000, 0x1, 10, 2400}, - {22579200, 32000, 48000, 0x1, 8, 7075}, - {33868800, 32000, 48000, 0x1, 5, 8049}, - /* 44.1k */ - {12000000, 44100, 44100, 0x0, 15, 528}, - {19200000, 44100, 44100, 0x0, 9, 4080}, - {22579200, 44100, 44100, 0x0, 8, 0}, - {33868800, 44100, 44100, 0x0, 5, 3333}, - /* 48k */ - {12000000, 48000, 48000, 0x0, 16, 3840}, - {19200000, 48000, 48000, 0x0, 10, 2400}, - {22579200, 48000, 48000, 0x0, 8, 7075}, - {33868800, 48000, 48000, 0x0, 5, 8049}, - /* 64k */ - {12000000, 64000, 96000, 0x1, 16, 3840}, - {19200000, 64000, 96000, 0x1, 10, 2400}, - {22579200, 64000, 96000, 0x1, 8, 7075}, - {33868800, 64000, 96000, 0x1, 5, 8049}, - /* 88.2k */ - {12000000, 88200, 88200, 0x0, 15, 528}, - {19200000, 88200, 88200, 0x0, 9, 4080}, - {22579200, 88200, 88200, 0x0, 8, 0}, - {33868800, 88200, 88200, 0x0, 5, 3333}, - /* 96k */ - {12000000, 96000, 96000, 0x0, 16, 3840}, - {19200000, 96000, 96000, 0x0, 10, 2400}, - {22579200, 96000, 96000, 0x0, 8, 7075}, - {33868800, 96000, 96000, 0x0, 5, 8049}, -}; - -static inline int aic3x_get_divs(int mclk, int rate) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(aic3x_divs); i++) { - if (aic3x_divs[i].rate == rate && aic3x_divs[i].mclk == mclk) - return i; - } - - return 0; -} - static int aic3x_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { @@ -730,49 +655,107 @@ static int aic3x_hw_params(struct snd_pcm_substream *substream, struct snd_soc_device *socdev = rtd->socdev; struct snd_soc_codec *codec = socdev->codec; struct aic3x_priv *aic3x = codec->private_data; - int i; - u8 data, pll_p, pll_r, pll_j; - u16 pll_d; - - i = aic3x_get_divs(aic3x->sysclk, params_rate(params)); + int codec_clk = 0, bypass_pll = 0, fsref, last_clk = 0; + u8 data, r, p, pll_q, pll_p = 1, pll_r = 1, pll_j = 1; + u16 pll_d = 1; - /* Route Left DAC to left channel input and - * right DAC to right channel input */ - data = (LDAC2LCH | RDAC2RCH); - switch (aic3x_divs[i].fsref_reg) { - case 44100: - data |= FSREF_44100; + /* select data word length */ + data = + aic3x_read_reg_cache(codec, AIC3X_ASD_INTF_CTRLB) & (~(0x3 << 4)); + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S16_LE: break; - case 48000: - data |= FSREF_48000; + case SNDRV_PCM_FORMAT_S20_3LE: + data |= (0x01 << 4); break; - case 88200: - data |= FSREF_44100 | DUAL_RATE_MODE; + case SNDRV_PCM_FORMAT_S24_LE: + data |= (0x02 << 4); break; - case 96000: - data |= FSREF_48000 | DUAL_RATE_MODE; + case SNDRV_PCM_FORMAT_S32_LE: + data |= (0x03 << 4); break; } + aic3x_write(codec, AIC3X_ASD_INTF_CTRLB, data); + + /* Fsref can be 44100 or 48000 */ + fsref = (params_rate(params) % 11025 == 0) ? 44100 : 48000; + + /* Try to find a value for Q which allows us to bypass the PLL and + * generate CODEC_CLK directly. */ + for (pll_q = 2; pll_q < 18; pll_q++) + if (aic3x->sysclk / (128 * pll_q) == fsref) { + bypass_pll = 1; + break; + } + + if (bypass_pll) { + pll_q &= 0xf; + aic3x_write(codec, AIC3X_PLL_PROGA_REG, pll_q << PLLQ_SHIFT); + aic3x_write(codec, AIC3X_GPIOB_REG, CODEC_CLKIN_CLKDIV); + } else + aic3x_write(codec, AIC3X_GPIOB_REG, CODEC_CLKIN_PLLDIV); + + /* Route Left DAC to left channel input and + * right DAC to right channel input */ + data = (LDAC2LCH | RDAC2RCH); + data |= (fsref == 44100) ? FSREF_44100 : FSREF_48000; + if (params_rate(params) >= 64000) + data |= DUAL_RATE_MODE; aic3x_write(codec, AIC3X_CODEC_DATAPATH_REG, data); /* codec sample rate select */ - data = aic3x_divs[i].sr_reg; + data = (fsref * 20) / params_rate(params); + if (params_rate(params) < 64000) + data /= 2; + data /= 5; + data -= 2; data |= (data << 4); aic3x_write(codec, AIC3X_SAMPLE_RATE_SEL_REG, data); - /* Use PLL for generation Fsref by equation: - * Fsref = (MCLK * K * R)/(2048 * P); - * Fix P = 2 and R = 1 and calculate K, if - * K = J.D, i.e. J - an interger portion of K and D is the fractional - * one with 4 digits of precision; - * Example: - * For MCLK = 22.5792 MHz and Fsref = 48kHz: - * Select P = 2, R= 1, K = 8.7074, which results in J = 8, D = 7074 + if (bypass_pll) + return 0; + + /* Use PLL + * find an apropriate setup for j, d, r and p by iterating over + * p and r - j and d are calculated for each fraction. + * Up to 128 values are probed, the closest one wins the game. + * The sysclk is divided by 1000 to prevent integer overflows. */ - pll_p = 2; - pll_r = 1; - pll_j = aic3x_divs[i].pllj_reg; - pll_d = aic3x_divs[i].plld_reg; + codec_clk = (2048 * fsref) / (aic3x->sysclk / 1000); + + for (r = 1; r <= 16; r++) + for (p = 1; p <= 8; p++) { + int clk, tmp = (codec_clk * pll_r * 10) / pll_p; + u8 j = tmp / 10000; + u16 d = tmp % 10000; + + if (j > 63) + continue; + + if (d != 0 && aic3x->sysclk < 10000000) + continue; + + /* This is actually 1000 * ((j + (d/10000)) * r) / p + * The term had to be converted to get rid of the + * division by 10000 */ + clk = ((10000 * j * r) + (d * r)) / (10 * p); + + /* check whether this values get closer than the best + * ones we had before */ + if (abs(codec_clk - clk) < abs(codec_clk - last_clk)) { + pll_j = j; pll_d = d; pll_r = r; pll_p = p; + last_clk = clk; + } + + /* Early exit for exact matches */ + if (clk == codec_clk) + break; + } + + if (last_clk == 0) { + printk(KERN_ERR "%s(): unable to setup PLL\n", __func__); + return -EINVAL; + } data = aic3x_read_reg_cache(codec, AIC3X_PLL_PROGA_REG); aic3x_write(codec, AIC3X_PLL_PROGA_REG, data | (pll_p << PLLP_SHIFT)); @@ -782,24 +765,6 @@ static int aic3x_hw_params(struct snd_pcm_substream *substream, aic3x_write(codec, AIC3X_PLL_PROGD_REG, (pll_d & 0x3F) << PLLD_LSB_SHIFT); - /* select data word length */ - data = - aic3x_read_reg_cache(codec, AIC3X_ASD_INTF_CTRLB) & (~(0x3 << 4)); - switch (params_format(params)) { - case SNDRV_PCM_FORMAT_S16_LE: - break; - case SNDRV_PCM_FORMAT_S20_3LE: - data |= (0x01 << 4); - break; - case SNDRV_PCM_FORMAT_S24_LE: - data |= (0x02 << 4); - break; - case SNDRV_PCM_FORMAT_S32_LE: - data |= (0x03 << 4); - break; - } - aic3x_write(codec, AIC3X_ASD_INTF_CTRLB, data); - return 0; } @@ -826,16 +791,8 @@ static int aic3x_set_dai_sysclk(struct snd_soc_codec_dai *codec_dai, struct snd_soc_codec *codec = codec_dai->codec; struct aic3x_priv *aic3x = codec->private_data; - switch (freq) { - case 12000000: - case 19200000: - case 22579200: - case 33868800: - aic3x->sysclk = freq; - return 0; - } - - return -EINVAL; + aic3x->sysclk = freq; + return 0; } static int aic3x_set_dai_fmt(struct snd_soc_codec_dai *codec_dai, diff --git a/sound/soc/codecs/tlv320aic3x.h b/sound/soc/codecs/tlv320aic3x.h index d0cdeeb..d49d001 100644 --- a/sound/soc/codecs/tlv320aic3x.h +++ b/sound/soc/codecs/tlv320aic3x.h @@ -109,6 +109,7 @@ #define LLOPM_CTRL 86 #define RLOPM_CTRL 93 /* Clock generation control register */ +#define AIC3X_GPIOB_REG 101 #define AIC3X_CLKGEN_CTRL_REG 102 /* Page select register bits */ @@ -128,12 +129,15 @@ /* PLL registers bitfields */ #define PLLP_SHIFT 0 +#define PLLQ_SHIFT 3 #define PLLR_SHIFT 0 #define PLLJ_SHIFT 2 #define PLLD_MSB_SHIFT 0 #define PLLD_LSB_SHIFT 2 /* Clock generation register bits */ +#define CODEC_CLKIN_PLLDIV 0 +#define CODEC_CLKIN_CLKDIV 1 #define PLL_CLKIN_SHIFT 4 #define MCLK_SOURCE 0x0 #define PLL_CLKDIV_SHIFT 0 -- cgit v0.10.2 From 54e7e6167d29a4a98207884b2fbd28b0b3fe91f6 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 30 Apr 2008 16:20:52 +0200 Subject: [ALSA] soc - tlv320aic3x - add GPIO support This patch adds support for AIC3x GPIO lines. They can be configured for many possible functions as well as be driven manually. I also introduced i2c read functionality since the GPIO state register has to be read from hardware every time and can not be served from cache. Signed-off-by: Daniel Mack Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 738b3b6..957996e 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -138,6 +138,20 @@ static int aic3x_write(struct snd_soc_codec *codec, unsigned int reg, return -EIO; } +/* + * read from the aic3x register space + */ +static int aic3x_read(struct snd_soc_codec *codec, unsigned int reg, + u8 *value) +{ + *value = reg & 0xff; + if (codec->hw_read(codec->control_data, value, 1) != 1) + return -EIO; + + aic3x_write_reg_cache(codec, reg, *value); + return 0; +} + #define SOC_DAPM_SINGLE_AIC3X(xname, reg, shift, mask, invert) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ .info = snd_soc_info_volsw, \ @@ -911,6 +925,33 @@ static int aic3x_dapm_event(struct snd_soc_codec *codec, int event) return 0; } +void aic3x_set_gpio(struct snd_soc_codec *codec, int gpio, int state) +{ + u8 reg = gpio ? AIC3X_GPIO2_REG : AIC3X_GPIO1_REG; + u8 bit = gpio ? 3: 0; + u8 val = aic3x_read_reg_cache(codec, reg) & ~(1 << bit); + aic3x_write(codec, reg, val | (!!state << bit)); +} +EXPORT_SYMBOL_GPL(aic3x_set_gpio); + +int aic3x_get_gpio(struct snd_soc_codec *codec, int gpio) +{ + u8 reg = gpio ? AIC3X_GPIO2_REG : AIC3X_GPIO1_REG; + u8 val, bit = gpio ? 2: 1; + + aic3x_read(codec, reg, &val); + return (val >> bit) & 1; +} +EXPORT_SYMBOL_GPL(aic3x_get_gpio); + +int aic3x_headset_detected(struct snd_soc_codec *codec) +{ + u8 val; + aic3x_read(codec, AIC3X_RT_IRQ_FLAGS_REG, &val); + return (val >> 2) & 1; +} +EXPORT_SYMBOL_GPL(aic3x_headset_detected); + #define AIC3X_RATES SNDRV_PCM_RATE_8000_96000 #define AIC3X_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \ SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S32_LE) @@ -977,6 +1018,7 @@ static int aic3x_resume(struct platform_device *pdev) static int aic3x_init(struct snd_soc_device *socdev) { struct snd_soc_codec *codec = socdev->codec; + struct aic3x_setup_data *setup = socdev->codec_data; int reg, ret = 0; codec->name = "aic3x"; @@ -1067,6 +1109,10 @@ static int aic3x_init(struct snd_soc_device *socdev) /* off, with power on */ aic3x_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + /* setup GPIO functions */ + aic3x_write(codec, AIC3X_GPIO1_REG, (setup->gpio_func[0] & 0xf) << 4); + aic3x_write(codec, AIC3X_GPIO2_REG, (setup->gpio_func[1] & 0xf) << 4); + aic3x_add_controls(codec); aic3x_add_widgets(codec); ret = snd_soc_register_card(socdev); @@ -1174,6 +1220,12 @@ static struct i2c_client client_template = { .name = "AIC3X", .driver = &aic3x_i2c_driver, }; + +static int aic3x_i2c_read(struct i2c_client *client, u8 *value, int len) +{ + value[0] = i2c_smbus_read_byte_data(client, value[0]); + return (len == 1); +} #endif static int aic3x_probe(struct platform_device *pdev) @@ -1208,6 +1260,7 @@ static int aic3x_probe(struct platform_device *pdev) if (setup->i2c_address) { normal_i2c[0] = setup->i2c_address; codec->hw_write = (hw_write_t) i2c_master_send; + codec->hw_read = (hw_read_t) aic3x_i2c_read; ret = i2c_add_driver(&aic3x_i2c_driver); if (ret != 0) printk(KERN_ERR "can't add i2c driver"); diff --git a/sound/soc/codecs/tlv320aic3x.h b/sound/soc/codecs/tlv320aic3x.h index d49d001..c1dd1ac 100644 --- a/sound/soc/codecs/tlv320aic3x.h +++ b/sound/soc/codecs/tlv320aic3x.h @@ -108,8 +108,14 @@ #define DACR1_2_RLOPM_VOL 92 #define LLOPM_CTRL 86 #define RLOPM_CTRL 93 -/* Clock generation control register */ +/* GPIO/IRQ registers */ +#define AIC3X_STICKY_IRQ_FLAGS_REG 96 +#define AIC3X_RT_IRQ_FLAGS_REG 97 +#define AIC3X_GPIO1_REG 98 +#define AIC3X_GPIO2_REG 99 +#define AIC3X_GPIOA_REG 100 #define AIC3X_GPIOB_REG 101 +/* Clock generation control register */ #define AIC3X_CLKGEN_CTRL_REG 102 /* Page select register bits */ @@ -175,8 +181,49 @@ /* Default input volume */ #define DEFAULT_GAIN 0x20 +/* GPIO API */ +enum { + AIC3X_GPIO1_FUNC_DISABLED = 0, + AIC3X_GPIO1_FUNC_AUDIO_WORDCLK_ADC = 1, + AIC3X_GPIO1_FUNC_CLOCK_MUX = 2, + AIC3X_GPIO1_FUNC_CLOCK_MUX_DIV2 = 3, + AIC3X_GPIO1_FUNC_CLOCK_MUX_DIV4 = 4, + AIC3X_GPIO1_FUNC_CLOCK_MUX_DIV8 = 5, + AIC3X_GPIO1_FUNC_SHORT_CIRCUIT_IRQ = 6, + AIC3X_GPIO1_FUNC_AGC_NOISE_IRQ = 7, + AIC3X_GPIO1_FUNC_INPUT = 8, + AIC3X_GPIO1_FUNC_OUTPUT = 9, + AIC3X_GPIO1_FUNC_DIGITAL_MIC_MODCLK = 10, + AIC3X_GPIO1_FUNC_AUDIO_WORDCLK = 11, + AIC3X_GPIO1_FUNC_BUTTON_IRQ = 12, + AIC3X_GPIO1_FUNC_HEADSET_DETECT_IRQ = 13, + AIC3X_GPIO1_FUNC_HEADSET_DETECT_OR_BUTTON_IRQ = 14, + AIC3X_GPIO1_FUNC_ALL_IRQ = 16 +}; + +enum { + AIC3X_GPIO2_FUNC_DISABLED = 0, + AIC3X_GPIO2_FUNC_HEADSET_DETECT_IRQ = 2, + AIC3X_GPIO2_FUNC_INPUT = 3, + AIC3X_GPIO2_FUNC_OUTPUT = 4, + AIC3X_GPIO2_FUNC_DIGITAL_MIC_INPUT = 5, + AIC3X_GPIO2_FUNC_AUDIO_BITCLK = 8, + AIC3X_GPIO2_FUNC_HEADSET_DETECT_OR_BUTTON_IRQ = 9, + AIC3X_GPIO2_FUNC_ALL_IRQ = 10, + AIC3X_GPIO2_FUNC_SHORT_CIRCUIT_OR_AGC_IRQ = 11, + AIC3X_GPIO2_FUNC_HEADSET_OR_BUTTON_PRESS_OR_SHORT_CIRCUIT_IRQ = 12, + AIC3X_GPIO2_FUNC_SHORT_CIRCUIT_IRQ = 13, + AIC3X_GPIO2_FUNC_AGC_NOISE_IRQ = 14, + AIC3X_GPIO2_FUNC_BUTTON_PRESS_IRQ = 15 +}; + +void aic3x_set_gpio(struct snd_soc_codec *codec, int gpio, int state); +int aic3x_get_gpio(struct snd_soc_codec *codec, int gpio); +int aic3x_headset_detected(struct snd_soc_codec *codec); + struct aic3x_setup_data { unsigned short i2c_address; + unsigned int gpio_func[2]; }; extern struct snd_soc_codec_dai aic3x_dai; -- cgit v0.10.2 From eeec12bf7b7d80d1c9cae5aae0dff7e2f928c64b Mon Sep 17 00:00:00 2001 From: Graeme Gregory Date: Wed, 30 Apr 2008 19:27:40 +0200 Subject: [ALSA] soc - DAPM - add hook to read state of DAPM widget This adds a hook to read the power state of a DAPM widget, I use this in the gta02 driver to expose certain DAPM widgets in the mixer for ease of audio routing. Signed-off-by: Graeme Gregory Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index a105b01..40cc695 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -223,6 +223,8 @@ int snd_soc_dapm_sys_add(struct device *dev); /* dapm audio endpoint control */ int snd_soc_dapm_set_endpoint(struct snd_soc_codec *codec, char *pin, int status); +int snd_soc_dapm_get_endpoint_status(struct snd_soc_codec *codec, + char *pin); int snd_soc_dapm_sync_endpoints(struct snd_soc_codec *codec); /* dapm widget types */ diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index af3326c..9fd5ee8 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1343,6 +1343,29 @@ int snd_soc_dapm_set_endpoint(struct snd_soc_codec *codec, EXPORT_SYMBOL_GPL(snd_soc_dapm_set_endpoint); /** + * snd_soc_dapm_get_endpoint_status - get audio endpoint status + * @codec: audio codec + * @endpoint: audio signal endpoint (or start point) + * + * Get audio endpoint status - connected or disconnected. + * + * Returns status + */ +int snd_soc_dapm_get_endpoint_status(struct snd_soc_codec *codec, + char *endpoint) +{ + struct snd_soc_dapm_widget *w; + + list_for_each_entry(w, &codec->dapm_widgets, list) { + if (!strcmp(w->name, endpoint)) + return w->connected; + } + + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_get_endpoint); + +/** * snd_soc_dapm_free - free dapm resources * @socdev: SoC device * -- cgit v0.10.2 From 650f6b13318bce6a8d59bfa48fe15b5832623cbc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 1 May 2008 10:49:18 +0200 Subject: [ALSA] sound: fix export symbol typo Signed-off-by: Stephen Rothwell Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 9fd5ee8..c60200c 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1363,7 +1363,7 @@ int snd_soc_dapm_get_endpoint_status(struct snd_soc_codec *codec, return 0; } -EXPORT_SYMBOL_GPL(snd_soc_dapm_get_endpoint); +EXPORT_SYMBOL_GPL(snd_soc_dapm_get_endpoint_status); /** * snd_soc_dapm_free - free dapm resources -- cgit v0.10.2 From 1894c59fdb63692f5ba2576875cc558b856935ca Mon Sep 17 00:00:00 2001 From: Tim Niemeyer Date: Mon, 5 May 2008 14:16:12 +0200 Subject: [ALSA] soc - Patch to add debug messages to the neo1973_wm8753 (GTA01) sound driver Signed-off-by: Tim Niemeyer Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c index 0e9d1c5..e469186 100644 --- a/sound/soc/s3c24xx/neo1973_wm8753.c +++ b/sound/soc/s3c24xx/neo1973_wm8753.c @@ -43,6 +43,14 @@ #include "s3c24xx-pcm.h" #include "s3c24xx-i2s.h" +/* Debugging stuff */ +#define S3C24XX_SOC_NEO1973_WM8753_DEBUG 0 +#if S3C24XX_SOC_NEO1973_WM8753_DEBUG +#define DBG(x...) printk(KERN_DEBUG "s3c24xx-soc-neo1973-wm8753: " x) +#else +#define DBG(x...) +#endif + /* define the scenarios */ #define NEO_AUDIO_OFF 0 #define NEO_GSM_CALL_AUDIO_HANDSET 1 @@ -67,6 +75,8 @@ static int neo1973_hifi_hw_params(struct snd_pcm_substream *substream, int ret = 0; unsigned long iis_clkrate; + DBG("Entered %s\n", __func__); + iis_clkrate = s3c24xx_i2s_get_clockrate(); switch (params_rate(params)) { @@ -151,6 +161,8 @@ static int neo1973_hifi_hw_free(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_codec_dai *codec_dai = rtd->dai->codec_dai; + DBG("Entered %s\n", __func__); + /* disable the PLL */ return codec_dai->dai_ops.set_pll(codec_dai, WM8753_PLL1, 0, 0); } @@ -172,6 +184,8 @@ static int neo1973_voice_hw_params(struct snd_pcm_substream *substream, int ret = 0; unsigned long iis_clkrate; + DBG("Entered %s\n", __func__); + iis_clkrate = s3c24xx_i2s_get_clockrate(); if (params_rate(params) != 8000) @@ -213,6 +227,8 @@ static int neo1973_voice_hw_free(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_codec_dai *codec_dai = rtd->dai->codec_dai; + DBG("Entered %s\n", __func__); + /* disable the PLL */ return codec_dai->dai_ops.set_pll(codec_dai, WM8753_PLL2, 0, 0); } @@ -233,6 +249,8 @@ static int neo1973_get_scenario(struct snd_kcontrol *kcontrol, static int set_scenario_endpoints(struct snd_soc_codec *codec, int scenario) { + DBG("Entered %s\n", __func__); + switch (neo1973_scenario) { case NEO_AUDIO_OFF: snd_soc_dapm_set_endpoint(codec, "Audio Out", 0); @@ -315,6 +333,8 @@ static int neo1973_set_scenario(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + DBG("Entered %s\n", __func__); + if (neo1973_scenario == ucontrol->value.integer.value[0]) return 0; @@ -327,6 +347,8 @@ static u8 lm4857_regs[4] = {0x00, 0x40, 0x80, 0xC0}; static void lm4857_write_regs(void) { + DBG("Entered %s\n", __func__); + if (i2c_master_send(i2c, lm4857_regs, 4) != 4) printk(KERN_ERR "lm4857: i2c write failed\n"); } @@ -338,6 +360,8 @@ static int lm4857_get_reg(struct snd_kcontrol *kcontrol, int shift = (kcontrol->private_value >> 8) & 0x0F; int mask = (kcontrol->private_value >> 16) & 0xFF; + DBG("Entered %s\n", __func__); + ucontrol->value.integer.value[0] = (lm4857_regs[reg] >> shift) & mask; return 0; } @@ -364,6 +388,8 @@ static int lm4857_get_mode(struct snd_kcontrol *kcontrol, { u8 value = lm4857_regs[LM4857_CTRL] & 0x0F; + DBG("Entered %s\n", __func__); + if (value) value -= 5; @@ -376,6 +402,8 @@ static int lm4857_set_mode(struct snd_kcontrol *kcontrol, { u8 value = ucontrol->value.integer.value[0]; + DBG("Entered %s\n", __func__); + if (value) value += 5; @@ -483,6 +511,8 @@ static int neo1973_wm8753_init(struct snd_soc_codec *codec) { int i, err; + DBG("Entered %s\n", __func__); + /* set up NC codec pins */ snd_soc_dapm_set_endpoint(codec, "LOUT2", 0); snd_soc_dapm_set_endpoint(codec, "ROUT2", 0); @@ -583,6 +613,8 @@ static int lm4857_amp_probe(struct i2c_adapter *adap, int addr, int kind) { int ret; + DBG("Entered %s\n", __func__); + client_template.adapter = adap; client_template.addr = addr; @@ -606,6 +638,8 @@ exit_err: static int lm4857_i2c_detach(struct i2c_client *client) { + DBG("Entered %s\n", __func__); + i2c_detach_client(client); kfree(client); return 0; @@ -613,6 +647,8 @@ static int lm4857_i2c_detach(struct i2c_client *client) static int lm4857_i2c_attach(struct i2c_adapter *adap) { + DBG("Entered %s\n", __func__); + return i2c_probe(adap, &addr_data, lm4857_amp_probe); } @@ -620,6 +656,8 @@ static u8 lm4857_state; static int lm4857_suspend(struct i2c_client *dev, pm_message_t state) { + DBG("Entered %s\n", __func__); + dev_dbg(&dev->dev, "lm4857_suspend\n"); lm4857_state = lm4857_regs[LM4857_CTRL] & 0xf; if (lm4857_state) { @@ -631,6 +669,8 @@ static int lm4857_suspend(struct i2c_client *dev, pm_message_t state) static int lm4857_resume(struct i2c_client *dev) { + DBG("Entered %s\n", __func__); + if (lm4857_state) { lm4857_regs[LM4857_CTRL] |= (lm4857_state & 0x0f); lm4857_write_regs(); @@ -640,6 +680,8 @@ static int lm4857_resume(struct i2c_client *dev) static void lm4857_shutdown(struct i2c_client *dev) { + DBG("Entered %s\n", __func__); + dev_dbg(&dev->dev, "lm4857_shutdown\n"); lm4857_regs[LM4857_CTRL] &= 0xf0; lm4857_write_regs(); @@ -671,6 +713,8 @@ static int __init neo1973_init(void) { int ret; + DBG("Entered %s\n", __func__); + neo1973_snd_device = platform_device_alloc("soc-audio", -1); if (!neo1973_snd_device) return -ENOMEM; @@ -691,6 +735,8 @@ static int __init neo1973_init(void) static void __exit neo1973_exit(void) { + DBG("Entered %s\n", __func__); + i2c_del_driver(&lm4857_i2c_driver); platform_device_unregister(neo1973_snd_device); } -- cgit v0.10.2 From 87af38dafe4f930921b9217c21ec6d72cad56bcc Mon Sep 17 00:00:00 2001 From: Daniel Jacobowitz Date: Wed, 7 May 2008 12:05:10 +0200 Subject: [ALSA] ac97 - Add virtual master control to VT1616/VT1617A codec. Enable VMASTER for VT1616 / VT1617A codec. Signed-off-by: Daniel Jacobowitz Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig index 602b58e..159137b 100644 --- a/sound/drivers/Kconfig +++ b/sound/drivers/Kconfig @@ -58,6 +58,7 @@ config SND_AC97_CODEC tristate select SND_PCM select AC97_BUS + select SND_VMASTER config SND_DUMMY tristate "Dummy (/dev/null) soundcard" diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 1292dce..92817f7 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -3352,8 +3352,66 @@ AC97_SINGLE("Downmix LFE and Center to Front", 0x5a, 12, 1, 0), AC97_SINGLE("Downmix Surround to Front", 0x5a, 11, 1, 0), }; +static const char *slave_vols_vt1616[] = { + "Front Playback Volume", + "Surround Playback Volume", + "Center Playback Volume", + "LFE Playback Volume", + NULL +}; + +static const char *slave_sws_vt1616[] = { + "Front Playback Switch", + "Surround Playback Switch", + "Center Playback Switch", + "LFE Playback Switch", + NULL +}; + +/* find a mixer control element with the given name */ +static struct snd_kcontrol *snd_ac97_find_mixer_ctl(struct snd_ac97 *ac97, + const char *name) +{ + struct snd_ctl_elem_id id; + memset(&id, 0, sizeof(id)); + id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + strcpy(id.name, name); + return snd_ctl_find_id(ac97->bus->card, &id); +} + +/* create a virtual master control and add slaves */ +int snd_ac97_add_vmaster(struct snd_ac97 *ac97, char *name, + const unsigned int *tlv, const char **slaves) +{ + struct snd_kcontrol *kctl; + const char **s; + int err; + + kctl = snd_ctl_make_virtual_master(name, tlv); + if (!kctl) + return -ENOMEM; + err = snd_ctl_add(ac97->bus->card, kctl); + if (err < 0) + return err; + + for (s = slaves; *s; s++) { + struct snd_kcontrol *sctl; + + sctl = snd_ac97_find_mixer_ctl(ac97, *s); + if (!sctl) { + snd_printdd("Cannot find slave %s, skipped\n", *s); + continue; + } + err = snd_ctl_add_slave(kctl, sctl); + if (err < 0) + return err; + } + return 0; +} + static int patch_vt1616_specific(struct snd_ac97 * ac97) { + struct snd_kcontrol *kctl; int err; if (snd_ac97_try_bit(ac97, 0x5a, 9)) @@ -3361,6 +3419,24 @@ static int patch_vt1616_specific(struct snd_ac97 * ac97) return err; if ((err = patch_build_controls(ac97, &snd_ac97_controls_vt1616[1], ARRAY_SIZE(snd_ac97_controls_vt1616) - 1)) < 0) return err; + + /* There is already a misnamed master switch. Rename it. */ + kctl = snd_ac97_find_mixer_ctl(ac97, "Master Playback Volume"); + if (!kctl) + return -EINVAL; + + snd_ac97_rename_vol_ctl(ac97, "Master Playback", "Front Playback"); + + err = snd_ac97_add_vmaster(ac97, "Master Playback Volume", + kctl->tlv.p, slave_vols_vt1616); + if (err < 0) + return err; + + err = snd_ac97_add_vmaster(ac97, "Master Playback Switch", + NULL, slave_sws_vt1616); + if (err < 0) + return err; + return 0; } -- cgit v0.10.2 From 48008b598bb9cfffbd871512f49d84eb5b885a00 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 8 May 2008 13:01:32 +0200 Subject: [ALSA] i2c: cs8427.c use put_unaligned helper Signed-off-by: Harvey Harrison Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/i2c/cs8427.c b/sound/i2c/cs8427.c index e57e9cb..9c3d361 100644 --- a/sound/i2c/cs8427.c +++ b/sound/i2c/cs8427.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -264,10 +265,7 @@ int snd_cs8427_create(struct snd_i2c_bus *bus, goto __fail; } /* write default channel status bytes */ - buf[0] = ((unsigned char)(SNDRV_PCM_DEFAULT_CON_SPDIF >> 0)); - buf[1] = ((unsigned char)(SNDRV_PCM_DEFAULT_CON_SPDIF >> 8)); - buf[2] = ((unsigned char)(SNDRV_PCM_DEFAULT_CON_SPDIF >> 16)); - buf[3] = ((unsigned char)(SNDRV_PCM_DEFAULT_CON_SPDIF >> 24)); + put_unaligned_le32(SNDRV_PCM_DEFAULT_CON_SPDIF, buf); memset(buf + 4, 0, 24 - 4); if (snd_cs8427_send_corudata(device, 0, buf, 24) < 0) goto __fail; -- cgit v0.10.2 From f3e9d5d1fd6a164611043c053de585a35d76d6a9 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 8 May 2008 15:42:15 +0200 Subject: [ALSA] snd_usb_caiaq: add support for 'Session I/O' interface This patch adds suport for Native Instruments new 'Guitar Rig Session I/O' audio hardware. Signed-off-by: Daniel Mack Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/usb/caiaq/caiaq-audio.c b/sound/usb/caiaq/caiaq-audio.c index 24970a5..b3a6033 100644 --- a/sound/usb/caiaq/caiaq-audio.c +++ b/sound/usb/caiaq/caiaq-audio.c @@ -637,6 +637,7 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev) switch (dev->chip.usb_id) { case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_AK1): case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_RIGKONTROL3): + case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_SESSIONIO): dev->samplerates |= SNDRV_PCM_RATE_88200; dev->samplerates |= SNDRV_PCM_RATE_192000; break; diff --git a/sound/usb/caiaq/caiaq-device.c b/sound/usb/caiaq/caiaq-device.c index a972f77..8317508 100644 --- a/sound/usb/caiaq/caiaq-device.c +++ b/sound/usb/caiaq/caiaq-device.c @@ -42,14 +42,15 @@ #endif MODULE_AUTHOR("Daniel Mack "); -MODULE_DESCRIPTION("caiaq USB audio, version 1.3.6"); +MODULE_DESCRIPTION("caiaq USB audio, version 1.3.8"); MODULE_LICENSE("GPL"); MODULE_SUPPORTED_DEVICE("{{Native Instruments, RigKontrol2}," "{Native Instruments, RigKontrol3}," "{Native Instruments, Kore Controller}," "{Native Instruments, Kore Controller 2}," - "{Native Instruments, Audio Kontrol 1}" - "{Native Instruments, Audio 8 DJ}}"); + "{Native Instruments, Audio Kontrol 1}," + "{Native Instruments, Audio 8 DJ}," + "{Native Instruments, Session I/O}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char* id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for this card */ @@ -110,6 +111,11 @@ static struct usb_device_id snd_usb_id_table[] = { .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_AUDIO8DJ }, + { + .match_flags = USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = USB_VID_NATIVEINSTRUMENTS, + .idProduct = USB_PID_SESSIONIO + }, { /* terminator */ } }; diff --git a/sound/usb/caiaq/caiaq-device.h b/sound/usb/caiaq/caiaq-device.h index 96a4913..f9fbdba 100644 --- a/sound/usb/caiaq/caiaq-device.h +++ b/sound/usb/caiaq/caiaq-device.h @@ -11,6 +11,7 @@ #define USB_PID_KORECONTROLLER2 0x4712 #define USB_PID_AK1 0x0815 #define USB_PID_AUDIO8DJ 0x1978 +#define USB_PID_SESSIONIO 0x1915 #define EP1_BUFSIZE 64 #define CAIAQ_USB_STR_LEN 0xff -- cgit v0.10.2 From c17cf06bfc4417a79d452c266e819c510f6a8344 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 May 2008 12:45:56 +0200 Subject: [ALSA] Remove unneeded ugly hack for i386 in memalloc.c The hack for dma_alloc_coherent() is no longer needed on 2.6.26 since the base code was improved. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 23b7bc0..f5d6d8d 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -80,68 +80,6 @@ struct snd_mem_list { #endif /* - * Hacks - */ - -#if defined(__i386__) -/* - * A hack to allocate large buffers via dma_alloc_coherent() - * - * since dma_alloc_coherent always tries GFP_DMA when the requested - * pci memory region is below 32bit, it happens quite often that even - * 2 order of pages cannot be allocated. - * - * so in the following, we allocate at first without dma_mask, so that - * allocation will be done without GFP_DMA. if the area doesn't match - * with the requested region, then realloate with the original dma_mask - * again. - * - * Really, we want to move this type of thing into dma_alloc_coherent() - * so dma_mask doesn't have to be messed with. - */ - -static void *snd_dma_hack_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, - gfp_t flags) -{ - void *ret; - u64 dma_mask, coherent_dma_mask; - - if (dev == NULL || !dev->dma_mask) - return dma_alloc_coherent(dev, size, dma_handle, flags); - dma_mask = *dev->dma_mask; - coherent_dma_mask = dev->coherent_dma_mask; - *dev->dma_mask = 0xffffffff; /* do without masking */ - dev->coherent_dma_mask = 0xffffffff; /* do without masking */ - ret = dma_alloc_coherent(dev, size, dma_handle, flags); - *dev->dma_mask = dma_mask; /* restore */ - dev->coherent_dma_mask = coherent_dma_mask; /* restore */ - if (ret) { - /* obtained address is out of range? */ - if (((unsigned long)*dma_handle + size - 1) & ~dma_mask) { - /* reallocate with the proper mask */ - dma_free_coherent(dev, size, ret, *dma_handle); - ret = dma_alloc_coherent(dev, size, dma_handle, flags); - } - } else { - /* wish to success now with the proper mask... */ - if (dma_mask != 0xffffffffUL) { - /* allocation with GFP_ATOMIC to avoid the long stall */ - flags &= ~GFP_KERNEL; - flags |= GFP_ATOMIC; - ret = dma_alloc_coherent(dev, size, dma_handle, flags); - } - } - return ret; -} - -/* redefine dma_alloc_coherent for some architectures */ -#undef dma_alloc_coherent -#define dma_alloc_coherent snd_dma_hack_alloc_coherent - -#endif /* arch */ - -/* * * Generic memory allocators * -- cgit v0.10.2 From d023dc0aa25d6a4f7dd0d109179a2077d22a7ad2 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 13 May 2008 09:18:27 +0200 Subject: [ALSA] oxygen: fix version in MODULE_LICENSE Adjust the MODULE_LICENSE strings to properly reflect the actual license. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela diff --git a/sound/pci/oxygen/hifier.c b/sound/pci/oxygen/hifier.c index 090dd43..c5d2c86 100644 --- a/sound/pci/oxygen/hifier.c +++ b/sound/pci/oxygen/hifier.c @@ -28,7 +28,7 @@ MODULE_AUTHOR("Clemens Ladisch "); MODULE_DESCRIPTION("TempoTec HiFier driver"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; diff --git a/sound/pci/oxygen/oxygen.c b/sound/pci/oxygen/oxygen.c index 63f185c..c1b30cc 100644 --- a/sound/pci/oxygen/oxygen.c +++ b/sound/pci/oxygen/oxygen.c @@ -43,7 +43,7 @@ MODULE_AUTHOR("Clemens Ladisch "); MODULE_DESCRIPTION("C-Media CMI8788 driver"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_SUPPORTED_DEVICE("{{C-Media,CMI8788}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c index 897697d..6e38795 100644 --- a/sound/pci/oxygen/oxygen_lib.c +++ b/sound/pci/oxygen/oxygen_lib.c @@ -32,7 +32,7 @@ MODULE_AUTHOR("Clemens Ladisch "); MODULE_DESCRIPTION("C-Media CMI8788 helper library"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); static irqreturn_t oxygen_interrupt(int dummy, void *dev_id) diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index 7f84fa5..4618a62 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -79,7 +79,7 @@ MODULE_AUTHOR("Clemens Ladisch "); MODULE_DESCRIPTION("Asus AVx00 driver"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_SUPPORTED_DEVICE("{{Asus,AV100},{Asus,AV200}}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; -- cgit v0.10.2 From c13650079ba3bed1c0bdd9bf4a13274be7676ff6 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 13 May 2008 09:19:53 +0200 Subject: [ALSA] oxygen: add symbol for I/O space size Remove another magic number - add a symbol for the size of the PCI I/O range. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela diff --git a/sound/pci/oxygen/oxygen.h b/sound/pci/oxygen/oxygen.h index a71c6e0..6c6efed 100644 --- a/sound/pci/oxygen/oxygen.h +++ b/sound/pci/oxygen/oxygen.h @@ -16,6 +16,8 @@ #define PCM_AC97 5 #define PCM_COUNT 6 +#define OXYGEN_IO_SIZE 0x100 + /* model-specific configuration of outputs/inputs */ #define PLAYBACK_0_TO_I2S 0x001 #define PLAYBACK_1_TO_SPDIF 0x004 diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c index 6e38795..60bc159 100644 --- a/sound/pci/oxygen/oxygen_lib.c +++ b/sound/pci/oxygen/oxygen_lib.c @@ -173,7 +173,7 @@ static void oxygen_proc_read(struct snd_info_entry *entry, int i, j; snd_iprintf(buffer, "CMI8788\n\n"); - for (i = 0; i < 0x100; i += 0x10) { + for (i = 0; i < OXYGEN_IO_SIZE; i += 0x10) { snd_iprintf(buffer, "%02x:", i); for (j = 0; j < 0x10; ++j) snd_iprintf(buffer, " %02x", oxygen_read8(chip, i + j)); @@ -455,7 +455,7 @@ int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, } if (!(pci_resource_flags(pci, 0) & IORESOURCE_IO) || - pci_resource_len(pci, 0) < 0x100) { + pci_resource_len(pci, 0) < OXYGEN_IO_SIZE) { snd_printk(KERN_ERR "invalid PCI I/O range\n"); err = -ENXIO; goto err_pci_regions; -- cgit v0.10.2 From e58aee95806c9d2bbcfc84cb85ce958e360165ef Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 13 May 2008 09:20:51 +0200 Subject: [ALSA] oxygen: save register writes Save the written values of all CMI8788 and AC97 registers and of some of the DAC/ADC registers so that it is possible to restore the register state later. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela diff --git a/sound/pci/oxygen/oxygen.c b/sound/pci/oxygen/oxygen.c index c1b30cc..8287ac2 100644 --- a/sound/pci/oxygen/oxygen.c +++ b/sound/pci/oxygen/oxygen.c @@ -80,6 +80,7 @@ MODULE_DEVICE_TABLE(pci, oxygen_ids); struct generic_data { u8 ak4396_ctl2; + u16 saved_wm8785_registers[2]; }; static void ak4396_write(struct oxygen *chip, unsigned int codec, @@ -99,12 +100,16 @@ static void ak4396_write(struct oxygen *chip, unsigned int codec, static void wm8785_write(struct oxygen *chip, u8 reg, unsigned int value) { + struct generic_data *data = chip->model_data; + oxygen_write_spi(chip, OXYGEN_SPI_TRIGGER | OXYGEN_SPI_DATA_LENGTH_2 | OXYGEN_SPI_CLOCK_160 | (3 << OXYGEN_SPI_CODEC_SHIFT) | OXYGEN_SPI_CEN_LATCH_CLOCK_LO, (reg << 9) | value); + if (reg < ARRAY_SIZE(data->saved_wm8785_registers)) + data->saved_wm8785_registers[reg] = value; } static void ak4396_init(struct oxygen *chip) @@ -135,10 +140,16 @@ static void ak5385_init(struct oxygen *chip) static void wm8785_init(struct oxygen *chip) { + struct generic_data *data = chip->model_data; + + data->saved_wm8785_registers[0] = WM8785_MCR_SLAVE | + WM8785_OSR_SINGLE | WM8785_FORMAT_LJUST; + data->saved_wm8785_registers[1] = WM8785_WL_24; + wm8785_write(chip, WM8785_R7, 0); - wm8785_write(chip, WM8785_R0, WM8785_MCR_SLAVE | - WM8785_OSR_SINGLE | WM8785_FORMAT_LJUST); - wm8785_write(chip, WM8785_R1, WM8785_WL_24); + wm8785_write(chip, WM8785_R0, data->saved_wm8785_registers[0]); + wm8785_write(chip, WM8785_R1, data->saved_wm8785_registers[1]); + snd_component_add(chip->card, "WM8785"); } diff --git a/sound/pci/oxygen/oxygen.h b/sound/pci/oxygen/oxygen.h index 6c6efed..29c9fa4 100644 --- a/sound/pci/oxygen/oxygen.h +++ b/sound/pci/oxygen/oxygen.h @@ -80,6 +80,12 @@ struct oxygen { struct work_struct spdif_input_bits_work; struct work_struct gpio_work; wait_queue_head_t ac97_waitqueue; + union { + u8 _8[OXYGEN_IO_SIZE]; + __le16 _16[OXYGEN_IO_SIZE / 2]; + __le32 _32[OXYGEN_IO_SIZE / 4]; + } saved_registers; + u16 saved_ac97_registers[2][0x40]; }; struct oxygen_model { diff --git a/sound/pci/oxygen/oxygen_io.c b/sound/pci/oxygen/oxygen_io.c index 5569606..83f135f 100644 --- a/sound/pci/oxygen/oxygen_io.c +++ b/sound/pci/oxygen/oxygen_io.c @@ -44,18 +44,21 @@ EXPORT_SYMBOL(oxygen_read32); void oxygen_write8(struct oxygen *chip, unsigned int reg, u8 value) { outb(value, chip->addr + reg); + chip->saved_registers._8[reg] = value; } EXPORT_SYMBOL(oxygen_write8); void oxygen_write16(struct oxygen *chip, unsigned int reg, u16 value) { outw(value, chip->addr + reg); + chip->saved_registers._16[reg / 2] = cpu_to_le16(value); } EXPORT_SYMBOL(oxygen_write16); void oxygen_write32(struct oxygen *chip, unsigned int reg, u32 value) { outl(value, chip->addr + reg); + chip->saved_registers._32[reg / 4] = cpu_to_le32(value); } EXPORT_SYMBOL(oxygen_write32); @@ -63,7 +66,10 @@ void oxygen_write8_masked(struct oxygen *chip, unsigned int reg, u8 value, u8 mask) { u8 tmp = inb(chip->addr + reg); - outb((tmp & ~mask) | (value & mask), chip->addr + reg); + tmp &= ~mask; + tmp |= value & mask; + outb(tmp, chip->addr + reg); + chip->saved_registers._8[reg] = tmp; } EXPORT_SYMBOL(oxygen_write8_masked); @@ -71,7 +77,10 @@ void oxygen_write16_masked(struct oxygen *chip, unsigned int reg, u16 value, u16 mask) { u16 tmp = inw(chip->addr + reg); - outw((tmp & ~mask) | (value & mask), chip->addr + reg); + tmp &= ~mask; + tmp |= value & mask; + outw(tmp, chip->addr + reg); + chip->saved_registers._16[reg / 2] = cpu_to_le16(tmp); } EXPORT_SYMBOL(oxygen_write16_masked); @@ -79,7 +88,10 @@ void oxygen_write32_masked(struct oxygen *chip, unsigned int reg, u32 value, u32 mask) { u32 tmp = inl(chip->addr + reg); - outl((tmp & ~mask) | (value & mask), chip->addr + reg); + tmp &= ~mask; + tmp |= value & mask; + outl(tmp, chip->addr + reg); + chip->saved_registers._32[reg / 4] = cpu_to_le32(tmp); } EXPORT_SYMBOL(oxygen_write32_masked); @@ -128,8 +140,10 @@ void oxygen_write_ac97(struct oxygen *chip, unsigned int codec, oxygen_write32(chip, OXYGEN_AC97_REGS, reg); /* require two "completed" writes, just to be sure */ if (oxygen_ac97_wait(chip, OXYGEN_AC97_INT_WRITE_DONE) >= 0 && - ++succeeded >= 2) + ++succeeded >= 2) { + chip->saved_ac97_registers[codec][index / 2] = data; return; + } } snd_printk(KERN_ERR "AC'97 write timeout\n"); } diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index 4618a62..110786b 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -132,6 +132,9 @@ struct xonar_data { u8 ext_power_int_reg; u8 ext_power_bit; u8 has_power; + u8 pcm1796_oversampling; + u8 cs4398_fm; + u8 cs4362a_fm; }; static void pcm1796_write(struct oxygen *chip, unsigned int codec, @@ -186,12 +189,13 @@ static void xonar_d2_init(struct oxygen *chip) data->anti_pop_delay = 300; data->output_enable_bit = GPIO_D2_OUTPUT_ENABLE; + data->pcm1796_oversampling = PCM1796_OS_64; for (i = 0; i < 4; ++i) { pcm1796_write(chip, i, 18, PCM1796_MUTE | PCM1796_DMF_DISABLED | PCM1796_FMT_24_LJUST | PCM1796_ATLD); pcm1796_write(chip, i, 19, PCM1796_FLT_SHARP | PCM1796_ATS_1); - pcm1796_write(chip, i, 20, PCM1796_OS_64); + pcm1796_write(chip, i, 20, data->pcm1796_oversampling); pcm1796_write(chip, i, 21, 0); pcm1796_write(chip, i, 16, 0x0f); /* set ATL/ATR after ATLD */ pcm1796_write(chip, i, 17, 0x0f); @@ -226,6 +230,9 @@ static void xonar_dx_init(struct oxygen *chip) data->ext_power_reg = OXYGEN_GPI_DATA; data->ext_power_int_reg = OXYGEN_GPI_INTERRUPT_MASK; data->ext_power_bit = GPI_DX_EXT_POWER; + data->cs4398_fm = CS4398_FM_SINGLE | CS4398_DEM_NONE | CS4398_DIF_LJUST; + data->cs4362a_fm = CS4362A_FM_SINGLE | + CS4362A_ATAPI_B_R | CS4362A_ATAPI_A_L; oxygen_write16(chip, OXYGEN_2WIRE_BUS_STATUS, OXYGEN_2WIRE_LENGTH_8 | @@ -236,8 +243,7 @@ static void xonar_dx_init(struct oxygen *chip) cs4398_write(chip, 8, CS4398_CPEN | CS4398_PDN); cs4362a_write(chip, 0x01, CS4362A_PDN | CS4362A_CPEN); /* configure */ - cs4398_write(chip, 2, CS4398_FM_SINGLE | - CS4398_DEM_NONE | CS4398_DIF_LJUST); + cs4398_write(chip, 2, data->cs4398_fm); cs4398_write(chip, 3, CS4398_ATAPI_B_R | CS4398_ATAPI_A_L); cs4398_write(chip, 4, CS4398_MUTEP_LOW | CS4398_PAMUTE); cs4398_write(chip, 5, 0xfe); @@ -249,16 +255,13 @@ static void xonar_dx_init(struct oxygen *chip) CS4362A_RMP_UP | CS4362A_ZERO_CROSS | CS4362A_SOFT_RAMP); cs4362a_write(chip, 0x04, CS4362A_RMP_DN | CS4362A_DEM_NONE); cs4362a_write(chip, 0x05, 0); - cs4362a_write(chip, 0x06, CS4362A_FM_SINGLE | - CS4362A_ATAPI_B_R | CS4362A_ATAPI_A_L); + cs4362a_write(chip, 0x06, data->cs4362a_fm); cs4362a_write(chip, 0x07, 0x7f | CS4362A_MUTE); cs4362a_write(chip, 0x08, 0x7f | CS4362A_MUTE); - cs4362a_write(chip, 0x09, CS4362A_FM_SINGLE | - CS4362A_ATAPI_B_R | CS4362A_ATAPI_A_L); + cs4362a_write(chip, 0x09, data->cs4362a_fm); cs4362a_write(chip, 0x0a, 0x7f | CS4362A_MUTE); cs4362a_write(chip, 0x0b, 0x7f | CS4362A_MUTE); - cs4362a_write(chip, 0x0c, CS4362A_FM_SINGLE | - CS4362A_ATAPI_B_R | CS4362A_ATAPI_A_L); + cs4362a_write(chip, 0x0c, data->cs4362a_fm); cs4362a_write(chip, 0x0d, 0x7f | CS4362A_MUTE); cs4362a_write(chip, 0x0e, 0x7f | CS4362A_MUTE); /* clear power down */ @@ -294,12 +297,13 @@ static void xonar_dx_cleanup(struct oxygen *chip) static void set_pcm1796_params(struct oxygen *chip, struct snd_pcm_hw_params *params) { + struct xonar_data *data = chip->model_data; unsigned int i; - u8 value; - value = params_rate(params) >= 96000 ? PCM1796_OS_32 : PCM1796_OS_64; + data->pcm1796_oversampling = + params_rate(params) >= 96000 ? PCM1796_OS_32 : PCM1796_OS_64; for (i = 0; i < 4; ++i) - pcm1796_write(chip, i, 20, value); + pcm1796_write(chip, i, 20, data->pcm1796_oversampling); } static void update_pcm1796_volume(struct oxygen *chip) @@ -342,24 +346,24 @@ static void set_cs53x1_params(struct oxygen *chip, static void set_cs43xx_params(struct oxygen *chip, struct snd_pcm_hw_params *params) { - u8 fm_cs4398, fm_cs4362a; + struct xonar_data *data = chip->model_data; - fm_cs4398 = CS4398_DEM_NONE | CS4398_DIF_LJUST; - fm_cs4362a = CS4362A_ATAPI_B_R | CS4362A_ATAPI_A_L; + data->cs4398_fm = CS4398_DEM_NONE | CS4398_DIF_LJUST; + data->cs4362a_fm = CS4362A_ATAPI_B_R | CS4362A_ATAPI_A_L; if (params_rate(params) <= 50000) { - fm_cs4398 |= CS4398_FM_SINGLE; - fm_cs4362a |= CS4362A_FM_SINGLE; + data->cs4398_fm |= CS4398_FM_SINGLE; + data->cs4362a_fm |= CS4362A_FM_SINGLE; } else if (params_rate(params) <= 100000) { - fm_cs4398 |= CS4398_FM_DOUBLE; - fm_cs4362a |= CS4362A_FM_DOUBLE; + data->cs4398_fm |= CS4398_FM_DOUBLE; + data->cs4362a_fm |= CS4362A_FM_DOUBLE; } else { - fm_cs4398 |= CS4398_FM_QUAD; - fm_cs4362a |= CS4362A_FM_QUAD; + data->cs4398_fm |= CS4398_FM_QUAD; + data->cs4362a_fm |= CS4362A_FM_QUAD; } - cs4398_write(chip, 2, fm_cs4398); - cs4362a_write(chip, 0x06, fm_cs4362a); - cs4362a_write(chip, 0x09, fm_cs4362a); - cs4362a_write(chip, 0x0c, fm_cs4362a); + cs4398_write(chip, 2, data->cs4398_fm); + cs4362a_write(chip, 0x06, data->cs4362a_fm); + cs4362a_write(chip, 0x09, data->cs4362a_fm); + cs4362a_write(chip, 0x0c, data->cs4362a_fm); } static void update_cs4362a_volumes(struct oxygen *chip) -- cgit v0.10.2 From bbbfb5526650cb9d01c5c230a4e3cbbc18474c41 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 13 May 2008 09:21:48 +0200 Subject: [ALSA] oxygen: simplify DAC volume initialization When initializing the DAC volume registers, we can just use the generic volume update functions instead of setting the registers manually. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela diff --git a/sound/pci/oxygen/hifier.c b/sound/pci/oxygen/hifier.c index c5d2c86..0e425d5 100644 --- a/sound/pci/oxygen/hifier.c +++ b/sound/pci/oxygen/hifier.c @@ -62,6 +62,12 @@ static void ak4396_write(struct oxygen *chip, u8 reg, u8 value) AK4396_WRITE | (reg << 8) | value); } +static void update_ak4396_volume(struct oxygen *chip) +{ + ak4396_write(chip, AK4396_LCH_ATT, chip->dac_volume[0]); + ak4396_write(chip, AK4396_RCH_ATT, chip->dac_volume[1]); +} + static void hifier_init(struct oxygen *chip) { struct hifier_data *data = chip->model_data; @@ -70,8 +76,7 @@ static void hifier_init(struct oxygen *chip) ak4396_write(chip, AK4396_CONTROL_1, AK4396_DIF_24_MSB | AK4396_RSTN); ak4396_write(chip, AK4396_CONTROL_2, data->ak4396_ctl2); ak4396_write(chip, AK4396_CONTROL_3, AK4396_PCM); - ak4396_write(chip, AK4396_LCH_ATT, 0); - ak4396_write(chip, AK4396_RCH_ATT, 0); + update_ak4396_volume(chip); snd_component_add(chip->card, "AK4396"); snd_component_add(chip->card, "CS5340"); @@ -100,12 +105,6 @@ static void set_ak4396_params(struct oxygen *chip, ak4396_write(chip, AK4396_CONTROL_1, AK4396_DIF_24_MSB | AK4396_RSTN); } -static void update_ak4396_volume(struct oxygen *chip) -{ - ak4396_write(chip, AK4396_LCH_ATT, chip->dac_volume[0]); - ak4396_write(chip, AK4396_RCH_ATT, chip->dac_volume[1]); -} - static void update_ak4396_mute(struct oxygen *chip) { struct hifier_data *data = chip->model_data; diff --git a/sound/pci/oxygen/oxygen.c b/sound/pci/oxygen/oxygen.c index 8287ac2..ad8d950 100644 --- a/sound/pci/oxygen/oxygen.c +++ b/sound/pci/oxygen/oxygen.c @@ -112,6 +112,18 @@ static void wm8785_write(struct oxygen *chip, u8 reg, unsigned int value) data->saved_wm8785_registers[reg] = value; } +static void update_ak4396_volume(struct oxygen *chip) +{ + unsigned int i; + + for (i = 0; i < 4; ++i) { + ak4396_write(chip, i, + AK4396_LCH_ATT, chip->dac_volume[i * 2]); + ak4396_write(chip, i, + AK4396_RCH_ATT, chip->dac_volume[i * 2 + 1]); + } +} + static void ak4396_init(struct oxygen *chip) { struct generic_data *data = chip->model_data; @@ -125,9 +137,8 @@ static void ak4396_init(struct oxygen *chip) AK4396_CONTROL_2, data->ak4396_ctl2); ak4396_write(chip, i, AK4396_CONTROL_3, AK4396_PCM); - ak4396_write(chip, i, AK4396_LCH_ATT, 0); - ak4396_write(chip, i, AK4396_RCH_ATT, 0); } + update_ak4396_volume(chip); snd_component_add(chip->card, "AK4396"); } @@ -194,18 +205,6 @@ static void set_ak4396_params(struct oxygen *chip, } } -static void update_ak4396_volume(struct oxygen *chip) -{ - unsigned int i; - - for (i = 0; i < 4; ++i) { - ak4396_write(chip, i, - AK4396_LCH_ATT, chip->dac_volume[i * 2]); - ak4396_write(chip, i, - AK4396_RCH_ATT, chip->dac_volume[i * 2 + 1]); - } -} - static void update_ak4396_mute(struct oxygen *chip) { struct generic_data *data = chip->model_data; diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index 110786b..bd9e285 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -182,6 +182,28 @@ static void xonar_common_init(struct oxygen *chip) oxygen_set_bits16(chip, OXYGEN_GPIO_DATA, data->output_enable_bit); } +static void update_pcm1796_volume(struct oxygen *chip) +{ + unsigned int i; + + for (i = 0; i < 4; ++i) { + pcm1796_write(chip, i, 16, chip->dac_volume[i * 2]); + pcm1796_write(chip, i, 17, chip->dac_volume[i * 2 + 1]); + } +} + +static void update_pcm1796_mute(struct oxygen *chip) +{ + unsigned int i; + u8 value; + + value = PCM1796_DMF_DISABLED | PCM1796_FMT_24_LJUST | PCM1796_ATLD; + if (chip->dac_mute) + value |= PCM1796_MUTE; + for (i = 0; i < 4; ++i) + pcm1796_write(chip, i, 18, value); +} + static void xonar_d2_init(struct oxygen *chip) { struct xonar_data *data = chip->model_data; @@ -192,14 +214,12 @@ static void xonar_d2_init(struct oxygen *chip) data->pcm1796_oversampling = PCM1796_OS_64; for (i = 0; i < 4; ++i) { - pcm1796_write(chip, i, 18, PCM1796_MUTE | PCM1796_DMF_DISABLED | - PCM1796_FMT_24_LJUST | PCM1796_ATLD); pcm1796_write(chip, i, 19, PCM1796_FLT_SHARP | PCM1796_ATS_1); pcm1796_write(chip, i, 20, data->pcm1796_oversampling); pcm1796_write(chip, i, 21, 0); - pcm1796_write(chip, i, 16, 0x0f); /* set ATL/ATR after ATLD */ - pcm1796_write(chip, i, 17, 0x0f); } + update_pcm1796_mute(chip); /* set ATLD before ATL/ATR */ + update_pcm1796_volume(chip); oxygen_set_bits16(chip, OXYGEN_GPIO_CONTROL, GPIO_D2_ALT); oxygen_clear_bits16(chip, OXYGEN_GPIO_DATA, GPIO_D2_ALT); @@ -221,6 +241,37 @@ static void xonar_d2x_init(struct oxygen *chip) xonar_d2_init(chip); } +static void update_cs4362a_volumes(struct oxygen *chip) +{ + u8 mute; + + mute = chip->dac_mute ? CS4362A_MUTE : 0; + cs4362a_write(chip, 7, (127 - chip->dac_volume[2]) | mute); + cs4362a_write(chip, 8, (127 - chip->dac_volume[3]) | mute); + cs4362a_write(chip, 10, (127 - chip->dac_volume[4]) | mute); + cs4362a_write(chip, 11, (127 - chip->dac_volume[5]) | mute); + cs4362a_write(chip, 13, (127 - chip->dac_volume[6]) | mute); + cs4362a_write(chip, 14, (127 - chip->dac_volume[7]) | mute); +} + +static void update_cs43xx_volume(struct oxygen *chip) +{ + cs4398_write(chip, 5, (127 - chip->dac_volume[0]) * 2); + cs4398_write(chip, 6, (127 - chip->dac_volume[1]) * 2); + update_cs4362a_volumes(chip); +} + +static void update_cs43xx_mute(struct oxygen *chip) +{ + u8 reg; + + reg = CS4398_MUTEP_LOW | CS4398_PAMUTE; + if (chip->dac_mute) + reg |= CS4398_MUTE_B | CS4398_MUTE_A; + cs4398_write(chip, 4, reg); + update_cs4362a_volumes(chip); +} + static void xonar_dx_init(struct oxygen *chip) { struct xonar_data *data = chip->model_data; @@ -245,9 +296,6 @@ static void xonar_dx_init(struct oxygen *chip) /* configure */ cs4398_write(chip, 2, data->cs4398_fm); cs4398_write(chip, 3, CS4398_ATAPI_B_R | CS4398_ATAPI_A_L); - cs4398_write(chip, 4, CS4398_MUTEP_LOW | CS4398_PAMUTE); - cs4398_write(chip, 5, 0xfe); - cs4398_write(chip, 6, 0xfe); cs4398_write(chip, 7, CS4398_RMP_DN | CS4398_RMP_UP | CS4398_ZERO_CROSS | CS4398_SOFT_RAMP); cs4362a_write(chip, 0x02, CS4362A_DIF_LJUST); @@ -256,14 +304,10 @@ static void xonar_dx_init(struct oxygen *chip) cs4362a_write(chip, 0x04, CS4362A_RMP_DN | CS4362A_DEM_NONE); cs4362a_write(chip, 0x05, 0); cs4362a_write(chip, 0x06, data->cs4362a_fm); - cs4362a_write(chip, 0x07, 0x7f | CS4362A_MUTE); - cs4362a_write(chip, 0x08, 0x7f | CS4362A_MUTE); cs4362a_write(chip, 0x09, data->cs4362a_fm); - cs4362a_write(chip, 0x0a, 0x7f | CS4362A_MUTE); - cs4362a_write(chip, 0x0b, 0x7f | CS4362A_MUTE); cs4362a_write(chip, 0x0c, data->cs4362a_fm); - cs4362a_write(chip, 0x0d, 0x7f | CS4362A_MUTE); - cs4362a_write(chip, 0x0e, 0x7f | CS4362A_MUTE); + update_cs43xx_volume(chip); + update_cs43xx_mute(chip); /* clear power down */ cs4398_write(chip, 8, CS4398_CPEN); cs4362a_write(chip, 0x01, CS4362A_CPEN); @@ -306,28 +350,6 @@ static void set_pcm1796_params(struct oxygen *chip, pcm1796_write(chip, i, 20, data->pcm1796_oversampling); } -static void update_pcm1796_volume(struct oxygen *chip) -{ - unsigned int i; - - for (i = 0; i < 4; ++i) { - pcm1796_write(chip, i, 16, chip->dac_volume[i * 2]); - pcm1796_write(chip, i, 17, chip->dac_volume[i * 2 + 1]); - } -} - -static void update_pcm1796_mute(struct oxygen *chip) -{ - unsigned int i; - u8 value; - - value = PCM1796_FMT_24_LJUST | PCM1796_ATLD; - if (chip->dac_mute) - value |= PCM1796_MUTE; - for (i = 0; i < 4; ++i) - pcm1796_write(chip, i, 18, value); -} - static void set_cs53x1_params(struct oxygen *chip, struct snd_pcm_hw_params *params) { @@ -366,37 +388,6 @@ static void set_cs43xx_params(struct oxygen *chip, cs4362a_write(chip, 0x0c, data->cs4362a_fm); } -static void update_cs4362a_volumes(struct oxygen *chip) -{ - u8 mute; - - mute = chip->dac_mute ? CS4362A_MUTE : 0; - cs4362a_write(chip, 7, (127 - chip->dac_volume[2]) | mute); - cs4362a_write(chip, 8, (127 - chip->dac_volume[3]) | mute); - cs4362a_write(chip, 10, (127 - chip->dac_volume[4]) | mute); - cs4362a_write(chip, 11, (127 - chip->dac_volume[5]) | mute); - cs4362a_write(chip, 13, (127 - chip->dac_volume[6]) | mute); - cs4362a_write(chip, 14, (127 - chip->dac_volume[7]) | mute); -} - -static void update_cs43xx_volume(struct oxygen *chip) -{ - cs4398_write(chip, 5, (127 - chip->dac_volume[0]) * 2); - cs4398_write(chip, 6, (127 - chip->dac_volume[1]) * 2); - update_cs4362a_volumes(chip); -} - -static void update_cs43xx_mute(struct oxygen *chip) -{ - u8 reg; - - reg = CS4398_MUTEP_LOW | CS4398_PAMUTE; - if (chip->dac_mute) - reg |= CS4398_MUTE_B | CS4398_MUTE_A; - cs4398_write(chip, 4, reg); - update_cs4362a_volumes(chip); -} - static void xonar_gpio_changed(struct oxygen *chip) { struct xonar_data *data = chip->model_data; -- cgit v0.10.2 From 75146fc0f9368ea41419792ac8bfdd19273b4473 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 13 May 2008 09:22:43 +0200 Subject: [ALSA] oxygen: separate out hardware initialization code Create separate functions for the code that initializes the hardware, as opposed to initializing internal driver state, so that they can be reused for resume support. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela diff --git a/sound/pci/oxygen/hifier.c b/sound/pci/oxygen/hifier.c index 0e425d5..6e45a58 100644 --- a/sound/pci/oxygen/hifier.c +++ b/sound/pci/oxygen/hifier.c @@ -68,15 +68,22 @@ static void update_ak4396_volume(struct oxygen *chip) ak4396_write(chip, AK4396_RCH_ATT, chip->dac_volume[1]); } -static void hifier_init(struct oxygen *chip) +static void hifier_registers_init(struct oxygen *chip) { struct hifier_data *data = chip->model_data; - data->ak4396_ctl2 = AK4396_SMUTE | AK4396_DEM_OFF | AK4396_DFS_NORMAL; ak4396_write(chip, AK4396_CONTROL_1, AK4396_DIF_24_MSB | AK4396_RSTN); ak4396_write(chip, AK4396_CONTROL_2, data->ak4396_ctl2); ak4396_write(chip, AK4396_CONTROL_3, AK4396_PCM); update_ak4396_volume(chip); +} + +static void hifier_init(struct oxygen *chip) +{ + struct hifier_data *data = chip->model_data; + + data->ak4396_ctl2 = AK4396_SMUTE | AK4396_DEM_OFF | AK4396_DFS_NORMAL; + hifier_registers_init(chip); snd_component_add(chip->card, "AK4396"); snd_component_add(chip->card, "CS5340"); diff --git a/sound/pci/oxygen/oxygen.c b/sound/pci/oxygen/oxygen.c index ad8d950..800ae30 100644 --- a/sound/pci/oxygen/oxygen.c +++ b/sound/pci/oxygen/oxygen.c @@ -124,12 +124,11 @@ static void update_ak4396_volume(struct oxygen *chip) } } -static void ak4396_init(struct oxygen *chip) +static void ak4396_registers_init(struct oxygen *chip) { struct generic_data *data = chip->model_data; unsigned int i; - data->ak4396_ctl2 = AK4396_SMUTE | AK4396_DEM_OFF | AK4396_DFS_NORMAL; for (i = 0; i < 4; ++i) { ak4396_write(chip, i, AK4396_CONTROL_1, AK4396_DIF_24_MSB | AK4396_RSTN); @@ -139,6 +138,14 @@ static void ak4396_init(struct oxygen *chip) AK4396_CONTROL_3, AK4396_PCM); } update_ak4396_volume(chip); +} + +static void ak4396_init(struct oxygen *chip) +{ + struct generic_data *data = chip->model_data; + + data->ak4396_ctl2 = AK4396_SMUTE | AK4396_DEM_OFF | AK4396_DFS_NORMAL; + ak4396_registers_init(chip); snd_component_add(chip->card, "AK4396"); } @@ -149,18 +156,23 @@ static void ak5385_init(struct oxygen *chip) snd_component_add(chip->card, "AK5385"); } -static void wm8785_init(struct oxygen *chip) +static void wm8785_registers_init(struct oxygen *chip) { struct generic_data *data = chip->model_data; - data->saved_wm8785_registers[0] = WM8785_MCR_SLAVE | - WM8785_OSR_SINGLE | WM8785_FORMAT_LJUST; - data->saved_wm8785_registers[1] = WM8785_WL_24; - wm8785_write(chip, WM8785_R7, 0); wm8785_write(chip, WM8785_R0, data->saved_wm8785_registers[0]); wm8785_write(chip, WM8785_R1, data->saved_wm8785_registers[1]); +} +static void wm8785_init(struct oxygen *chip) +{ + struct generic_data *data = chip->model_data; + + data->saved_wm8785_registers[0] = WM8785_MCR_SLAVE | + WM8785_OSR_SINGLE | WM8785_FORMAT_LJUST; + data->saved_wm8785_registers[1] = WM8785_WL_24; + wm8785_registers_init(chip); snd_component_add(chip->card, "WM8785"); } diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index bd9e285..e95dc57 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -204,15 +204,11 @@ static void update_pcm1796_mute(struct oxygen *chip) pcm1796_write(chip, i, 18, value); } -static void xonar_d2_init(struct oxygen *chip) +static void pcm1796_init(struct oxygen *chip) { struct xonar_data *data = chip->model_data; unsigned int i; - data->anti_pop_delay = 300; - data->output_enable_bit = GPIO_D2_OUTPUT_ENABLE; - data->pcm1796_oversampling = PCM1796_OS_64; - for (i = 0; i < 4; ++i) { pcm1796_write(chip, i, 19, PCM1796_FLT_SHARP | PCM1796_ATS_1); pcm1796_write(chip, i, 20, data->pcm1796_oversampling); @@ -220,6 +216,17 @@ static void xonar_d2_init(struct oxygen *chip) } update_pcm1796_mute(chip); /* set ATLD before ATL/ATR */ update_pcm1796_volume(chip); +} + +static void xonar_d2_init(struct oxygen *chip) +{ + struct xonar_data *data = chip->model_data; + + data->anti_pop_delay = 300; + data->output_enable_bit = GPIO_D2_OUTPUT_ENABLE; + data->pcm1796_oversampling = PCM1796_OS_64; + + pcm1796_init(chip); oxygen_set_bits16(chip, OXYGEN_GPIO_CONTROL, GPIO_D2_ALT); oxygen_clear_bits16(chip, OXYGEN_GPIO_DATA, GPIO_D2_ALT); @@ -272,24 +279,10 @@ static void update_cs43xx_mute(struct oxygen *chip) update_cs4362a_volumes(chip); } -static void xonar_dx_init(struct oxygen *chip) +static void cs43xx_init(struct oxygen *chip) { struct xonar_data *data = chip->model_data; - data->anti_pop_delay = 800; - data->output_enable_bit = GPIO_DX_OUTPUT_ENABLE; - data->ext_power_reg = OXYGEN_GPI_DATA; - data->ext_power_int_reg = OXYGEN_GPI_INTERRUPT_MASK; - data->ext_power_bit = GPI_DX_EXT_POWER; - data->cs4398_fm = CS4398_FM_SINGLE | CS4398_DEM_NONE | CS4398_DIF_LJUST; - data->cs4362a_fm = CS4362A_FM_SINGLE | - CS4362A_ATAPI_B_R | CS4362A_ATAPI_A_L; - - oxygen_write16(chip, OXYGEN_2WIRE_BUS_STATUS, - OXYGEN_2WIRE_LENGTH_8 | - OXYGEN_2WIRE_INTERRUPT_MASK | - OXYGEN_2WIRE_SPEED_FAST); - /* set CPEN (control port mode) and power down */ cs4398_write(chip, 8, CS4398_CPEN | CS4398_PDN); cs4362a_write(chip, 0x01, CS4362A_PDN | CS4362A_CPEN); @@ -311,6 +304,27 @@ static void xonar_dx_init(struct oxygen *chip) /* clear power down */ cs4398_write(chip, 8, CS4398_CPEN); cs4362a_write(chip, 0x01, CS4362A_CPEN); +} + +static void xonar_dx_init(struct oxygen *chip) +{ + struct xonar_data *data = chip->model_data; + + data->anti_pop_delay = 800; + data->output_enable_bit = GPIO_DX_OUTPUT_ENABLE; + data->ext_power_reg = OXYGEN_GPI_DATA; + data->ext_power_int_reg = OXYGEN_GPI_INTERRUPT_MASK; + data->ext_power_bit = GPI_DX_EXT_POWER; + data->cs4398_fm = CS4398_FM_SINGLE | CS4398_DEM_NONE | CS4398_DIF_LJUST; + data->cs4362a_fm = CS4362A_FM_SINGLE | + CS4362A_ATAPI_B_R | CS4362A_ATAPI_A_L; + + oxygen_write16(chip, OXYGEN_2WIRE_BUS_STATUS, + OXYGEN_2WIRE_LENGTH_8 | + OXYGEN_2WIRE_INTERRUPT_MASK | + OXYGEN_2WIRE_SPEED_FAST); + + cs43xx_init(chip); oxygen_set_bits16(chip, OXYGEN_GPIO_CONTROL, GPIO_DX_FRONT_PANEL | GPIO_DX_INPUT_ROUTE); -- cgit v0.10.2 From 92215f3a178080bd9d7c65879499e9474e54d55c Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 13 May 2008 09:23:02 +0200 Subject: [ALSA] virtuoso: add xonar_enable_output() Move the setting of the output enable GPIO bit to a separate function. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index e95dc57..abd5313 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -162,6 +162,14 @@ static void cs4362a_write(struct oxygen *chip, u8 reg, u8 value) oxygen_write_i2c(chip, I2C_DEVICE_CS4362A, reg, value); } +static void xonar_enable_output(struct oxygen *chip) +{ + struct xonar_data *data = chip->model_data; + + msleep(data->anti_pop_delay); + oxygen_set_bits16(chip, OXYGEN_GPIO_DATA, data->output_enable_bit); +} + static void xonar_common_init(struct oxygen *chip) { struct xonar_data *data = chip->model_data; @@ -173,13 +181,12 @@ static void xonar_common_init(struct oxygen *chip) data->has_power = !!(oxygen_read8(chip, data->ext_power_reg) & data->ext_power_bit); } - oxygen_set_bits16(chip, OXYGEN_GPIO_CONTROL, GPIO_CS53x1_M_MASK); + oxygen_set_bits16(chip, OXYGEN_GPIO_CONTROL, + GPIO_CS53x1_M_MASK | data->output_enable_bit); oxygen_write16_masked(chip, OXYGEN_GPIO_DATA, GPIO_CS53x1_M_SINGLE, GPIO_CS53x1_M_MASK); oxygen_ac97_set_bits(chip, 0, CM9780_JACK, CM9780_FMIC2MIC); - msleep(data->anti_pop_delay); - oxygen_set_bits16(chip, OXYGEN_GPIO_CONTROL, data->output_enable_bit); - oxygen_set_bits16(chip, OXYGEN_GPIO_DATA, data->output_enable_bit); + xonar_enable_output(chip); } static void update_pcm1796_volume(struct oxygen *chip) -- cgit v0.10.2 From 4a4bc53bc52978dd6c918531921da925fd047d95 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 13 May 2008 09:24:39 +0200 Subject: [ALSA] oxygen: add PM support Add suspend/resume support. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela diff --git a/sound/pci/oxygen/hifier.c b/sound/pci/oxygen/hifier.c index 6e45a58..7442460 100644 --- a/sound/pci/oxygen/hifier.c +++ b/sound/pci/oxygen/hifier.c @@ -146,6 +146,7 @@ static const struct oxygen_model model_hifier = { .init = hifier_init, .control_filter = hifier_control_filter, .cleanup = hifier_cleanup, + .resume = hifier_registers_init, .set_dac_params = set_ak4396_params, .set_adc_params = set_cs5340_params, .update_dac_volume = update_ak4396_volume, @@ -186,6 +187,10 @@ static struct pci_driver hifier_driver = { .id_table = hifier_ids, .probe = hifier_probe, .remove = __devexit_p(oxygen_pci_remove), +#ifdef CONFIG_PM + .suspend = oxygen_pci_suspend, + .resume = oxygen_pci_resume, +#endif }; static int __init alsa_card_hifier_init(void) diff --git a/sound/pci/oxygen/oxygen.c b/sound/pci/oxygen/oxygen.c index 800ae30..7c8ae31 100644 --- a/sound/pci/oxygen/oxygen.c +++ b/sound/pci/oxygen/oxygen.c @@ -192,6 +192,12 @@ static void generic_cleanup(struct oxygen *chip) { } +static void generic_resume(struct oxygen *chip) +{ + ak4396_registers_init(chip); + wm8785_registers_init(chip); +} + static void set_ak4396_params(struct oxygen *chip, struct snd_pcm_hw_params *params) { @@ -278,6 +284,7 @@ static const struct oxygen_model model_generic = { .owner = THIS_MODULE, .init = generic_init, .cleanup = generic_cleanup, + .resume = generic_resume, .set_dac_params = set_ak4396_params, .set_adc_params = set_wm8785_params, .update_dac_volume = update_ak4396_volume, @@ -305,6 +312,7 @@ static const struct oxygen_model model_meridian = { .owner = THIS_MODULE, .init = meridian_init, .cleanup = generic_cleanup, + .resume = ak4396_registers_init, .set_dac_params = set_ak4396_params, .set_adc_params = set_ak5385_params, .update_dac_volume = update_ak4396_volume, @@ -353,6 +361,10 @@ static struct pci_driver oxygen_driver = { .id_table = oxygen_ids, .probe = generic_oxygen_probe, .remove = __devexit_p(oxygen_pci_remove), +#ifdef CONFIG_PM + .suspend = oxygen_pci_suspend, + .resume = oxygen_pci_resume, +#endif }; static int __init alsa_card_oxygen_init(void) diff --git a/sound/pci/oxygen/oxygen.h b/sound/pci/oxygen/oxygen.h index 29c9fa4..74a6448 100644 --- a/sound/pci/oxygen/oxygen.h +++ b/sound/pci/oxygen/oxygen.h @@ -97,6 +97,8 @@ struct oxygen_model { int (*control_filter)(struct snd_kcontrol_new *template); int (*mixer_init)(struct oxygen *chip); void (*cleanup)(struct oxygen *chip); + void (*suspend)(struct oxygen *chip); + void (*resume)(struct oxygen *chip); void (*pcm_hardware_filter)(unsigned int channel, struct snd_pcm_hardware *hardware); void (*set_dac_params)(struct oxygen *chip, @@ -125,6 +127,10 @@ struct oxygen_model { int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, const struct oxygen_model *model); void oxygen_pci_remove(struct pci_dev *pci); +#ifdef CONFIG_PM +int oxygen_pci_suspend(struct pci_dev *pci, pm_message_t state); +int oxygen_pci_resume(struct pci_dev *pci); +#endif /* oxygen_mixer.c */ diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c index 60bc159..22f3785 100644 --- a/sound/pci/oxygen/oxygen_lib.c +++ b/sound/pci/oxygen/oxygen_lib.c @@ -314,6 +314,10 @@ static void oxygen_init(struct oxygen *chip) OXYGEN_SPDIF_LOCK_MASK | OXYGEN_SPDIF_RATE_MASK); oxygen_write32(chip, OXYGEN_SPDIF_OUTPUT_BITS, chip->spdif_bits); + oxygen_write16(chip, OXYGEN_2WIRE_BUS_STATUS, + OXYGEN_2WIRE_LENGTH_8 | + OXYGEN_2WIRE_INTERRUPT_MASK | + OXYGEN_2WIRE_SPEED_STANDARD); oxygen_clear_bits8(chip, OXYGEN_MPU401_CONTROL, OXYGEN_MPU401_LOOPBACK); oxygen_write8(chip, OXYGEN_GPI_INTERRUPT_MASK, 0); oxygen_write16(chip, OXYGEN_GPIO_INTERRUPT_MASK, 0); @@ -534,3 +538,99 @@ void oxygen_pci_remove(struct pci_dev *pci) pci_set_drvdata(pci, NULL); } EXPORT_SYMBOL(oxygen_pci_remove); + +#ifdef CONFIG_PM +int oxygen_pci_suspend(struct pci_dev *pci, pm_message_t state) +{ + struct snd_card *card = pci_get_drvdata(pci); + struct oxygen *chip = card->private_data; + unsigned int i, saved_interrupt_mask; + + snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); + + for (i = 0; i < PCM_COUNT; ++i) + if (chip->streams[i]) + snd_pcm_suspend(chip->streams[i]); + + if (chip->model->suspend) + chip->model->suspend(chip); + + spin_lock_irq(&chip->reg_lock); + saved_interrupt_mask = chip->interrupt_mask; + chip->interrupt_mask = 0; + oxygen_write16(chip, OXYGEN_DMA_STATUS, 0); + oxygen_write16(chip, OXYGEN_INTERRUPT_MASK, 0); + spin_unlock_irq(&chip->reg_lock); + + synchronize_irq(chip->irq); + flush_scheduled_work(); + chip->interrupt_mask = saved_interrupt_mask; + + pci_disable_device(pci); + pci_save_state(pci); + pci_set_power_state(pci, pci_choose_state(pci, state)); + return 0; +} +EXPORT_SYMBOL(oxygen_pci_suspend); + +static const u32 registers_to_restore[OXYGEN_IO_SIZE / 32] = { + 0xffffffff, 0x00ff077f, 0x00011d08, 0x007f00ff, + 0x00300000, 0x00000fe4, 0x0ff7001f, 0x00000000 +}; +static const u32 ac97_registers_to_restore[2][0x40 / 32] = { + { 0x18284fa2, 0x03060000 }, + { 0x00007fa6, 0x00200000 } +}; + +static inline int is_bit_set(const u32 *bitmap, unsigned int bit) +{ + return bitmap[bit / 32] & (1 << (bit & 31)); +} + +static void oxygen_restore_ac97(struct oxygen *chip, unsigned int codec) +{ + unsigned int i; + + oxygen_write_ac97(chip, codec, AC97_RESET, 0); + msleep(1); + for (i = 1; i < 0x40; ++i) + if (is_bit_set(ac97_registers_to_restore[codec], i)) + oxygen_write_ac97(chip, codec, i * 2, + chip->saved_ac97_registers[codec][i]); +} + +int oxygen_pci_resume(struct pci_dev *pci) +{ + struct snd_card *card = pci_get_drvdata(pci); + struct oxygen *chip = card->private_data; + unsigned int i; + + pci_set_power_state(pci, PCI_D0); + pci_restore_state(pci); + if (pci_enable_device(pci) < 0) { + snd_printk(KERN_ERR "cannot reenable device"); + snd_card_disconnect(card); + return -EIO; + } + pci_set_master(pci); + + oxygen_write16(chip, OXYGEN_DMA_STATUS, 0); + oxygen_write16(chip, OXYGEN_INTERRUPT_MASK, 0); + for (i = 0; i < OXYGEN_IO_SIZE; ++i) + if (is_bit_set(registers_to_restore, i)) + oxygen_write8(chip, i, chip->saved_registers._8[i]); + if (chip->has_ac97_0) + oxygen_restore_ac97(chip, 0); + if (chip->has_ac97_1) + oxygen_restore_ac97(chip, 1); + + if (chip->model->resume) + chip->model->resume(chip); + + oxygen_write16(chip, OXYGEN_INTERRUPT_MASK, chip->interrupt_mask); + + snd_power_change_state(card, SNDRV_CTL_POWER_D0); + return 0; +} +EXPORT_SYMBOL(oxygen_pci_resume); +#endif /* CONFIG_PM */ diff --git a/sound/pci/oxygen/oxygen_pcm.c b/sound/pci/oxygen/oxygen_pcm.c index b17c405..9680aa3 100644 --- a/sound/pci/oxygen/oxygen_pcm.c +++ b/sound/pci/oxygen/oxygen_pcm.c @@ -517,6 +517,7 @@ static int oxygen_trigger(struct snd_pcm_substream *substream, int cmd) switch (cmd) { case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_SUSPEND: pausing = 0; break; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index abd5313..9a2c16b 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -359,6 +359,18 @@ static void xonar_dx_cleanup(struct oxygen *chip) oxygen_clear_bits8(chip, OXYGEN_FUNCTION, OXYGEN_FUNCTION_RESET_CODEC); } +static void xonar_d2_resume(struct oxygen *chip) +{ + pcm1796_init(chip); + xonar_enable_output(chip); +} + +static void xonar_dx_resume(struct oxygen *chip) +{ + cs43xx_init(chip); + xonar_enable_output(chip); +} + static void set_pcm1796_params(struct oxygen *chip, struct snd_pcm_hw_params *params) { @@ -551,6 +563,8 @@ static const struct oxygen_model xonar_models[] = { .control_filter = xonar_d2_control_filter, .mixer_init = xonar_mixer_init, .cleanup = xonar_cleanup, + .suspend = xonar_cleanup, + .resume = xonar_d2_resume, .set_dac_params = set_pcm1796_params, .set_adc_params = set_cs53x1_params, .update_dac_volume = update_pcm1796_volume, @@ -579,6 +593,8 @@ static const struct oxygen_model xonar_models[] = { .control_filter = xonar_d2_control_filter, .mixer_init = xonar_mixer_init, .cleanup = xonar_cleanup, + .suspend = xonar_cleanup, + .resume = xonar_d2_resume, .set_dac_params = set_pcm1796_params, .set_adc_params = set_cs53x1_params, .update_dac_volume = update_pcm1796_volume, @@ -608,6 +624,8 @@ static const struct oxygen_model xonar_models[] = { .control_filter = xonar_dx_control_filter, .mixer_init = xonar_dx_mixer_init, .cleanup = xonar_dx_cleanup, + .suspend = xonar_dx_cleanup, + .resume = xonar_dx_resume, .set_dac_params = set_cs43xx_params, .set_adc_params = set_cs53x1_params, .update_dac_volume = update_cs43xx_volume, @@ -652,6 +670,10 @@ static struct pci_driver xonar_driver = { .id_table = xonar_ids, .probe = xonar_probe, .remove = __devexit_p(oxygen_pci_remove), +#ifdef CONFIG_PM + .suspend = oxygen_pci_suspend, + .resume = oxygen_pci_resume, +#endif }; static int __init alsa_card_xonar_init(void) -- cgit v0.10.2 From d55d7a1cbbd069f8368ec5c67480d319e7b227b9 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 13 May 2008 09:25:39 +0200 Subject: [ALSA] oxygen: add symbols for buffer/period size constraints Introduce symbols for the buffer/period size constraints so that their limits and relationships become clearer. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela diff --git a/sound/pci/oxygen/oxygen_pcm.c b/sound/pci/oxygen/oxygen_pcm.c index 9680aa3..09a16e4 100644 --- a/sound/pci/oxygen/oxygen_pcm.c +++ b/sound/pci/oxygen/oxygen_pcm.c @@ -24,6 +24,16 @@ #include #include "oxygen.h" +/* most DMA channels have a 16-bit counter for 32-bit words */ +#define BUFFER_BYTES_MAX ((1 << 16) * 4) +/* the multichannel DMA channel has a 24-bit counter */ +#define BUFFER_BYTES_MAX_MULTICH ((1 << 24) * 4) + +#define PERIOD_BYTES_MIN 64 + +#define DEFAULT_BUFFER_BYTES (BUFFER_BYTES_MAX / 2) +#define DEFAULT_BUFFER_BYTES_MULTICH (1024 * 1024) + static const struct snd_pcm_hardware oxygen_stereo_hardware = { .info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | @@ -44,11 +54,11 @@ static const struct snd_pcm_hardware oxygen_stereo_hardware = { .rate_max = 192000, .channels_min = 2, .channels_max = 2, - .buffer_bytes_max = 256 * 1024, - .period_bytes_min = 128, - .period_bytes_max = 128 * 1024, + .buffer_bytes_max = BUFFER_BYTES_MAX, + .period_bytes_min = PERIOD_BYTES_MIN, + .period_bytes_max = BUFFER_BYTES_MAX / 2, .periods_min = 2, - .periods_max = 2048, + .periods_max = BUFFER_BYTES_MAX / PERIOD_BYTES_MIN, }; static const struct snd_pcm_hardware oxygen_multichannel_hardware = { .info = SNDRV_PCM_INFO_MMAP | @@ -70,11 +80,11 @@ static const struct snd_pcm_hardware oxygen_multichannel_hardware = { .rate_max = 192000, .channels_min = 2, .channels_max = 8, - .buffer_bytes_max = 2048 * 1024, - .period_bytes_min = 128, - .period_bytes_max = 256 * 1024, + .buffer_bytes_max = BUFFER_BYTES_MAX_MULTICH, + .period_bytes_min = PERIOD_BYTES_MIN, + .period_bytes_max = BUFFER_BYTES_MAX_MULTICH / 2, .periods_min = 2, - .periods_max = 16384, + .periods_max = BUFFER_BYTES_MAX_MULTICH / PERIOD_BYTES_MIN, }; static const struct snd_pcm_hardware oxygen_ac97_hardware = { .info = SNDRV_PCM_INFO_MMAP | @@ -88,11 +98,11 @@ static const struct snd_pcm_hardware oxygen_ac97_hardware = { .rate_max = 48000, .channels_min = 2, .channels_max = 2, - .buffer_bytes_max = 256 * 1024, - .period_bytes_min = 128, - .period_bytes_max = 128 * 1024, + .buffer_bytes_max = BUFFER_BYTES_MAX, + .period_bytes_min = PERIOD_BYTES_MIN, + .period_bytes_max = BUFFER_BYTES_MAX / 2, .periods_min = 2, - .periods_max = 2048, + .periods_max = BUFFER_BYTES_MAX / PERIOD_BYTES_MIN, }; static const struct snd_pcm_hardware *const oxygen_hardware[PCM_COUNT] = { @@ -664,12 +674,14 @@ int oxygen_pcm_init(struct oxygen *chip) snd_pcm_lib_preallocate_pages(pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream, SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(chip->pci), - 512 * 1024, 2048 * 1024); + DEFAULT_BUFFER_BYTES_MULTICH, + BUFFER_BYTES_MAX_MULTICH); if (ins) snd_pcm_lib_preallocate_pages(pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream, SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(chip->pci), - 128 * 1024, 256 * 1024); + DEFAULT_BUFFER_BYTES, + BUFFER_BYTES_MAX); } outs = !!(chip->model->pcm_dev_cfg & PLAYBACK_1_TO_SPDIF); @@ -689,7 +701,8 @@ int oxygen_pcm_init(struct oxygen *chip) strcpy(pcm->name, "Digital"); snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(chip->pci), - 128 * 1024, 256 * 1024); + DEFAULT_BUFFER_BYTES, + BUFFER_BYTES_MAX); } if (chip->has_ac97_1) { @@ -719,7 +732,8 @@ int oxygen_pcm_init(struct oxygen *chip) strcpy(pcm->name, outs ? "Front Panel" : "Analog 2"); snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(chip->pci), - 128 * 1024, 256 * 1024); + DEFAULT_BUFFER_BYTES, + BUFFER_BYTES_MAX); } return 0; } -- cgit v0.10.2 From ca1f30ad6c3f002d1d9b9355a53b8bbf2fe70588 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 13 May 2008 09:26:01 +0200 Subject: [ALSA] virtuoso: restrict period time to less than 10 s Add a constraint for the period time so that there are less than ten seconds between interrupts so that ALSA does not assume that the device is dead. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela diff --git a/sound/pci/oxygen/oxygen_pcm.c b/sound/pci/oxygen/oxygen_pcm.c index 09a16e4..c4ad65a 100644 --- a/sound/pci/oxygen/oxygen_pcm.c +++ b/sound/pci/oxygen/oxygen_pcm.c @@ -165,6 +165,12 @@ static int oxygen_open(struct snd_pcm_substream *substream, if (err < 0) return err; } + if (channel == PCM_MULTICH) { + err = snd_pcm_hw_constraint_minmax + (runtime, SNDRV_PCM_HW_PARAM_PERIOD_TIME, 0, 8192000); + if (err < 0) + return err; + } snd_pcm_set_sync(substream); chip->streams[channel] = substream; -- cgit v0.10.2 From 4ba1327ab8ce179c40862f3dedb4ebaaa491d737 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2008 14:51:19 +0200 Subject: [ALSA] soc - DAPM - Add bulk control registration Most SoC drivers cut'n'paste a loop iterating over an array to register their DAPM controls. Provide a function they can call instead. Signed-off-by: Mark Brown Cc: Graeme Gregory Cc: Frank Mandarino Cc: Jarkko Nikula Cc: Richard Purdie Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 40cc695..1f30616 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -205,6 +205,9 @@ int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_soc_dapm_new_control(struct snd_soc_codec *codec, const struct snd_soc_dapm_widget *widget); +int snd_soc_dapm_new_controls(struct snd_soc_codec *codec, + const struct snd_soc_dapm_widget *widget, + int num); /* dapm path setup */ int snd_soc_dapm_connect_input(struct snd_soc_codec *codec, diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index c60200c..811d652 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1234,6 +1234,33 @@ int snd_soc_dapm_new_control(struct snd_soc_codec *codec, EXPORT_SYMBOL_GPL(snd_soc_dapm_new_control); /** + * snd_soc_dapm_new_controls - create new dapm controls + * @codec: audio codec + * @widget: widget array + * @num: number of widgets + * + * Creates new DAPM controls based upon the templates. + * + * Returns 0 for success else error. + */ +int snd_soc_dapm_new_controls(struct snd_soc_codec *codec, + const struct snd_soc_dapm_widget *widget, + int num) +{ + int i, ret; + + for (i = 0; i < num; i++) { + ret = snd_soc_dapm_new_control(codec, widget); + if (ret < 0) + return ret; + widget++; + } + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_new_controls); + + +/** * snd_soc_dapm_stream_event - send a stream event to the dapm core * @codec: audio codec * @stream: stream name -- cgit v0.10.2 From 105f1c28442301237d20b05a3d52d9987614016f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2008 14:52:19 +0200 Subject: [ALSA] soc - DAPM - Bulk route registration ASoC codecs and machine drivers that use DAPM routes all cut'n'paste a loop iterating over a null terminated array of routes. Factor out this into a bulk registration function, improving the error reporting for most users, and deprecate the old API to help out of tree users pick up the changes. Signed-off-by: Mark Brown Cc: Graeme Gregory Cc: Frank Mandarino Cc: Jarkko Nikula Cc: Richard Purdie Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 1f30616..bf4cf0c 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -193,6 +193,7 @@ struct snd_soc_dapm_widget; enum snd_soc_dapm_type; struct snd_soc_dapm_path; struct snd_soc_dapm_pin; +struct snd_soc_dapm_route; /* dapm controls */ int snd_soc_dapm_put_volsw(struct snd_kcontrol *kcontrol, @@ -210,10 +211,12 @@ int snd_soc_dapm_new_controls(struct snd_soc_codec *codec, int num); /* dapm path setup */ -int snd_soc_dapm_connect_input(struct snd_soc_codec *codec, +int __deprecated snd_soc_dapm_connect_input(struct snd_soc_codec *codec, const char *sink_name, const char *control_name, const char *src_name); int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec); void snd_soc_dapm_free(struct snd_soc_device *socdev); +int snd_soc_dapm_add_routes(struct snd_soc_codec *codec, + const struct snd_soc_dapm_route *route, int num); /* dapm events */ int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, char *stream, @@ -250,6 +253,18 @@ enum snd_soc_dapm_type { snd_soc_dapm_post, /* machine specific post widget - exec last */ }; +/* + * DAPM audio route definition. + * + * Defines an audio route originating at source via control and finishing + * at sink. + */ +struct snd_soc_dapm_route { + const char *sink; + const char *control; + const char *source; +}; + /* dapm audio path between two widgets */ struct snd_soc_dapm_path { char *name; diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 811d652..1ef6d94 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -841,21 +841,8 @@ int snd_soc_dapm_sync_endpoints(struct snd_soc_codec *codec) } EXPORT_SYMBOL_GPL(snd_soc_dapm_sync_endpoints); -/** - * snd_soc_dapm_connect_input - connect dapm widgets - * @codec: audio codec - * @sink: name of target widget - * @control: mixer control name - * @source: name of source name - * - * Connects 2 dapm widgets together via a named audio path. The sink is - * the widget receiving the audio signal, whilst the source is the sender - * of the audio signal. - * - * Returns 0 for success else error. - */ -int snd_soc_dapm_connect_input(struct snd_soc_codec *codec, const char *sink, - const char * control, const char *source) +static int snd_soc_dapm_add_route(struct snd_soc_codec *codec, + const char *sink, const char *control, const char *source) { struct snd_soc_dapm_path *path; struct snd_soc_dapm_widget *wsource = NULL, *wsink = NULL, *w; @@ -957,9 +944,64 @@ err: kfree(path); return ret; } + +/** + * snd_soc_dapm_connect_input - connect dapm widgets + * @codec: audio codec + * @sink: name of target widget + * @control: mixer control name + * @source: name of source name + * + * Connects 2 dapm widgets together via a named audio path. The sink is + * the widget receiving the audio signal, whilst the source is the sender + * of the audio signal. + * + * This function has been deprecated in favour of snd_soc_dapm_add_routes(). + * + * Returns 0 for success else error. + */ +int snd_soc_dapm_connect_input(struct snd_soc_codec *codec, const char *sink, + const char *control, const char *source) +{ + return snd_soc_dapm_add_route(codec, sink, control, source); +} EXPORT_SYMBOL_GPL(snd_soc_dapm_connect_input); /** + * snd_soc_dapm_add_routes - Add routes between DAPM widgets + * @codec: codec + * @route: audio routes + * @num: number of routes + * + * Connects 2 dapm widgets together via a named audio path. The sink is + * the widget receiving the audio signal, whilst the source is the sender + * of the audio signal. + * + * Returns 0 for success else error. On error all resources can be freed + * with a call to snd_soc_card_free(). + */ +int snd_soc_dapm_add_routes(struct snd_soc_codec *codec, + const struct snd_soc_dapm_route *route, int num) +{ + int i, ret; + + for (i = 0; i < num; i++) { + ret = snd_soc_dapm_add_route(codec, route->sink, + route->control, route->source); + if (ret < 0) { + printk(KERN_ERR "Failed to add route %s->%s\n", + route->source, + route->sink); + return ret; + } + route++; + } + + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_add_routes); + +/** * snd_soc_dapm_new_widgets - add new dapm widgets * @codec: audio codec * -- cgit v0.10.2 From d0cc0d3a95cc3c022ee118072d243d3670ec1663 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2008 14:55:22 +0200 Subject: [ALSA] soc - tlv320aic3x - Convert to use bulk registration APIs Signed-off-by: Mark Brown Cc: Jarkko Nikula Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 957996e..cb8365a 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -497,7 +497,7 @@ static const struct snd_soc_dapm_widget aic3x_dapm_widgets[] = { SND_SOC_DAPM_INPUT("LINE2R"), }; -static const char *intercon[][3] = { +static const struct snd_soc_dapm_route intercon[] = { /* Left Output */ {"Left DAC Mux", "DAC_L1", "Left DAC"}, {"Left DAC Mux", "DAC_L2", "Left DAC"}, @@ -641,22 +641,15 @@ static const char *intercon[][3] = { {"Right Line Out", NULL, "Right Line2 Bypass Mixer"}, {"Mono Out", NULL, "Right Line2 Bypass Mixer"}, {"Right HP Out", NULL, "Right Line2 Bypass Mixer"}, - - /* terminator */ - {NULL, NULL, NULL}, }; static int aic3x_add_widgets(struct snd_soc_codec *codec) { - int i; - - for (i = 0; i < ARRAY_SIZE(aic3x_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &aic3x_dapm_widgets[i]); + snd_soc_dapm_new_controls(codec, aic3x_dapm_widgets, + ARRAY_SIZE(aic3x_dapm_widgets)); /* set up audio path interconnects */ - for (i = 0; intercon[i][0] != NULL; i++) - snd_soc_dapm_connect_input(codec, intercon[i][0], - intercon[i][1], intercon[i][2]); + snd_soc_dapm_add_routes(codec, intercon, ARRAY_SIZE(intercon)); snd_soc_dapm_new_widgets(codec); return 0; -- cgit v0.10.2 From 25191c45aec54cd01b53391bb0b0e1e60377a5fc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2008 14:55:48 +0200 Subject: [ALSA] soc - Zaurus - Convert to bulk DAPM registration APIs Signed-off-by: Mark Brown Cc: Richard Purdie Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c index 7f32a11..852f16d 100644 --- a/sound/soc/pxa/corgi.c +++ b/sound/soc/pxa/corgi.c @@ -247,7 +247,7 @@ SND_SOC_DAPM_HP("Headset Jack", NULL), }; /* Corgi machine audio map (connections to the codec pins) */ -static const char *audio_map[][3] = { +static const struct snd_soc_dapm_route audio_map[] = { /* headset Jack - in = micin, out = LHPOUT*/ {"Headset Jack", NULL, "LHPOUT"}, @@ -265,8 +265,6 @@ static const char *audio_map[][3] = { /* Same as the above but no mic bias for line signals */ {"MICIN", NULL, "Line Jack"}, - - {NULL, NULL, NULL}, }; static const char *jack_function[] = {"Headphone", "Mic", "Line", "Headset", @@ -303,13 +301,11 @@ static int corgi_wm8731_init(struct snd_soc_codec *codec) } /* Add corgi specific widgets */ - for (i = 0; i < ARRAY_SIZE(wm8731_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &wm8731_dapm_widgets[i]); + snd_soc_dapm_new_controls(codec, wm8731_dapm_widgets, + ARRAY_SIZE(wm8731_dapm_widgets)); /* Set up corgi specific audio path audio_map */ - for (i = 0; audio_map[i][0] != NULL; i++) - snd_soc_dapm_connect_input(codec, audio_map[i][0], - audio_map[i][1], audio_map[i][2]); + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); snd_soc_dapm_sync_endpoints(codec); return 0; diff --git a/sound/soc/pxa/poodle.c b/sound/soc/pxa/poodle.c index 7e830b2..810f1fe 100644 --- a/sound/soc/pxa/poodle.c +++ b/sound/soc/pxa/poodle.c @@ -215,8 +215,8 @@ SND_SOC_DAPM_HP("Headphone Jack", NULL), SND_SOC_DAPM_SPK("Ext Spk", poodle_amp_event), }; -/* Corgi machine audio_mapnections to the codec pins */ -static const char *audio_map[][3] = { +/* Corgi machine connections to the codec pins */ +static const struct snd_soc_dapm_route audio_map[] = { /* headphone connected to LHPOUT1, RHPOUT1 */ {"Headphone Jack", NULL, "LHPOUT"}, @@ -225,8 +225,6 @@ static const char *audio_map[][3] = { /* speaker connected to LOUT, ROUT */ {"Ext Spk", NULL, "ROUT"}, {"Ext Spk", NULL, "LOUT"}, - - {NULL, NULL, NULL}, }; static const char *jack_function[] = {"Off", "Headphone"}; @@ -263,13 +261,11 @@ static int poodle_wm8731_init(struct snd_soc_codec *codec) } /* Add poodle specific widgets */ - for (i = 0; i < ARRAY_SIZE(wm8731_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &wm8731_dapm_widgets[i]); + snd_soc_dapm_new_controls(codec, wm8731_dapm_widgets, + ARRAY_SIZE(wm8731_dapm_widgets)); /* Set up poodle specific audio path audio_map */ - for (i = 0; audio_map[i][0] != NULL; i++) - snd_soc_dapm_connect_input(codec, audio_map[i][0], - audio_map[i][1], audio_map[i][2]); + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); snd_soc_dapm_sync_endpoints(codec); return 0; diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c index d8b8372..73a6df8 100644 --- a/sound/soc/pxa/spitz.c +++ b/sound/soc/pxa/spitz.c @@ -250,7 +250,7 @@ static const struct snd_soc_dapm_widget wm8750_dapm_widgets[] = { }; /* Spitz machine audio_map */ -static const char *audio_map[][3] = { +static const struct snd_soc_dapm_route audio_map[] = { /* headphone connected to LOUT1, ROUT1 */ {"Headphone Jack", NULL, "LOUT1"}, @@ -269,8 +269,6 @@ static const char *audio_map[][3] = { /* line is connected to input 1 - no bias */ {"LINPUT1", NULL, "Line Jack"}, - - {NULL, NULL, NULL}, }; static const char *jack_function[] = {"Headphone", "Mic", "Line", "Headset", @@ -313,13 +311,11 @@ static int spitz_wm8750_init(struct snd_soc_codec *codec) } /* Add spitz specific widgets */ - for (i = 0; i < ARRAY_SIZE(wm8750_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &wm8750_dapm_widgets[i]); + snd_soc_dapm_new_controls(codec, wm8750_dapm_widgets, + ARRAY_SIZE(wm8750_dapm_widgets)); - /* Set up spitz specific audio path audio_map */ - for (i = 0; audio_map[i][0] != NULL; i++) - snd_soc_dapm_connect_input(codec, audio_map[i][0], - audio_map[i][1], audio_map[i][2]); + /* Set up spitz specific audio paths */ + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); snd_soc_dapm_sync_endpoints(codec); return 0; diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c index 7346d7e..fda2aa0 100644 --- a/sound/soc/pxa/tosa.c +++ b/sound/soc/pxa/tosa.c @@ -154,7 +154,7 @@ SND_SOC_DAPM_SPK("Speaker", NULL), }; /* tosa audio map */ -static const char *audio_map[][3] = { +static const snd_soc_dapm_route audio_map[] = { /* headphone connected to HPOUTL, HPOUTR */ {"Headphone Jack", NULL, "HPOUTL"}, @@ -173,8 +173,6 @@ static const char *audio_map[][3] = { {"Headset Jack", NULL, "HPOUTR"}, {"LINEINR", NULL, "Mic Bias"}, {"Mic Bias", NULL, "Headset Jack"}, - - {NULL, NULL, NULL}, }; static const char *jack_function[] = {"Headphone", "Mic", "Line", "Headset", @@ -208,15 +206,11 @@ static int tosa_ac97_init(struct snd_soc_codec *codec) } /* add tosa specific widgets */ - for (i = 0; i < ARRAY_SIZE(tosa_dapm_widgets); i++) { - snd_soc_dapm_new_control(codec, &tosa_dapm_widgets[i]); - } + snd_soc_dapm_new_controls(codec, &tosa_dapm_widgets, + ARRAY_SIZE(tosa_dapm_widgets)); /* set up tosa specific audio path audio_map */ - for (i = 0; audio_map[i][0] != NULL; i++) { - snd_soc_dapm_connect_input(codec, audio_map[i][0], - audio_map[i][1], audio_map[i][2]); - } + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); snd_soc_dapm_sync_endpoints(codec); return 0; -- cgit v0.10.2 From 51e6a8411a9440f0fdba6cdd7d779e74f89debc4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2008 14:57:37 +0200 Subject: [ALSA] soc - eti_b1_wm8731 - Convert to use bulk DAPM control registration Signed-off-by: Mark Brown Cc: Frank Mandarino Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/at91/eti_b1_wm8731.c b/sound/soc/at91/eti_b1_wm8731.c index 1347dcf..4a383a4 100644 --- a/sound/soc/at91/eti_b1_wm8731.c +++ b/sound/soc/at91/eti_b1_wm8731.c @@ -191,7 +191,7 @@ static const struct snd_soc_dapm_widget eti_b1_dapm_widgets[] = { SND_SOC_DAPM_SPK("Ext Spk", NULL), }; -static const char *intercon[][3] = { +static const struct snd_soc_dapm_route intercon[] = { /* speaker connected to LHPOUT */ {"Ext Spk", NULL, "LHPOUT"}, @@ -199,9 +199,6 @@ static const char *intercon[][3] = { /* mic is connected to Mic Jack, with WM8731 Mic Bias */ {"MICIN", NULL, "Mic Bias"}, {"Mic Bias", NULL, "Int Mic"}, - - /* terminator */ - {NULL, NULL, NULL}, }; /* @@ -209,20 +206,14 @@ static const char *intercon[][3] = { */ static int eti_b1_wm8731_init(struct snd_soc_codec *codec) { - int i; - DBG("eti_b1_wm8731_init() called\n"); /* Add specific widgets */ - for(i = 0; i < ARRAY_SIZE(eti_b1_dapm_widgets); i++) { - snd_soc_dapm_new_control(codec, &eti_b1_dapm_widgets[i]); - } + snd_soc_dapm_new_controls(codec, eti_b1_dapm_widgets, + ARRAY_SIZE(eti_b1_dapm_widgets)); /* Set up specific audio path interconnects */ - for(i = 0; intercon[i][0] != NULL; i++) { - snd_soc_dapm_connect_input(codec, intercon[i][0], - intercon[i][1], intercon[i][2]); - } + snd_soc_dapm_add_route(codec, intercon, ARRAY_SIZE(intercon)); /* not connected */ snd_soc_dapm_set_endpoint(codec, "RLINEIN", 0); -- cgit v0.10.2 From 8f3112d7a847c2933a42ce29f17899f585d09106 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2008 14:58:03 +0200 Subject: [ALSA] soc - neo1973_wm8753 - Convert to bulk DAPM registration APIs Signed-off-by: Mark Brown Cc: Graeme Gregory Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c index e469186..79c5027 100644 --- a/sound/soc/s3c24xx/neo1973_wm8753.c +++ b/sound/soc/s3c24xx/neo1973_wm8753.c @@ -425,8 +425,7 @@ static const struct snd_soc_dapm_widget wm8753_dapm_widgets[] = { }; -/* example machine audio_mapnections */ -static const char *audio_map[][3] = { +static const struct snd_soc_dapm_route dapm_routes[] = { /* Connections to the lm4857 amp */ {"Audio Out", NULL, "LOUT1"}, @@ -449,8 +448,6 @@ static const char *audio_map[][3] = { /* Connect the ALC pins */ {"ACIN", NULL, "ACOP"}, - - {NULL, NULL, NULL}, }; static const char *lm4857_mode[] = { @@ -526,8 +523,8 @@ static int neo1973_wm8753_init(struct snd_soc_codec *codec) set_scenario_endpoints(codec, NEO_AUDIO_OFF); /* Add neo1973 specific widgets */ - for (i = 0; i < ARRAY_SIZE(wm8753_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &wm8753_dapm_widgets[i]); + snd_soc_dapm_new_controls(codec, wm8753_dapm_widgets, + ARRAY_SIZE(wm8753_dapm_widgets)); /* add neo1973 specific controls */ for (i = 0; i < ARRAY_SIZE(wm8753_neo1973_controls); i++) { @@ -538,11 +535,9 @@ static int neo1973_wm8753_init(struct snd_soc_codec *codec) return err; } - /* set up neo1973 specific audio path audio_mapnects */ - for (i = 0; audio_map[i][0] != NULL; i++) { - snd_soc_dapm_connect_input(codec, audio_map[i][0], - audio_map[i][1], audio_map[i][2]); - } + /* set up neo1973 specific audio routes */ + err = snd_soc_dapm_add_routes(codec, dapm_routes, + ARRAY_SIZE(dapm_routes)); snd_soc_dapm_sync_endpoints(codec); return 0; -- cgit v0.10.2 From acf497f996aa08f03c62a3150abd7939ae23de4c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2008 14:58:30 +0200 Subject: [ALSA] soc - davinci-evm - Update for bulk DAPM registration APIs Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/davinci/davinci-evm.c b/sound/soc/davinci/davinci-evm.c index fcd1652..4c70a0e 100644 --- a/sound/soc/davinci/davinci-evm.c +++ b/sound/soc/davinci/davinci-evm.c @@ -71,7 +71,7 @@ static const struct snd_soc_dapm_widget aic3x_dapm_widgets[] = { }; /* davinci-evm machine audio_mapnections to the codec pins */ -static const char *audio_map[][3] = { +static const struct snd_soc_dapm_route audio_map[] = { /* Headphone connected to HPLOUT, HPROUT */ {"Headphone Jack", NULL, "HPLOUT"}, {"Headphone Jack", NULL, "HPROUT"}, @@ -90,23 +90,17 @@ static const char *audio_map[][3] = { {"LINE2L", NULL, "Line In"}, {"LINE1R", NULL, "Line In"}, {"LINE2R", NULL, "Line In"}, - - {NULL, NULL, NULL}, }; /* Logic for a aic3x as connected on a davinci-evm */ static int evm_aic3x_init(struct snd_soc_codec *codec) { - int i; - /* Add davinci-evm specific widgets */ - for (i = 0; i < ARRAY_SIZE(aic3x_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &aic3x_dapm_widgets[i]); + snd_soc_dapm_new_controls(codec, aic3x_dapm_widgets, + ARRAY_SIZE(aic3x_dapm_widgets)); /* Set up davinci-evm specific audio path audio_map */ - for (i = 0; audio_map[i][0] != NULL; i++) - snd_soc_dapm_connect_input(codec, audio_map[i][0], - audio_map[i][1], audio_map[i][2]); + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); /* not connected */ snd_soc_dapm_set_endpoint(codec, "MONO_LOUT", 0); -- cgit v0.10.2 From 1a2505988ea650b61bd07722e99080a40ff27653 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2008 14:58:57 +0200 Subject: [ALSA] soc - n810 - Update for bulk DAPM registration APIs Signed-off-by: Mark Brown Cc: Jarkko Nikula Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c index 6533563..c2afffb 100644 --- a/sound/soc/omap/n810.c +++ b/sound/soc/omap/n810.c @@ -177,7 +177,7 @@ static const struct snd_soc_dapm_widget aic33_dapm_widgets[] = { SND_SOC_DAPM_HP("Headphone Jack", n810_jack_event), }; -static const char *audio_map[][3] = { +static const struct snd_soc_dapm_route audio_map[] = { {"Headphone Jack", NULL, "HPLOUT"}, {"Headphone Jack", NULL, "HPROUT"}, @@ -217,13 +217,11 @@ static int n810_aic33_init(struct snd_soc_codec *codec) } /* Add N810 specific widgets */ - for (i = 0; i < ARRAY_SIZE(aic33_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &aic33_dapm_widgets[i]); + snd_soc_dapm_new_controls(codec, aic33_dapm_widgets, + ARRAY_SIZE(aic33_dapm_widgets)); /* Set up N810 specific audio path audio_map */ - for (i = 0; i < ARRAY_SIZE(audio_map); i++) - snd_soc_dapm_connect_input(codec, audio_map[i][0], - audio_map[i][1], audio_map[i][2]); + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); snd_soc_dapm_sync_endpoints(codec); -- cgit v0.10.2 From f99a633a151686a599413bef758253dfd04887d1 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 15 May 2008 11:01:36 +0200 Subject: [ALSA] ASoC: Convert N810 machine driver to use gpiolib Use gpiolib since it is now available for OMAPs. Change also references to HW version RX44 to product name N810. Signed-off-by: Jarkko Nikula Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c index c2afffb..c32487b 100644 --- a/sound/soc/omap/n810.c +++ b/sound/soc/omap/n810.c @@ -30,15 +30,15 @@ #include #include -#include +#include #include #include "omap-mcbsp.h" #include "omap-pcm.h" #include "../codecs/tlv320aic3x.h" -#define RX44_HEADSET_AMP_GPIO 10 -#define RX44_SPEAKER_AMP_GPIO 101 +#define N810_HEADSET_AMP_GPIO 10 +#define N810_SPEAKER_AMP_GPIO 101 static struct clk *sys_clkout2; static struct clk *sys_clkout2_src; @@ -154,9 +154,9 @@ static int n810_spk_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *k, int event) { if (SND_SOC_DAPM_EVENT_ON(event)) - omap_set_gpio_dataout(RX44_SPEAKER_AMP_GPIO, 1); + gpio_set_value(N810_SPEAKER_AMP_GPIO, 1); else - omap_set_gpio_dataout(RX44_SPEAKER_AMP_GPIO, 0); + gpio_set_value(N810_SPEAKER_AMP_GPIO, 0); return 0; } @@ -165,9 +165,9 @@ static int n810_jack_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *k, int event) { if (SND_SOC_DAPM_EVENT_ON(event)) - omap_set_gpio_dataout(RX44_HEADSET_AMP_GPIO, 1); + gpio_set_value(N810_HEADSET_AMP_GPIO, 1); else - omap_set_gpio_dataout(RX44_HEADSET_AMP_GPIO, 0); + gpio_set_value(N810_HEADSET_AMP_GPIO, 0); return 0; } @@ -303,12 +303,12 @@ static int __init n810_soc_init(void) clk_set_parent(sys_clkout2_src, func96m_clk); clk_set_rate(sys_clkout2, 12000000); - if (omap_request_gpio(RX44_HEADSET_AMP_GPIO) < 0) + if (gpio_request(N810_HEADSET_AMP_GPIO, "hs_amp") < 0) BUG(); - if (omap_request_gpio(RX44_SPEAKER_AMP_GPIO) < 0) + if (gpio_request(N810_SPEAKER_AMP_GPIO, "spk_amp") < 0) BUG(); - omap_set_gpio_direction(RX44_HEADSET_AMP_GPIO, 0); - omap_set_gpio_direction(RX44_SPEAKER_AMP_GPIO, 0); + gpio_direction_output(N810_HEADSET_AMP_GPIO, 0); + gpio_direction_output(N810_SPEAKER_AMP_GPIO, 0); return 0; err2: @@ -323,6 +323,9 @@ err1: static void __exit n810_soc_exit(void) { + gpio_free(N810_SPEAKER_AMP_GPIO); + gpio_free(N810_HEADSET_AMP_GPIO); + platform_device_unregister(n810_snd_device); } -- cgit v0.10.2 From 02330fbaaded5b603cba112e4bbf62cdadec159a Mon Sep 17 00:00:00 2001 From: Andreas Mohr Date: Fri, 16 May 2008 12:18:29 +0200 Subject: [ALSA] PCI168 snd-azt3328 Linux driver: another huge update - figured out 'Digital(ly) Enhanced Game Port' functionality, implemented support for it (eliminating gameport polling overhead) - removed optional joystick activation, gameport now enabled unconditionally, since we now support it via the PCI I/O space, not via conflict-prone legacy I/O (which I was thus able to DISABLE now)! - fix playback bug (a muted wave output would get unmuted upon start of playback, of course this is not what we want, thus remember mute state) - implement partial power management: when idle, lower clock rate and disable codec (reduced noise!), and disable gameport circuit when unused - instantiate OPL3 timer, too - much better implementation of snd_azf3328_mixer_write_volume_gradually() - slightly optimized interrupt handling - lots of cleanup This time, I also found a way to verify proper OPL3 operation via MIDI file playback (emulation via synth hardware). Signed-off-by: Andreas Mohr Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c index 5f63af6..b832333 100644 --- a/sound/pci/azt3328.c +++ b/sound/pci/azt3328.c @@ -1,6 +1,6 @@ /* * azt3328.c - driver for Aztech AZF3328 based soundcards (e.g. PCI168). - * Copyright (C) 2002, 2005, 2006, 2007 by Andreas Mohr + * Copyright (C) 2002, 2005 - 2008 by Andreas Mohr * * Framework borrowed from Bart Hartgers's als4000.c. * Driver developed on PCI168 AP(W) version (PCI rev. 10, subsystem ID 1801), @@ -35,9 +35,20 @@ * (3 weeks' worth of evenings filled with driver work). * (and no, I did NOT go the easy way: to pick up a SB PCI128 for 9 Euros) * + * It is quite likely that the AZF3328 chip is the PCI cousin of the + * AZF3318 ("azt1020 pnp", "MM Pro 16") ISA chip, given very similar specs. + * * The AZF3328 chip (note: AZF3328, *not* AZT3328, that's just the driver name - * for compatibility reasons) has the following features: + * for compatibility reasons) from Azfin (joint-venture of Aztech and Fincitec, + * Fincitec acquired by National Semiconductor in 2002, together with the + * Fincitec-related company ARSmikro) has the following features: * + * - compatibility & compliance: + * - Microsoft PC 97 ("PC 97 Hardware Design Guide", + * http://www.microsoft.com/whdc/archive/pcguides.mspx) + * - Microsoft PC 98 Baseline Audio + * - MPU401 UART + * - Sound Blaster Emulation (DOS Box) * - builtin AC97 conformant codec (SNR over 80dB) * Note that "conformant" != "compliant"!! this chip's mixer register layout * *differs* from the standard AC97 layout: @@ -48,21 +59,28 @@ * addresses illegally. So far unfortunately it looks like the very flexible * ALSA AC97 support is still not enough to easily compensate for such a * grave layout violation despite all tweaks and quirks mechanisms it offers. - * - builtin genuine OPL3 + * - builtin genuine OPL3 - verified to work fine, 20080506 * - full duplex 16bit playback/record at independent sampling rate - * - MPU401 (+ legacy address support) FIXME: how to enable legacy addr?? + * - MPU401 (+ legacy address support, claimed by one official spec sheet) + * FIXME: how to enable legacy addr?? * - game port (legacy address support) - * - builtin 3D enhancement (said to be YAMAHA Ymersion) * - builtin DirectInput support, helps reduce CPU overhead (interrupt-driven - * features supported) + * features supported). - See common term "Digital Enhanced Game Port"... + * (probably DirectInput 3.0 spec - confirm) + * - builtin 3D enhancement (said to be YAMAHA Ymersion) * - built-in General DirectX timer having a 20 bits counter * with 1us resolution (see below!) - * - I2S serial port for external DAC + * - I2S serial output port for external DAC * - supports 33MHz PCI spec 2.1, PCI power management 1.0, compliant with ACPI * - supports hardware volume control * - single chip low cost solution (128 pin QFP) * - supports programmable Sub-vendor and Sub-system ID * required for Microsoft's logo compliance (FIXME: where?) + * At least the Trident 4D Wave DX has one bit somewhere + * to enable writes to PCI subsystem VID registers, that should be it. + * This might easily be in extended PCI reg space, since PCI168 also has + * some custom data starting at 0x80. What kind of config settings + * are located in our extended PCI space anyway?? * - PCI168 AP(W) card: power amplifier with 4 Watts/channel at 4 Ohms * * Note that this driver now is actually *better* than the Windows driver, @@ -74,6 +92,24 @@ * - "timidity -iAv -B2,8 -Os -EFreverb=0" * - "pmidi -p 128:0 jazz.mid" * + * OPL3 hardware playback testing, try something like: + * cat /proc/asound/hwdep + * and + * aconnect -o + * Then use + * sbiload -Dhw:x,y --opl3 /usr/share/sounds/opl3/std.o3 ......./drums.o3 + * where x,y is the xx-yy number as given in hwdep. + * Then try + * pmidi -p a:b jazz.mid + * where a:b is the client number plus 0 usually, as given by aconnect above. + * Oh, and make sure to unmute the FM mixer control (doh!) + * NOTE: power use during OPL3 playback is _VERY_ high (70W --> 90W!) + * despite no CPU activity, possibly due to hindering ACPI idling somehow. + * Shouldn't be a problem of the AZF3328 chip itself, I'd hope. + * Higher PCM / FM mixer levels seem to conflict (causes crackling), + * at least sometimes. Maybe even use with hardware sequencer timer above :) + * adplay/adplug-utils might soon offer hardware-based OPL3 playback, too. + * * Certain PCI versions of this card are susceptible to DMA traffic underruns * in some systems (resulting in sound crackling/clicking/popping), * probably because they don't have a DMA FIFO buffer or so. @@ -87,6 +123,8 @@ * better than a VIA, yet ironically I still get crackling, like many other * people with the same chipset. * Possible remedies: + * - use speaker (amplifier) output instead of headphone output + * (in case crackling is due to overloaded output clipping) * - plug card into a different PCI slot, preferrably one that isn't shared * too much (this helps a lot, but not completely!) * - get rid of PCI VGA card, use AGP instead @@ -94,18 +132,23 @@ * - fiddle with PCI latency settings (setpci -v -s BUSID latency_timer=XX) * Not too helpful. * - Disable ACPI/power management/"Auto Detect RAM/PCI Clk" in BIOS - * + * * BUGS - * - full-duplex might *still* be problematic, not fully tested recently + * - full-duplex might *still* be problematic, however a recent test was fine * - (non-bug) "Bass/Treble or 3D settings don't work" - they do get evaluated * if you set PCM output switch to "pre 3D" instead of "post 3D". * If this can't be set, then get a mixer application that Isn't Stupid (tm) * (e.g. kmix, gamix) - unfortunately several are!! - * + * - locking is not entirely clean, especially the audio stream activity + * ints --> may be racy + * - an _unconnected_ secondary joystick at the gameport will be reported + * to be "active" (floating values, not precisely -1) due to the way we need + * to read the Digital Enhanced Game Port. Not sure whether it is fixable. + * * TODO * - test MPU401 MIDI playback etc. - * - add some power micro-management (disable various units of the card - * as long as they're unused). However this requires I/O ports which I + * - add more power micro-management (disable various units of the card + * as long as they're unused). However this requires more I/O ports which I * haven't figured out yet and which thus might not even exist... * The standard suspend/resume functionality could probably make use of * some improvement, too... @@ -113,6 +156,7 @@ * - figure out some cleverly evil scheme to possibly make ALSA AC97 code * fully accept our quite incompatible ""AC97"" mixer and thus save some * code (but I'm not too optimistic that doing this is possible at all) + * - use MMIO (memory-mapped I/O)? Slightly faster access, e.g. for gameport. */ #include @@ -138,7 +182,7 @@ MODULE_LICENSE("GPL"); MODULE_SUPPORTED_DEVICE("{{Aztech,AZF3328}}"); #if defined(CONFIG_GAMEPORT) || (defined(MODULE) && defined(CONFIG_GAMEPORT_MODULE)) -#define SUPPORT_JOYSTICK 1 +#define SUPPORT_GAMEPORT 1 #endif #define DEBUG_MISC 0 @@ -147,13 +191,14 @@ MODULE_SUPPORTED_DEVICE("{{Aztech,AZF3328}}"); #define DEBUG_PLAY_REC 0 #define DEBUG_IO 0 #define DEBUG_TIMER 0 +#define DEBUG_GAME 0 #define MIXER_TESTING 0 #if DEBUG_MISC #define snd_azf3328_dbgmisc(format, args...) printk(KERN_ERR format, ##args) #else #define snd_azf3328_dbgmisc(format, args...) -#endif +#endif #if DEBUG_CALLS #define snd_azf3328_dbgcalls(format, args...) printk(format, ##args) @@ -163,25 +208,31 @@ MODULE_SUPPORTED_DEVICE("{{Aztech,AZF3328}}"); #define snd_azf3328_dbgcalls(format, args...) #define snd_azf3328_dbgcallenter() #define snd_azf3328_dbgcallleave() -#endif +#endif #if DEBUG_MIXER #define snd_azf3328_dbgmixer(format, args...) printk(format, ##args) #else #define snd_azf3328_dbgmixer(format, args...) -#endif +#endif #if DEBUG_PLAY_REC #define snd_azf3328_dbgplay(format, args...) printk(KERN_ERR format, ##args) #else #define snd_azf3328_dbgplay(format, args...) -#endif +#endif #if DEBUG_MISC #define snd_azf3328_dbgtimer(format, args...) printk(KERN_ERR format, ##args) #else #define snd_azf3328_dbgtimer(format, args...) -#endif +#endif + +#if DEBUG_GAME +#define snd_azf3328_dbggame(format, args...) printk(KERN_ERR format, ##args) +#else +#define snd_azf3328_dbggame(format, args...) +#endif static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ module_param_array(index, int, NULL, 0444); @@ -195,39 +246,44 @@ static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; /* Enable this card * module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable AZF3328 soundcard."); -#ifdef SUPPORT_JOYSTICK -static int joystick[SNDRV_CARDS]; -module_param_array(joystick, bool, NULL, 0444); -MODULE_PARM_DESC(joystick, "Enable joystick for AZF3328 soundcard."); -#endif - static int seqtimer_scaling = 128; module_param(seqtimer_scaling, int, 0444); MODULE_PARM_DESC(seqtimer_scaling, "Set 1024000Hz sequencer timer scale factor (lockup danger!). Default 128."); +struct snd_azf3328_audio_stream { + struct snd_pcm_substream *substream; + int enabled; + int running; + unsigned long portbase; +}; + +enum snd_azf3328_stream_index { + AZF_PLAYBACK = 0, + AZF_CAPTURE = 1, +}; + struct snd_azf3328 { /* often-used fields towards beginning, then grouped */ - unsigned long codec_port; - unsigned long io2_port; - unsigned long mpu_port; - unsigned long synth_port; - unsigned long mixer_port; + + unsigned long codec_io; /* usually 0xb000, size 128 */ + unsigned long game_io; /* usually 0xb400, size 8 */ + unsigned long mpu_io; /* usually 0xb800, size 4 */ + unsigned long opl3_io; /* usually 0xbc00, size 8 */ + unsigned long mixer_io; /* usually 0xc000, size 64 */ spinlock_t reg_lock; struct snd_timer *timer; - + struct snd_pcm *pcm; - struct snd_pcm_substream *playback_substream; - struct snd_pcm_substream *capture_substream; - unsigned int is_playing; - unsigned int is_recording; + struct snd_azf3328_audio_stream audio_stream[2]; struct snd_card *card; struct snd_rawmidi *rmidi; -#ifdef SUPPORT_JOYSTICK +#ifdef SUPPORT_GAMEPORT struct gameport *gameport; + int axes[4]; #endif struct pci_dev *pci; @@ -236,10 +292,10 @@ struct snd_azf3328 { #ifdef CONFIG_PM /* register value containers for power management * Note: not always full I/O range preserved (just like Win driver!) */ - u16 saved_regs_codec [AZF_IO_SIZE_CODEC_PM / 2]; - u16 saved_regs_io2 [AZF_IO_SIZE_IO2_PM / 2]; - u16 saved_regs_mpu [AZF_IO_SIZE_MPU_PM / 2]; - u16 saved_regs_synth[AZF_IO_SIZE_SYNTH_PM / 2]; + u16 saved_regs_codec[AZF_IO_SIZE_CODEC_PM / 2]; + u16 saved_regs_game [AZF_IO_SIZE_GAME_PM / 2]; + u16 saved_regs_mpu [AZF_IO_SIZE_MPU_PM / 2]; + u16 saved_regs_opl3 [AZF_IO_SIZE_OPL3_PM / 2]; u16 saved_regs_mixer[AZF_IO_SIZE_MIXER_PM / 2]; #endif }; @@ -252,126 +308,181 @@ static const struct pci_device_id snd_azf3328_ids[] = { MODULE_DEVICE_TABLE(pci, snd_azf3328_ids); + +static int +snd_azf3328_io_reg_setb(unsigned reg, u8 mask, int do_set) +{ + u8 prev = inb(reg), new; + + new = (do_set) ? (prev|mask) : (prev & ~mask); + /* we need to always write the new value no matter whether it differs + * or not, since some register bits don't indicate their setting */ + outb(new, reg); + if (new != prev) + return 1; + + return 0; +} + +static int +snd_azf3328_io_reg_setw(unsigned reg, u16 mask, int do_set) +{ + u16 prev = inw(reg), new; + + new = (do_set) ? (prev|mask) : (prev & ~mask); + /* we need to always write the new value no matter whether it differs + * or not, since some register bits don't indicate their setting */ + outw(new, reg); + if (new != prev) + return 1; + + return 0; +} + static inline void -snd_azf3328_codec_outb(const struct snd_azf3328 *chip, int reg, u8 value) +snd_azf3328_codec_outb(const struct snd_azf3328 *chip, unsigned reg, u8 value) { - outb(value, chip->codec_port + reg); + outb(value, chip->codec_io + reg); } static inline u8 -snd_azf3328_codec_inb(const struct snd_azf3328 *chip, int reg) +snd_azf3328_codec_inb(const struct snd_azf3328 *chip, unsigned reg) { - return inb(chip->codec_port + reg); + return inb(chip->codec_io + reg); } static inline void -snd_azf3328_codec_outw(const struct snd_azf3328 *chip, int reg, u16 value) +snd_azf3328_codec_outw(const struct snd_azf3328 *chip, unsigned reg, u16 value) { - outw(value, chip->codec_port + reg); + outw(value, chip->codec_io + reg); } static inline u16 -snd_azf3328_codec_inw(const struct snd_azf3328 *chip, int reg) +snd_azf3328_codec_inw(const struct snd_azf3328 *chip, unsigned reg) +{ + return inw(chip->codec_io + reg); +} + +static inline void +snd_azf3328_codec_outl(const struct snd_azf3328 *chip, unsigned reg, u32 value) +{ + outl(value, chip->codec_io + reg); +} + +static inline u32 +snd_azf3328_codec_inl(const struct snd_azf3328 *chip, unsigned reg) { - return inw(chip->codec_port + reg); + return inl(chip->codec_io + reg); } static inline void -snd_azf3328_codec_outl(const struct snd_azf3328 *chip, int reg, u32 value) +snd_azf3328_game_outb(const struct snd_azf3328 *chip, unsigned reg, u8 value) { - outl(value, chip->codec_port + reg); + outb(value, chip->game_io + reg); } static inline void -snd_azf3328_io2_outb(const struct snd_azf3328 *chip, int reg, u8 value) +snd_azf3328_game_outw(const struct snd_azf3328 *chip, unsigned reg, u16 value) { - outb(value, chip->io2_port + reg); + outw(value, chip->game_io + reg); } static inline u8 -snd_azf3328_io2_inb(const struct snd_azf3328 *chip, int reg) +snd_azf3328_game_inb(const struct snd_azf3328 *chip, unsigned reg) +{ + return inb(chip->game_io + reg); +} + +static inline u16 +snd_azf3328_game_inw(const struct snd_azf3328 *chip, unsigned reg) { - return inb(chip->io2_port + reg); + return inw(chip->game_io + reg); } static inline void -snd_azf3328_mixer_outw(const struct snd_azf3328 *chip, int reg, u16 value) +snd_azf3328_mixer_outw(const struct snd_azf3328 *chip, unsigned reg, u16 value) { - outw(value, chip->mixer_port + reg); + outw(value, chip->mixer_io + reg); } static inline u16 -snd_azf3328_mixer_inw(const struct snd_azf3328 *chip, int reg) +snd_azf3328_mixer_inw(const struct snd_azf3328 *chip, unsigned reg) { - return inw(chip->mixer_port + reg); + return inw(chip->mixer_io + reg); } -static void -snd_azf3328_mixer_set_mute(const struct snd_azf3328 *chip, int reg, int do_mute) +#define AZF_MUTE_BIT 0x80 + +static int +snd_azf3328_mixer_set_mute(const struct snd_azf3328 *chip, + unsigned reg, int do_mute +) { - unsigned long portbase = chip->mixer_port + reg + 1; - unsigned char oldval; + unsigned long portbase = chip->mixer_io + reg + 1; + int updated; /* the mute bit is on the *second* (i.e. right) register of a * left/right channel setting */ - oldval = inb(portbase); - if (do_mute) - oldval |= 0x80; - else - oldval &= ~0x80; - outb(oldval, portbase); + updated = snd_azf3328_io_reg_setb(portbase, AZF_MUTE_BIT, do_mute); + + /* indicate whether it was muted before */ + return (do_mute) ? !updated : updated; } static void -snd_azf3328_mixer_write_volume_gradually(const struct snd_azf3328 *chip, int reg, unsigned char dst_vol_left, unsigned char dst_vol_right, int chan_sel, int delay) +snd_azf3328_mixer_write_volume_gradually(const struct snd_azf3328 *chip, + unsigned reg, + unsigned char dst_vol_left, + unsigned char dst_vol_right, + int chan_sel, int delay +) { - unsigned long portbase = chip->mixer_port + reg; + unsigned long portbase = chip->mixer_io + reg; unsigned char curr_vol_left = 0, curr_vol_right = 0; - int left_done = 0, right_done = 0; - + int left_change = 0, right_change = 0; + snd_azf3328_dbgcallenter(); - if (chan_sel & SET_CHAN_LEFT) + + if (chan_sel & SET_CHAN_LEFT) { curr_vol_left = inb(portbase + 1); - else - left_done = 1; - if (chan_sel & SET_CHAN_RIGHT) + + /* take care of muting flag contained in left channel */ + if (curr_vol_left & AZF_MUTE_BIT) + dst_vol_left |= AZF_MUTE_BIT; + else + dst_vol_left &= ~AZF_MUTE_BIT; + + left_change = (curr_vol_left > dst_vol_left) ? -1 : 1; + } + + if (chan_sel & SET_CHAN_RIGHT) { curr_vol_right = inb(portbase + 0); - else - right_done = 1; - - /* take care of muting flag (0x80) contained in left channel */ - if (curr_vol_left & 0x80) - dst_vol_left |= 0x80; - else - dst_vol_left &= ~0x80; + + right_change = (curr_vol_right > dst_vol_right) ? -1 : 1; + } do { - if (!left_done) { - if (curr_vol_left > dst_vol_left) - curr_vol_left--; - else - if (curr_vol_left < dst_vol_left) - curr_vol_left++; - else - left_done = 1; - outb(curr_vol_left, portbase + 1); + if (left_change) { + if (curr_vol_left != dst_vol_left) { + curr_vol_left += left_change; + outb(curr_vol_left, portbase + 1); + } else + left_change = 0; } - if (!right_done) { - if (curr_vol_right > dst_vol_right) - curr_vol_right--; - else - if (curr_vol_right < dst_vol_right) - curr_vol_right++; - else - right_done = 1; + if (right_change) { + if (curr_vol_right != dst_vol_right) { + curr_vol_right += right_change; + /* during volume change, the right channel is crackling * somewhat more than the left channel, unfortunately. * This seems to be a hardware issue. */ - outb(curr_vol_right, portbase + 0); + outb(curr_vol_right, portbase + 0); + } else + right_change = 0; } if (delay) mdelay(delay); - } while ((!left_done) || (!right_done)); + } while ((left_change) || (right_change)); snd_azf3328_dbgcallleave(); } @@ -379,7 +490,7 @@ snd_azf3328_mixer_write_volume_gradually(const struct snd_azf3328 *chip, int reg * general mixer element */ struct azf3328_mixer_reg { - unsigned int reg; + unsigned reg; unsigned int lchan_shift, rchan_shift; unsigned int mask; unsigned int invert: 1; @@ -544,13 +655,14 @@ snd_azf3328_info_mixer_enum(struct snd_kcontrol *kcontrol, "Mix", "Mic" }; static const char * const texts3[] = { - "Mic", "CD", "Video", "Aux", + "Mic", "CD", "Video", "Aux", "Line", "Mix", "Mix Mono", "Phone" }; static const char * const texts4[] = { "pre 3D", "post 3D" }; struct azf3328_mixer_reg reg; + const char *p = NULL; snd_azf3328_mixer_reg_decode(®, kcontrol->private_value); uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; @@ -561,18 +673,20 @@ snd_azf3328_info_mixer_enum(struct snd_kcontrol *kcontrol, if (reg.reg == IDX_MIXER_ADVCTL2) { switch(reg.lchan_shift) { case 8: /* modem out sel */ - strcpy(uinfo->value.enumerated.name, texts1[uinfo->value.enumerated.item]); + p = texts1[uinfo->value.enumerated.item]; break; case 9: /* mono sel source */ - strcpy(uinfo->value.enumerated.name, texts2[uinfo->value.enumerated.item]); + p = texts2[uinfo->value.enumerated.item]; break; case 15: /* PCM Out Path */ - strcpy(uinfo->value.enumerated.name, texts4[uinfo->value.enumerated.item]); + p = texts4[uinfo->value.enumerated.item]; break; } } else - strcpy(uinfo->value.enumerated.name, texts3[uinfo->value.enumerated.item] -); + if (reg.reg == IDX_MIXER_REC_SELECT) + p = texts3[uinfo->value.enumerated.item]; + + strcpy(uinfo->value.enumerated.name, p); return 0; } @@ -583,7 +697,7 @@ snd_azf3328_get_mixer_enum(struct snd_kcontrol *kcontrol, struct snd_azf3328 *chip = snd_kcontrol_chip(kcontrol); struct azf3328_mixer_reg reg; unsigned short val; - + snd_azf3328_mixer_reg_decode(®, kcontrol->private_value); val = snd_azf3328_mixer_inw(chip, reg.reg); if (reg.reg == IDX_MIXER_REC_SELECT) { @@ -605,7 +719,7 @@ snd_azf3328_put_mixer_enum(struct snd_kcontrol *kcontrol, struct snd_azf3328 *chip = snd_kcontrol_chip(kcontrol); struct azf3328_mixer_reg reg; unsigned int oreg, nreg, val; - + snd_azf3328_mixer_reg_decode(®, kcontrol->private_value); oreg = snd_azf3328_mixer_inw(chip, reg.reg); val = oreg; @@ -717,15 +831,16 @@ snd_azf3328_mixer_new(struct snd_azf3328 *chip) snd_azf3328_mixer_outw(chip, IDX_MIXER_RESET, 0x0000); /* mute and zero volume channels */ - for (idx = 0; idx < ARRAY_SIZE(snd_azf3328_init_values); idx++) { + for (idx = 0; idx < ARRAY_SIZE(snd_azf3328_init_values); ++idx) { snd_azf3328_mixer_outw(chip, snd_azf3328_init_values[idx][0], snd_azf3328_init_values[idx][1]); } - + /* add mixer controls */ sw = snd_azf3328_mixer_controls; - for (idx = 0; idx < ARRAY_SIZE(snd_azf3328_mixer_controls); idx++, sw++) { + for (idx = 0; idx < ARRAY_SIZE(snd_azf3328_mixer_controls); + ++idx, ++sw) { if ((err = snd_ctl_add(chip->card, snd_ctl_new1(sw, chip))) < 0) return err; } @@ -757,8 +872,8 @@ snd_azf3328_hw_free(struct snd_pcm_substream *substream) } static void -snd_azf3328_setfmt(struct snd_azf3328 *chip, - unsigned int reg, +snd_azf3328_codec_setfmt(struct snd_azf3328 *chip, + unsigned reg, unsigned int bitrate, unsigned int format_width, unsigned int channels @@ -769,24 +884,25 @@ snd_azf3328_setfmt(struct snd_azf3328 *chip, snd_azf3328_dbgcallenter(); switch (bitrate) { - case 4000: val |= SOUNDFORMAT_FREQ_SUSPECTED_4000; break; - case 4800: val |= SOUNDFORMAT_FREQ_SUSPECTED_4800; break; - case 5512: val |= SOUNDFORMAT_FREQ_5510; break; /* the AZF3328 names it "5510" for some strange reason */ - case 6620: val |= SOUNDFORMAT_FREQ_6620; break; - case 8000: val |= SOUNDFORMAT_FREQ_8000; break; - case 9600: val |= SOUNDFORMAT_FREQ_9600; break; - case 11025: val |= SOUNDFORMAT_FREQ_11025; break; - case 13240: val |= SOUNDFORMAT_FREQ_SUSPECTED_13240; break; - case 16000: val |= SOUNDFORMAT_FREQ_16000; break; - case 22050: val |= SOUNDFORMAT_FREQ_22050; break; - case 32000: val |= SOUNDFORMAT_FREQ_32000; break; - case 44100: val |= SOUNDFORMAT_FREQ_44100; break; - case 48000: val |= SOUNDFORMAT_FREQ_48000; break; - case 66200: val |= SOUNDFORMAT_FREQ_SUSPECTED_66200; break; + case AZF_FREQ_4000: val |= SOUNDFORMAT_FREQ_SUSPECTED_4000; break; + case AZF_FREQ_4800: val |= SOUNDFORMAT_FREQ_SUSPECTED_4800; break; + case AZF_FREQ_5512: + /* the AZF3328 names it "5510" for some strange reason */ + val |= SOUNDFORMAT_FREQ_5510; break; + case AZF_FREQ_6620: val |= SOUNDFORMAT_FREQ_6620; break; + case AZF_FREQ_8000: val |= SOUNDFORMAT_FREQ_8000; break; + case AZF_FREQ_9600: val |= SOUNDFORMAT_FREQ_9600; break; + case AZF_FREQ_11025: val |= SOUNDFORMAT_FREQ_11025; break; + case AZF_FREQ_13240: val |= SOUNDFORMAT_FREQ_SUSPECTED_13240; break; + case AZF_FREQ_16000: val |= SOUNDFORMAT_FREQ_16000; break; + case AZF_FREQ_22050: val |= SOUNDFORMAT_FREQ_22050; break; + case AZF_FREQ_32000: val |= SOUNDFORMAT_FREQ_32000; break; default: snd_printk(KERN_WARNING "unknown bitrate %d, assuming 44.1kHz!\n", bitrate); - val |= SOUNDFORMAT_FREQ_44100; - break; + /* fall-through */ + case AZF_FREQ_44100: val |= SOUNDFORMAT_FREQ_44100; break; + case AZF_FREQ_48000: val |= SOUNDFORMAT_FREQ_48000; break; + case AZF_FREQ_66200: val |= SOUNDFORMAT_FREQ_SUSPECTED_66200; break; } /* val = 0xff07; 3m27.993s (65301Hz; -> 64000Hz???) hmm, 66120, 65967, 66123 */ /* val = 0xff09; 17m15.098s (13123,478Hz; -> 12000Hz???) hmm, 13237.2Hz? */ @@ -805,10 +921,10 @@ snd_azf3328_setfmt(struct snd_azf3328 *chip, val |= SOUNDFORMAT_FLAG_16BIT; spin_lock_irqsave(&chip->reg_lock, flags); - + /* set bitrate/format */ snd_azf3328_codec_outw(chip, reg, val); - + /* changing the bitrate/format settings switches off the * audio output with an annoying click in case of 8/16bit format change * (maybe shutting down DAC/ADC?), thus immediately @@ -830,31 +946,81 @@ snd_azf3328_setfmt(struct snd_azf3328 *chip, snd_azf3328_dbgcallleave(); } +static inline void +snd_azf3328_codec_setfmt_lowpower(struct snd_azf3328 *chip, + unsigned reg +) +{ + /* choose lowest frequency for low power consumption. + * While this will cause louder noise due to rather coarse frequency, + * it should never matter since output should always + * get disabled properly when idle anyway. */ + snd_azf3328_codec_setfmt(chip, reg, AZF_FREQ_4000, 8, 1); +} + +static inline void +snd_azf3328_codec_enable(struct snd_azf3328 *chip, int enable) +{ + /* no idea what exactly is being done here, but I strongly assume it's + * PM related */ + snd_azf3328_io_reg_setw( + chip->codec_io+IDX_IO_6AH, + IO_6A_PAUSE_PLAYBACK_BIT8, + !enable + ); +} + +static void +snd_azf3328_codec_activity(struct snd_azf3328 *chip, + enum snd_azf3328_stream_index stream_type, + int enable +) +{ + int need_change = (chip->audio_stream[stream_type].running != enable); + + snd_azf3328_dbgplay( + "codec_activity: type %d, enable %d, need_change %d\n", + stream_type, enable, need_change + ); + if (need_change) { + enum snd_azf3328_stream_index other = + (stream_type == AZF_PLAYBACK) ? + AZF_CAPTURE : AZF_PLAYBACK; + /* small check to prevent shutting down the other party + * in case it's active */ + if ((enable) || !(chip->audio_stream[other].running)) + snd_azf3328_codec_enable(chip, enable); + + /* ...and adjust clock, too + * (reduce noise and power consumption) */ + if (!enable) + snd_azf3328_codec_setfmt_lowpower( + chip, + chip->audio_stream[stream_type].portbase + + IDX_IO_PLAY_SOUNDFORMAT + ); + } + chip->audio_stream[stream_type].running = enable; +} + static void snd_azf3328_setdmaa(struct snd_azf3328 *chip, long unsigned int addr, unsigned int count, unsigned int size, - int do_recording) + enum snd_azf3328_stream_index stream_type +) { - unsigned long flags, portbase; - unsigned int is_running; - snd_azf3328_dbgcallenter(); - if (do_recording) { - /* access capture registers, i.e. skip playback reg section */ - portbase = chip->codec_port + 0x20; - is_running = chip->is_recording; - } else { - /* access the playback register section */ - portbase = chip->codec_port + 0x00; - is_running = chip->is_playing; - } + if (!chip->audio_stream[stream_type].running) { + /* AZF3328 uses a two buffer pointer DMA playback approach */ + + unsigned long flags, portbase, addr_area2; + + /* width 32bit (prevent overflow): */ + unsigned long count_areas, count_tmp; - /* AZF3328 uses a two buffer pointer DMA playback approach */ - if (!is_running) { - unsigned long addr_area2; - unsigned long count_areas, count_tmp; /* width 32bit -- overflow!! */ + portbase = chip->audio_stream[stream_type].portbase; count_areas = size/2; addr_area2 = addr+count_areas; count_areas--; /* max. index */ @@ -884,11 +1050,11 @@ snd_azf3328_playback_prepare(struct snd_pcm_substream *substream) snd_azf3328_dbgcallenter(); #if 0 - snd_azf3328_setfmt(chip, IDX_IO_PLAY_SOUNDFORMAT, + snd_azf3328_codec_setfmt(chip, IDX_IO_PLAY_SOUNDFORMAT, runtime->rate, snd_pcm_format_width(runtime->format), runtime->channels); - snd_azf3328_setdmaa(chip, runtime->dma_addr, count, size, 0); + snd_azf3328_setdmaa(chip, runtime->dma_addr, count, size, AZF_PLAYBACK); #endif snd_azf3328_dbgcallleave(); return 0; @@ -906,11 +1072,11 @@ snd_azf3328_capture_prepare(struct snd_pcm_substream *substream) snd_azf3328_dbgcallenter(); #if 0 - snd_azf3328_setfmt(chip, IDX_IO_REC_SOUNDFORMAT, + snd_azf3328_codec_setfmt(chip, IDX_IO_REC_SOUNDFORMAT, runtime->rate, snd_pcm_format_width(runtime->format), runtime->channels); - snd_azf3328_setdmaa(chip, runtime->dma_addr, count, size, 1); + snd_azf3328_setdmaa(chip, runtime->dma_addr, count, size, AZF_CAPTURE); #endif snd_azf3328_dbgcallleave(); return 0; @@ -923,6 +1089,7 @@ snd_azf3328_playback_trigger(struct snd_pcm_substream *substream, int cmd) struct snd_pcm_runtime *runtime = substream->runtime; int result = 0; unsigned int status1; + int previously_muted; snd_azf3328_dbgcalls("snd_azf3328_playback_trigger cmd %d\n", cmd); @@ -930,20 +1097,23 @@ snd_azf3328_playback_trigger(struct snd_pcm_substream *substream, int cmd) case SNDRV_PCM_TRIGGER_START: snd_azf3328_dbgplay("START PLAYBACK\n"); - /* mute WaveOut */ - snd_azf3328_mixer_set_mute(chip, IDX_MIXER_WAVEOUT, 1); + /* mute WaveOut (avoid clicking during setup) */ + previously_muted = + snd_azf3328_mixer_set_mute(chip, IDX_MIXER_WAVEOUT, 1); - snd_azf3328_setfmt(chip, IDX_IO_PLAY_SOUNDFORMAT, + snd_azf3328_codec_setfmt(chip, IDX_IO_PLAY_SOUNDFORMAT, runtime->rate, snd_pcm_format_width(runtime->format), runtime->channels); spin_lock(&chip->reg_lock); - /* stop playback */ + /* first, remember current value: */ status1 = snd_azf3328_codec_inw(chip, IDX_IO_PLAY_FLAGS); + + /* stop playback */ status1 &= ~DMA_RESUME; snd_azf3328_codec_outw(chip, IDX_IO_PLAY_FLAGS, status1); - + /* FIXME: clear interrupts or what??? */ snd_azf3328_codec_outw(chip, IDX_IO_PLAY_IRQTYPE, 0xffff); spin_unlock(&chip->reg_lock); @@ -951,7 +1121,7 @@ snd_azf3328_playback_trigger(struct snd_pcm_substream *substream, int cmd) snd_azf3328_setdmaa(chip, runtime->dma_addr, snd_pcm_lib_period_bytes(substream), snd_pcm_lib_buffer_bytes(substream), - 0); + AZF_PLAYBACK); spin_lock(&chip->reg_lock); #ifdef WIN9X @@ -978,30 +1148,35 @@ snd_azf3328_playback_trigger(struct snd_pcm_substream *substream, int cmd) DMA_SOMETHING_ELSE); #endif spin_unlock(&chip->reg_lock); + snd_azf3328_codec_activity(chip, AZF_PLAYBACK, 1); /* now unmute WaveOut */ - snd_azf3328_mixer_set_mute(chip, IDX_MIXER_WAVEOUT, 0); + if (!previously_muted) + snd_azf3328_mixer_set_mute(chip, IDX_MIXER_WAVEOUT, 0); - chip->is_playing = 1; snd_azf3328_dbgplay("STARTED PLAYBACK\n"); break; case SNDRV_PCM_TRIGGER_RESUME: snd_azf3328_dbgplay("RESUME PLAYBACK\n"); /* resume playback if we were active */ - if (chip->is_playing) + spin_lock(&chip->reg_lock); + if (chip->audio_stream[AZF_PLAYBACK].running) snd_azf3328_codec_outw(chip, IDX_IO_PLAY_FLAGS, snd_azf3328_codec_inw(chip, IDX_IO_PLAY_FLAGS) | DMA_RESUME); + spin_unlock(&chip->reg_lock); break; case SNDRV_PCM_TRIGGER_STOP: snd_azf3328_dbgplay("STOP PLAYBACK\n"); - /* mute WaveOut */ - snd_azf3328_mixer_set_mute(chip, IDX_MIXER_WAVEOUT, 1); + /* mute WaveOut (avoid clicking during setup) */ + previously_muted = + snd_azf3328_mixer_set_mute(chip, IDX_MIXER_WAVEOUT, 1); spin_lock(&chip->reg_lock); - /* stop playback */ + /* first, remember current value: */ status1 = snd_azf3328_codec_inw(chip, IDX_IO_PLAY_FLAGS); + /* stop playback */ status1 &= ~DMA_RESUME; snd_azf3328_codec_outw(chip, IDX_IO_PLAY_FLAGS, status1); @@ -1013,10 +1188,12 @@ snd_azf3328_playback_trigger(struct snd_pcm_substream *substream, int cmd) status1 &= ~DMA_PLAY_SOMETHING1; snd_azf3328_codec_outw(chip, IDX_IO_PLAY_FLAGS, status1); spin_unlock(&chip->reg_lock); - + snd_azf3328_codec_activity(chip, AZF_PLAYBACK, 0); + /* now unmute WaveOut */ - snd_azf3328_mixer_set_mute(chip, IDX_MIXER_WAVEOUT, 0); - chip->is_playing = 0; + if (!previously_muted) + snd_azf3328_mixer_set_mute(chip, IDX_MIXER_WAVEOUT, 0); + snd_azf3328_dbgplay("STOPPED PLAYBACK\n"); break; case SNDRV_PCM_TRIGGER_SUSPEND: @@ -1035,7 +1212,7 @@ snd_azf3328_playback_trigger(struct snd_pcm_substream *substream, int cmd) printk(KERN_ERR "FIXME: unknown trigger mode!\n"); return -EINVAL; } - + snd_azf3328_dbgcallleave(); return result; } @@ -1057,17 +1234,19 @@ snd_azf3328_capture_trigger(struct snd_pcm_substream *substream, int cmd) snd_azf3328_dbgplay("START CAPTURE\n"); - snd_azf3328_setfmt(chip, IDX_IO_REC_SOUNDFORMAT, + snd_azf3328_codec_setfmt(chip, IDX_IO_REC_SOUNDFORMAT, runtime->rate, snd_pcm_format_width(runtime->format), runtime->channels); spin_lock(&chip->reg_lock); - /* stop recording */ + /* first, remember current value: */ status1 = snd_azf3328_codec_inw(chip, IDX_IO_REC_FLAGS); + + /* stop recording */ status1 &= ~DMA_RESUME; snd_azf3328_codec_outw(chip, IDX_IO_REC_FLAGS, status1); - + /* FIXME: clear interrupts or what??? */ snd_azf3328_codec_outw(chip, IDX_IO_REC_IRQTYPE, 0xffff); spin_unlock(&chip->reg_lock); @@ -1075,7 +1254,7 @@ snd_azf3328_capture_trigger(struct snd_pcm_substream *substream, int cmd) snd_azf3328_setdmaa(chip, runtime->dma_addr, snd_pcm_lib_period_bytes(substream), snd_pcm_lib_buffer_bytes(substream), - 1); + AZF_CAPTURE); spin_lock(&chip->reg_lock); #ifdef WIN9X @@ -1102,24 +1281,27 @@ snd_azf3328_capture_trigger(struct snd_pcm_substream *substream, int cmd) DMA_SOMETHING_ELSE); #endif spin_unlock(&chip->reg_lock); + snd_azf3328_codec_activity(chip, AZF_CAPTURE, 1); - chip->is_recording = 1; snd_azf3328_dbgplay("STARTED CAPTURE\n"); break; case SNDRV_PCM_TRIGGER_RESUME: snd_azf3328_dbgplay("RESUME CAPTURE\n"); /* resume recording if we were active */ - if (chip->is_recording) + spin_lock(&chip->reg_lock); + if (chip->audio_stream[AZF_CAPTURE].running) snd_azf3328_codec_outw(chip, IDX_IO_REC_FLAGS, snd_azf3328_codec_inw(chip, IDX_IO_REC_FLAGS) | DMA_RESUME); + spin_unlock(&chip->reg_lock); break; case SNDRV_PCM_TRIGGER_STOP: snd_azf3328_dbgplay("STOP CAPTURE\n"); spin_lock(&chip->reg_lock); - /* stop recording */ + /* first, remember current value: */ status1 = snd_azf3328_codec_inw(chip, IDX_IO_REC_FLAGS); + /* stop recording */ status1 &= ~DMA_RESUME; snd_azf3328_codec_outw(chip, IDX_IO_REC_FLAGS, status1); @@ -1129,8 +1311,8 @@ snd_azf3328_capture_trigger(struct snd_pcm_substream *substream, int cmd) status1 &= ~DMA_PLAY_SOMETHING1; snd_azf3328_codec_outw(chip, IDX_IO_REC_FLAGS, status1); spin_unlock(&chip->reg_lock); - - chip->is_recording = 0; + snd_azf3328_codec_activity(chip, AZF_CAPTURE, 0); + snd_azf3328_dbgplay("STOPPED CAPTURE\n"); break; case SNDRV_PCM_TRIGGER_SUSPEND: @@ -1149,7 +1331,7 @@ snd_azf3328_capture_trigger(struct snd_pcm_substream *substream, int cmd) printk(KERN_ERR "FIXME: unknown trigger mode!\n"); return -EINVAL; } - + snd_azf3328_dbgcallleave(); return result; } @@ -1162,11 +1344,11 @@ snd_azf3328_playback_pointer(struct snd_pcm_substream *substream) snd_pcm_uframes_t frmres; #ifdef QUERY_HARDWARE - bufptr = inl(chip->codec_port+IDX_IO_PLAY_DMA_START_1); + bufptr = snd_azf3328_codec_inl(chip, IDX_IO_PLAY_DMA_START_1); #else bufptr = substream->runtime->dma_addr; #endif - result = inl(chip->codec_port+IDX_IO_PLAY_DMA_CURRPOS); + result = snd_azf3328_codec_inl(chip, IDX_IO_PLAY_DMA_CURRPOS); /* calculate offset */ result -= bufptr; @@ -1183,11 +1365,11 @@ snd_azf3328_capture_pointer(struct snd_pcm_substream *substream) snd_pcm_uframes_t frmres; #ifdef QUERY_HARDWARE - bufptr = inl(chip->codec_port+IDX_IO_REC_DMA_START_1); + bufptr = snd_azf3328_codec_inl(chip, IDX_IO_REC_DMA_START_1); #else bufptr = substream->runtime->dma_addr; #endif - result = inl(chip->codec_port+IDX_IO_REC_DMA_CURRPOS); + result = snd_azf3328_codec_inl(chip, IDX_IO_REC_DMA_CURRPOS); /* calculate offset */ result -= bufptr; @@ -1196,27 +1378,233 @@ snd_azf3328_capture_pointer(struct snd_pcm_substream *substream) return frmres; } +/******************************************************************/ + +#ifdef SUPPORT_GAMEPORT +static inline void +snd_azf3328_gameport_irq_enable(struct snd_azf3328 *chip, int enable) +{ + snd_azf3328_io_reg_setb( + chip->game_io+IDX_GAME_HWCONFIG, + GAME_HWCFG_IRQ_ENABLE, + enable + ); +} + +static inline void +snd_azf3328_gameport_legacy_address_enable(struct snd_azf3328 *chip, int enable) +{ + snd_azf3328_io_reg_setb( + chip->game_io+IDX_GAME_HWCONFIG, + GAME_HWCFG_LEGACY_ADDRESS_ENABLE, + enable + ); +} + +static inline void +snd_azf3328_gameport_axis_circuit_enable(struct snd_azf3328 *chip, int enable) +{ + snd_azf3328_io_reg_setw( + chip->codec_io+IDX_IO_6AH, + IO_6A_SOMETHING2_GAMEPORT, + !enable + ); +} + +static inline void +snd_azf3328_gameport_interrupt(struct snd_azf3328 *chip) +{ + /* + * skeleton handler only + * (we do not want axis reading in interrupt handler - too much load!) + */ + snd_azf3328_dbggame("gameport irq\n"); + + /* this should ACK the gameport IRQ properly, hopefully. */ + snd_azf3328_game_inw(chip, IDX_GAME_AXIS_VALUE); +} + +static int +snd_azf3328_gameport_open(struct gameport *gameport, int mode) +{ + struct snd_azf3328 *chip = gameport_get_port_data(gameport); + int res; + + snd_azf3328_dbggame("gameport_open, mode %d\n", mode); + switch (mode) { + case GAMEPORT_MODE_COOKED: + case GAMEPORT_MODE_RAW: + res = 0; + break; + default: + res = -1; + break; + } + + snd_azf3328_gameport_axis_circuit_enable(chip, (res == 0)); + + return res; +} + +static void +snd_azf3328_gameport_close(struct gameport *gameport) +{ + struct snd_azf3328 *chip = gameport_get_port_data(gameport); + + snd_azf3328_dbggame("gameport_close\n"); + snd_azf3328_gameport_axis_circuit_enable(chip, 0); +} + +static int +snd_azf3328_gameport_cooked_read(struct gameport *gameport, + int *axes, + int *buttons +) +{ + struct snd_azf3328 *chip = gameport_get_port_data(gameport); + int i; + u8 val; + unsigned long flags; + + snd_assert(chip, return 0); + + spin_lock_irqsave(&chip->reg_lock, flags); + val = snd_azf3328_game_inb(chip, IDX_GAME_LEGACY_COMPATIBLE); + *buttons = (~(val) >> 4) & 0xf; + + /* ok, this one is a bit dirty: cooked_read is being polled by a timer, + * thus we're atomic and cannot actively wait in here + * (which would be useful for us since it probably would be better + * to trigger a measurement in here, then wait a short amount of + * time until it's finished, then read values of _this_ measurement). + * + * Thus we simply resort to reading values if they're available already + * and trigger the next measurement. + */ + + val = snd_azf3328_game_inb(chip, IDX_GAME_AXES_CONFIG); + if (val & GAME_AXES_SAMPLING_READY) { + for (i = 0; i < 4; ++i) { + /* configure the axis to read */ + val = (i << 4) | 0x0f; + snd_azf3328_game_outb(chip, IDX_GAME_AXES_CONFIG, val); + + chip->axes[i] = snd_azf3328_game_inw( + chip, IDX_GAME_AXIS_VALUE + ); + } + } + + /* trigger next axes sampling, to be evaluated the next time we + * enter this function */ + + /* for some very, very strange reason we cannot enable + * Measurement Ready monitoring for all axes here, + * at least not when only one joystick connected */ + val = 0x03; /* we're able to monitor axes 1 and 2 only */ + snd_azf3328_game_outb(chip, IDX_GAME_AXES_CONFIG, val); + + snd_azf3328_game_outw(chip, IDX_GAME_AXIS_VALUE, 0xffff); + spin_unlock_irqrestore(&chip->reg_lock, flags); + + for (i = 0; i < 4; i++) { + axes[i] = chip->axes[i]; + if (axes[i] == 0xffff) + axes[i] = -1; + } + + snd_azf3328_dbggame("cooked_read: axes %d %d %d %d buttons %d\n", + axes[0], axes[1], axes[2], axes[3], *buttons + ); + + return 0; +} + +static int __devinit +snd_azf3328_gameport(struct snd_azf3328 *chip, int dev) +{ + struct gameport *gp; + + int io_port = chip->game_io; + + chip->gameport = gp = gameport_allocate_port(); + if (!gp) { + printk(KERN_ERR "azt3328: cannot alloc memory for gameport\n"); + return -ENOMEM; + } + + gameport_set_name(gp, "AZF3328 Gameport"); + gameport_set_phys(gp, "pci%s/gameport0", pci_name(chip->pci)); + gameport_set_dev_parent(gp, &chip->pci->dev); + gp->io = io_port; + gameport_set_port_data(gp, chip); + + gp->open = snd_azf3328_gameport_open; + gp->close = snd_azf3328_gameport_close; + gp->fuzz = 16; /* seems ok */ + gp->cooked_read = snd_azf3328_gameport_cooked_read; + + /* DISABLE legacy address: we don't need it! */ + snd_azf3328_gameport_legacy_address_enable(chip, 0); + + snd_azf3328_gameport_axis_circuit_enable(chip, 0); + + gameport_register_port(chip->gameport); + + return 0; +} + +static void +snd_azf3328_gameport_free(struct snd_azf3328 *chip) +{ + if (chip->gameport) { + gameport_unregister_port(chip->gameport); + chip->gameport = NULL; + } + snd_azf3328_gameport_irq_enable(chip, 0); +} +#else +static inline int +snd_azf3328_gameport(struct snd_azf3328 *chip, int dev) { return -ENOSYS; } +static inline void +snd_azf3328_gameport_free(struct snd_azf3328 *chip) { } +static inline void +snd_azf3328_gameport_interrupt(struct snd_azf3328 *chip) +{ + printk(KERN_WARNING "huh, game port IRQ occurred!?\n"); +} +#endif /* SUPPORT_GAMEPORT */ + +/******************************************************************/ + static irqreturn_t snd_azf3328_interrupt(int irq, void *dev_id) { struct snd_azf3328 *chip = dev_id; u8 status, which; +#if DEBUG_PLAY_REC static unsigned long irq_count; +#endif status = snd_azf3328_codec_inb(chip, IDX_IO_IRQSTATUS); /* fast path out, to ease interrupt sharing */ - if (!(status & (IRQ_PLAYBACK|IRQ_RECORDING|IRQ_MPU401|IRQ_TIMER))) + if (!(status & + (IRQ_PLAYBACK|IRQ_RECORDING|IRQ_GAMEPORT|IRQ_MPU401|IRQ_TIMER) + )) return IRQ_NONE; /* must be interrupt for another device */ snd_azf3328_dbgplay("Interrupt %ld!\nIDX_IO_PLAY_FLAGS %04x, IDX_IO_PLAY_IRQTYPE %04x, IDX_IO_IRQSTATUS %04x\n", - irq_count, + irq_count++ /* debug-only */, snd_azf3328_codec_inw(chip, IDX_IO_PLAY_FLAGS), snd_azf3328_codec_inw(chip, IDX_IO_PLAY_IRQTYPE), status); - + if (status & IRQ_TIMER) { - /* snd_azf3328_dbgplay("timer %ld\n", inl(chip->codec_port+IDX_IO_TIMER_VALUE) & TIMER_VALUE_MASK); */ + /* snd_azf3328_dbgplay("timer %ld\n", + snd_azf3328_codec_inl(chip, IDX_IO_TIMER_VALUE) + & TIMER_VALUE_MASK + ); */ if (chip->timer) snd_timer_interrupt(chip->timer, chip->timer->sticks); /* ACK timer */ @@ -1232,11 +1620,16 @@ snd_azf3328_interrupt(int irq, void *dev_id) snd_azf3328_codec_outb(chip, IDX_IO_PLAY_IRQTYPE, which); spin_unlock(&chip->reg_lock); - if (chip->pcm && chip->playback_substream) { - snd_pcm_period_elapsed(chip->playback_substream); + if (chip->pcm && chip->audio_stream[AZF_PLAYBACK].substream) { + snd_pcm_period_elapsed( + chip->audio_stream[AZF_PLAYBACK].substream + ); snd_azf3328_dbgplay("PLAY period done (#%x), @ %x\n", which, - inl(chip->codec_port+IDX_IO_PLAY_DMA_CURRPOS)); + snd_azf3328_codec_inl( + chip, IDX_IO_PLAY_DMA_CURRPOS + ) + ); } else snd_azf3328_dbgplay("azt3328: ouch, irq handler problem!\n"); if (which & IRQ_PLAY_SOMETHING) @@ -1249,16 +1642,23 @@ snd_azf3328_interrupt(int irq, void *dev_id) snd_azf3328_codec_outb(chip, IDX_IO_REC_IRQTYPE, which); spin_unlock(&chip->reg_lock); - if (chip->pcm && chip->capture_substream) { - snd_pcm_period_elapsed(chip->capture_substream); + if (chip->pcm && chip->audio_stream[AZF_CAPTURE].substream) { + snd_pcm_period_elapsed( + chip->audio_stream[AZF_CAPTURE].substream + ); snd_azf3328_dbgplay("REC period done (#%x), @ %x\n", which, - inl(chip->codec_port+IDX_IO_REC_DMA_CURRPOS)); + snd_azf3328_codec_inl( + chip, IDX_IO_REC_DMA_CURRPOS + ) + ); } else snd_azf3328_dbgplay("azt3328: ouch, irq handler problem!\n"); if (which & IRQ_REC_SOMETHING) snd_azf3328_dbgplay("azt3328: unknown rec IRQ type occurred, please report!\n"); } + if (status & IRQ_GAMEPORT) + snd_azf3328_gameport_interrupt(chip); /* MPU401 has less critical IRQ requirements * than timer and playback/recording, right? */ if (status & IRQ_MPU401) { @@ -1268,7 +1668,6 @@ snd_azf3328_interrupt(int irq, void *dev_id) * If so, then I don't know how... */ snd_azf3328_dbgplay("azt3328: MPU401 IRQ\n"); } - irq_count++; return IRQ_HANDLED; } @@ -1287,8 +1686,8 @@ static const struct snd_pcm_hardware snd_azf3328_playback = .rates = SNDRV_PCM_RATE_5512 | SNDRV_PCM_RATE_8000_48000 | SNDRV_PCM_RATE_KNOT, - .rate_min = 4000, - .rate_max = 66200, + .rate_min = AZF_FREQ_4000, + .rate_max = AZF_FREQ_66200, .channels_min = 1, .channels_max = 2, .buffer_bytes_max = 65536, @@ -1315,8 +1714,8 @@ static const struct snd_pcm_hardware snd_azf3328_capture = .rates = SNDRV_PCM_RATE_5512 | SNDRV_PCM_RATE_8000_48000 | SNDRV_PCM_RATE_KNOT, - .rate_min = 4000, - .rate_max = 66200, + .rate_min = AZF_FREQ_4000, + .rate_max = AZF_FREQ_66200, .channels_min = 1, .channels_max = 2, .buffer_bytes_max = 65536, @@ -1329,10 +1728,24 @@ static const struct snd_pcm_hardware snd_azf3328_capture = static unsigned int snd_azf3328_fixed_rates[] = { - 4000, 4800, 5512, 6620, 8000, 9600, 11025, 13240, 16000, 22050, 32000, - 44100, 48000, 66200 }; + AZF_FREQ_4000, + AZF_FREQ_4800, + AZF_FREQ_5512, + AZF_FREQ_6620, + AZF_FREQ_8000, + AZF_FREQ_9600, + AZF_FREQ_11025, + AZF_FREQ_13240, + AZF_FREQ_16000, + AZF_FREQ_22050, + AZF_FREQ_32000, + AZF_FREQ_44100, + AZF_FREQ_48000, + AZF_FREQ_66200 +}; + static struct snd_pcm_hw_constraint_list snd_azf3328_hw_constraints_rates = { - .count = ARRAY_SIZE(snd_azf3328_fixed_rates), + .count = ARRAY_SIZE(snd_azf3328_fixed_rates), .list = snd_azf3328_fixed_rates, .mask = 0, }; @@ -1346,7 +1759,7 @@ snd_azf3328_playback_open(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime = substream->runtime; snd_azf3328_dbgcallenter(); - chip->playback_substream = substream; + chip->audio_stream[AZF_PLAYBACK].substream = substream; runtime->hw = snd_azf3328_playback; snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &snd_azf3328_hw_constraints_rates); @@ -1361,7 +1774,7 @@ snd_azf3328_capture_open(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime = substream->runtime; snd_azf3328_dbgcallenter(); - chip->capture_substream = substream; + chip->audio_stream[AZF_CAPTURE].substream = substream; runtime->hw = snd_azf3328_capture; snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &snd_azf3328_hw_constraints_rates); @@ -1375,7 +1788,7 @@ snd_azf3328_playback_close(struct snd_pcm_substream *substream) struct snd_azf3328 *chip = snd_pcm_substream_chip(substream); snd_azf3328_dbgcallenter(); - chip->playback_substream = NULL; + chip->audio_stream[AZF_PLAYBACK].substream = NULL; snd_azf3328_dbgcallleave(); return 0; } @@ -1386,7 +1799,7 @@ snd_azf3328_capture_close(struct snd_pcm_substream *substream) struct snd_azf3328 *chip = snd_pcm_substream_chip(substream); snd_azf3328_dbgcallenter(); - chip->capture_substream = NULL; + chip->audio_stream[AZF_CAPTURE].substream = NULL; snd_azf3328_dbgcallleave(); return 0; } @@ -1441,102 +1854,8 @@ snd_azf3328_pcm(struct snd_azf3328 *chip, int device) /******************************************************************/ -#ifdef SUPPORT_JOYSTICK -static int __devinit -snd_azf3328_config_joystick(struct snd_azf3328 *chip, int dev) -{ - struct gameport *gp; - struct resource *r; - - if (!joystick[dev]) - return -ENODEV; - - if (!(r = request_region(0x200, 8, "AZF3328 gameport"))) { - printk(KERN_WARNING "azt3328: cannot reserve joystick ports\n"); - return -EBUSY; - } - - chip->gameport = gp = gameport_allocate_port(); - if (!gp) { - printk(KERN_ERR "azt3328: cannot allocate memory for gameport\n"); - release_and_free_resource(r); - return -ENOMEM; - } - - gameport_set_name(gp, "AZF3328 Gameport"); - gameport_set_phys(gp, "pci%s/gameport0", pci_name(chip->pci)); - gameport_set_dev_parent(gp, &chip->pci->dev); - gp->io = 0x200; - gameport_set_port_data(gp, r); - - snd_azf3328_io2_outb(chip, IDX_IO2_LEGACY_ADDR, - snd_azf3328_io2_inb(chip, IDX_IO2_LEGACY_ADDR) | LEGACY_JOY); - - gameport_register_port(chip->gameport); - - return 0; -} - -static void -snd_azf3328_free_joystick(struct snd_azf3328 *chip) -{ - if (chip->gameport) { - struct resource *r = gameport_get_port_data(chip->gameport); - - gameport_unregister_port(chip->gameport); - chip->gameport = NULL; - /* disable gameport */ - snd_azf3328_io2_outb(chip, IDX_IO2_LEGACY_ADDR, - snd_azf3328_io2_inb(chip, IDX_IO2_LEGACY_ADDR) & ~LEGACY_JOY); - release_and_free_resource(r); - } -} -#else -static inline int -snd_azf3328_config_joystick(struct snd_azf3328 *chip, int dev) { return -ENOSYS; } -static inline void -snd_azf3328_free_joystick(struct snd_azf3328 *chip) { } -#endif - -/******************************************************************/ - -static int -snd_azf3328_free(struct snd_azf3328 *chip) -{ - if (chip->irq < 0) - goto __end_hw; - - /* reset (close) mixer */ - snd_azf3328_mixer_set_mute(chip, IDX_MIXER_PLAY_MASTER, 1); /* first mute master volume */ - snd_azf3328_mixer_outw(chip, IDX_MIXER_RESET, 0x0000); - - /* interrupt setup - mask everything (FIXME!) */ - /* well, at least we know how to disable the timer IRQ */ - snd_azf3328_codec_outb(chip, IDX_IO_TIMER_VALUE + 3, 0x00); - - if (chip->irq >= 0) - synchronize_irq(chip->irq); -__end_hw: - snd_azf3328_free_joystick(chip); - if (chip->irq >= 0) - free_irq(chip->irq, chip); - pci_release_regions(chip->pci); - pci_disable_device(chip->pci); - - kfree(chip); - return 0; -} - -static int -snd_azf3328_dev_free(struct snd_device *device) -{ - struct snd_azf3328 *chip = device->device_data; - return snd_azf3328_free(chip); -} - -/******************************************************************/ - -/*** NOTE: the physical timer resolution actually is 1024000 ticks per second, +/*** NOTE: the physical timer resolution actually is 1024000 ticks per second + *** (probably derived from main crystal via a divider of 24), *** but announcing those attributes to user-space would make programs *** configure the timer to a 1 tick value, resulting in an absolutely fatal *** timer IRQ storm. @@ -1564,7 +1883,7 @@ snd_azf3328_timer_start(struct snd_timer *timer) delay = 49; /* minimum time is 49 ticks */ } snd_azf3328_dbgtimer("setting timer countdown value %d, add COUNTDOWN|IRQ\n", delay); - delay |= TIMER_ENABLE_COUNTDOWN | TIMER_ENABLE_IRQ; + delay |= TIMER_COUNTDOWN_ENABLE | TIMER_IRQ_ENABLE; spin_lock_irqsave(&chip->reg_lock, flags); snd_azf3328_codec_outl(chip, IDX_IO_TIMER_VALUE, delay); spin_unlock_irqrestore(&chip->reg_lock, flags); @@ -1582,7 +1901,7 @@ snd_azf3328_timer_stop(struct snd_timer *timer) chip = snd_timer_chip(timer); spin_lock_irqsave(&chip->reg_lock, flags); /* disable timer countdown and interrupt */ - /* FIXME: should we write TIMER_ACK_IRQ here? */ + /* FIXME: should we write TIMER_IRQ_ACK here? */ snd_azf3328_codec_outb(chip, IDX_IO_TIMER_VALUE + 3, 0); spin_unlock_irqrestore(&chip->reg_lock, flags); snd_azf3328_dbgcallleave(); @@ -1626,9 +1945,10 @@ snd_azf3328_timer(struct snd_azf3328 *chip, int device) snd_azf3328_timer_hw.resolution *= seqtimer_scaling; snd_azf3328_timer_hw.ticks /= seqtimer_scaling; - if ((err = snd_timer_new(chip->card, "AZF3328", &tid, &timer)) < 0) { + + err = snd_timer_new(chip->card, "AZF3328", &tid, &timer); + if (err < 0) goto out; - } strcpy(timer->name, "AZF3328 timer"); timer->private_data = chip; @@ -1636,6 +1956,8 @@ snd_azf3328_timer(struct snd_azf3328 *chip, int device) chip->timer = timer; + snd_azf3328_timer_stop(timer); + err = 0; out: @@ -1645,10 +1967,44 @@ out: /******************************************************************/ +static int +snd_azf3328_free(struct snd_azf3328 *chip) +{ + if (chip->irq < 0) + goto __end_hw; + + /* reset (close) mixer: + * first mute master volume, then reset + */ + snd_azf3328_mixer_set_mute(chip, IDX_MIXER_PLAY_MASTER, 1); + snd_azf3328_mixer_outw(chip, IDX_MIXER_RESET, 0x0000); + + snd_azf3328_timer_stop(chip->timer); + snd_azf3328_gameport_free(chip); + + if (chip->irq >= 0) + synchronize_irq(chip->irq); +__end_hw: + if (chip->irq >= 0) + free_irq(chip->irq, chip); + pci_release_regions(chip->pci); + pci_disable_device(chip->pci); + + kfree(chip); + return 0; +} + +static int +snd_azf3328_dev_free(struct snd_device *device) +{ + struct snd_azf3328 *chip = device->device_data; + return snd_azf3328_free(chip); +} + #if 0 /* check whether a bit can be modified */ static void -snd_azf3328_test_bit(unsigned int reg, int bit) +snd_azf3328_test_bit(unsigned unsigned reg, int bit) { unsigned char val, valoff, valon; @@ -1659,42 +2015,74 @@ snd_azf3328_test_bit(unsigned int reg, int bit) outb(val|(1 << bit), reg); valon = inb(reg); - + outb(val, reg); - printk(KERN_ERR "reg %04x bit %d: %02x %02x %02x\n", reg, bit, val, valoff, valon); + printk(KERN_ERR "reg %04x bit %d: %02x %02x %02x\n", + reg, bit, val, valoff, valon + ); } #endif -#if DEBUG_MISC -static void +static inline void snd_azf3328_debug_show_ports(const struct snd_azf3328 *chip) { +#if DEBUG_MISC u16 tmp; - snd_azf3328_dbgmisc("codec_port 0x%lx, io2_port 0x%lx, mpu_port 0x%lx, synth_port 0x%lx, mixer_port 0x%lx, irq %d\n", chip->codec_port, chip->io2_port, chip->mpu_port, chip->synth_port, chip->mixer_port, chip->irq); - - snd_azf3328_dbgmisc("io2 %02x %02x %02x %02x %02x %02x\n", snd_azf3328_io2_inb(chip, 0), snd_azf3328_io2_inb(chip, 1), snd_azf3328_io2_inb(chip, 2), snd_azf3328_io2_inb(chip, 3), snd_azf3328_io2_inb(chip, 4), snd_azf3328_io2_inb(chip, 5)); - - for (tmp=0; tmp <= 0x01; tmp += 1) - snd_azf3328_dbgmisc("0x%02x: opl 0x%04x, mpu300 0x%04x, mpu310 0x%04x, mpu320 0x%04x, mpu330 0x%04x\n", tmp, inb(0x388 + tmp), inb(0x300 + tmp), inb(0x310 + tmp), inb(0x320 + tmp), inb(0x330 + tmp)); + snd_azf3328_dbgmisc( + "codec_io 0x%lx, game_io 0x%lx, mpu_io 0x%lx, " + "opl3_io 0x%lx, mixer_io 0x%lx, irq %d\n", + chip->codec_io, chip->game_io, chip->mpu_io, + chip->opl3_io, chip->mixer_io, chip->irq + ); + + snd_azf3328_dbgmisc("game %02x %02x %02x %02x %02x %02x\n", + snd_azf3328_game_inb(chip, 0), + snd_azf3328_game_inb(chip, 1), + snd_azf3328_game_inb(chip, 2), + snd_azf3328_game_inb(chip, 3), + snd_azf3328_game_inb(chip, 4), + snd_azf3328_game_inb(chip, 5) + ); + + for (tmp = 0; tmp < 0x07; tmp += 1) + snd_azf3328_dbgmisc("mpu_io 0x%04x\n", inb(chip->mpu_io + tmp)); + + for (tmp = 0; tmp <= 0x07; tmp += 1) + snd_azf3328_dbgmisc("0x%02x: game200 0x%04x, game208 0x%04x\n", + tmp, inb(0x200 + tmp), inb(0x208 + tmp)); + + for (tmp = 0; tmp <= 0x01; tmp += 1) + snd_azf3328_dbgmisc( + "0x%02x: mpu300 0x%04x, mpu310 0x%04x, mpu320 0x%04x, " + "mpu330 0x%04x opl388 0x%04x opl38c 0x%04x\n", + tmp, + inb(0x300 + tmp), + inb(0x310 + tmp), + inb(0x320 + tmp), + inb(0x330 + tmp), + inb(0x388 + tmp), + inb(0x38c + tmp) + ); for (tmp = 0; tmp < AZF_IO_SIZE_CODEC; tmp += 2) - snd_azf3328_dbgmisc("codec 0x%02x: 0x%04x\n", tmp, snd_azf3328_codec_inw(chip, tmp)); + snd_azf3328_dbgmisc("codec 0x%02x: 0x%04x\n", + tmp, snd_azf3328_codec_inw(chip, tmp) + ); for (tmp = 0; tmp < AZF_IO_SIZE_MIXER; tmp += 2) - snd_azf3328_dbgmisc("mixer 0x%02x: 0x%04x\n", tmp, snd_azf3328_mixer_inw(chip, tmp)); + snd_azf3328_dbgmisc("mixer 0x%02x: 0x%04x\n", + tmp, snd_azf3328_mixer_inw(chip, tmp) + ); +#endif /* DEBUG_MISC */ } -#else -static inline void -snd_azf3328_debug_show_ports(const struct snd_azf3328 *chip) {} -#endif static int __devinit snd_azf3328_create(struct snd_card *card, - struct pci_dev *pci, - unsigned long device_type, - struct snd_azf3328 ** rchip) + struct pci_dev *pci, + unsigned long device_type, + struct snd_azf3328 **rchip) { struct snd_azf3328 *chip; int err; @@ -1705,7 +2093,8 @@ snd_azf3328_create(struct snd_card *card, *rchip = NULL; - if ((err = pci_enable_device(pci)) < 0) + err = pci_enable_device(pci); + if (err < 0) return err; chip = kzalloc(sizeof(*chip), GFP_KERNEL); @@ -1721,20 +2110,25 @@ snd_azf3328_create(struct snd_card *card, /* check if we can restrict PCI DMA transfers to 24 bits */ if (pci_set_dma_mask(pci, DMA_24BIT_MASK) < 0 || pci_set_consistent_dma_mask(pci, DMA_24BIT_MASK) < 0) { - snd_printk(KERN_ERR "architecture does not support 24bit PCI busmaster DMA\n"); + snd_printk(KERN_ERR "architecture does not support " + "24bit PCI busmaster DMA\n" + ); err = -ENXIO; goto out_err; } - if ((err = pci_request_regions(pci, "Aztech AZF3328")) < 0) { + err = pci_request_regions(pci, "Aztech AZF3328"); + if (err < 0) goto out_err; - } - chip->codec_port = pci_resource_start(pci, 0); - chip->io2_port = pci_resource_start(pci, 1); - chip->mpu_port = pci_resource_start(pci, 2); - chip->synth_port = pci_resource_start(pci, 3); - chip->mixer_port = pci_resource_start(pci, 4); + chip->codec_io = pci_resource_start(pci, 0); + chip->game_io = pci_resource_start(pci, 1); + chip->mpu_io = pci_resource_start(pci, 2); + chip->opl3_io = pci_resource_start(pci, 3); + chip->mixer_io = pci_resource_start(pci, 4); + + chip->audio_stream[AZF_PLAYBACK].portbase = chip->codec_io + 0x00; + chip->audio_stream[AZF_CAPTURE].portbase = chip->codec_io + 0x20; if (request_irq(pci->irq, snd_azf3328_interrupt, IRQF_SHARED, card->shortname, chip)) { @@ -1747,29 +2141,29 @@ snd_azf3328_create(struct snd_card *card, synchronize_irq(chip->irq); snd_azf3328_debug_show_ports(chip); - - if ((err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops)) < 0) { + + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); + if (err < 0) goto out_err; - } /* create mixer interface & switches */ - if ((err = snd_azf3328_mixer_new(chip)) < 0) + err = snd_azf3328_mixer_new(chip); + if (err < 0) goto out_err; -#if 0 - /* set very low bitrate to reduce noise and power consumption? */ - snd_azf3328_setfmt(chip, IDX_IO_PLAY_SOUNDFORMAT, 5512, 8, 1); -#endif + /* shutdown codecs to save power */ + /* have snd_azf3328_codec_activity() act properly */ + chip->audio_stream[AZF_PLAYBACK].running = 1; + snd_azf3328_codec_activity(chip, AZF_PLAYBACK, 0); /* standard chip init stuff */ - /* default IRQ init value */ + /* default IRQ init value */ tmp = DMA_PLAY_SOMETHING2|DMA_EPILOGUE_SOMETHING|DMA_SOMETHING_ELSE; spin_lock_irq(&chip->reg_lock); snd_azf3328_codec_outb(chip, IDX_IO_PLAY_FLAGS, tmp); snd_azf3328_codec_outb(chip, IDX_IO_REC_FLAGS, tmp); snd_azf3328_codec_outb(chip, IDX_IO_SOMETHING_FLAGS, tmp); - snd_azf3328_codec_outb(chip, IDX_IO_TIMER_VALUE + 3, 0x00); /* disable timer */ spin_unlock_irq(&chip->reg_lock); snd_card_set_dev(card, &pci->dev); @@ -1805,52 +2199,61 @@ snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return -ENOENT; } - card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0 ); + card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0); if (card == NULL) return -ENOMEM; strcpy(card->driver, "AZF3328"); strcpy(card->shortname, "Aztech AZF3328 (PCI168)"); - if ((err = snd_azf3328_create(card, pci, pci_id->driver_data, &chip)) < 0) { + err = snd_azf3328_create(card, pci, pci_id->driver_data, &chip); + if (err < 0) goto out_err; - } card->private_data = chip; - if ((err = snd_mpu401_uart_new( card, 0, MPU401_HW_MPU401, - chip->mpu_port, MPU401_INFO_INTEGRATED, - pci->irq, 0, &chip->rmidi)) < 0) { - snd_printk(KERN_ERR "azf3328: no MPU-401 device at 0x%lx?\n", chip->mpu_port); + err = snd_mpu401_uart_new( + card, 0, MPU401_HW_MPU401, chip->mpu_io, MPU401_INFO_INTEGRATED, + pci->irq, 0, &chip->rmidi + ); + if (err < 0) { + snd_printk(KERN_ERR "azf3328: no MPU-401 device at 0x%lx?\n", + chip->mpu_io + ); goto out_err; } - if ((err = snd_azf3328_timer(chip, 0)) < 0) { + err = snd_azf3328_timer(chip, 0); + if (err < 0) goto out_err; - } - if ((err = snd_azf3328_pcm(chip, 0)) < 0) { + err = snd_azf3328_pcm(chip, 0); + if (err < 0) goto out_err; - } - if (snd_opl3_create(card, chip->synth_port, chip->synth_port+2, + if (snd_opl3_create(card, chip->opl3_io, chip->opl3_io+2, OPL3_HW_AUTO, 1, &opl3) < 0) { snd_printk(KERN_ERR "azf3328: no OPL3 device at 0x%lx-0x%lx?\n", - chip->synth_port, chip->synth_port+2 ); + chip->opl3_io, chip->opl3_io+2 + ); } else { - if ((err = snd_opl3_hwdep_new(opl3, 0, 1, NULL)) < 0) { + /* need to use IDs 1, 2 since ID 0 is snd_azf3328_timer above */ + err = snd_opl3_timer_new(opl3, 1, 2); + if (err < 0) + goto out_err; + err = snd_opl3_hwdep_new(opl3, 0, 1, NULL); + if (err < 0) goto out_err; - } } opl3->private_data = chip; sprintf(card->longname, "%s at 0x%lx, irq %i", - card->shortname, chip->codec_port, chip->irq); + card->shortname, chip->codec_io, chip->irq); - if ((err = snd_card_register(card)) < 0) { + err = snd_card_register(card); + if (err < 0) goto out_err; - } #ifdef MODULE printk( @@ -1861,19 +2264,18 @@ snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) 1024000 / seqtimer_scaling, seqtimer_scaling); #endif - if (snd_azf3328_config_joystick(chip, dev) < 0) - snd_azf3328_io2_outb(chip, IDX_IO2_LEGACY_ADDR, - snd_azf3328_io2_inb(chip, IDX_IO2_LEGACY_ADDR) & ~LEGACY_JOY); + snd_azf3328_gameport(chip, dev); pci_set_drvdata(pci, card); dev++; err = 0; goto out; - + out_err: + snd_printk(KERN_ERR "azf3328: something failed, exiting\n"); snd_card_free(card); - + out: snd_azf3328_dbgcallleave(); return err; @@ -1894,27 +2296,27 @@ snd_azf3328_suspend(struct pci_dev *pci, pm_message_t state) { struct snd_card *card = pci_get_drvdata(pci); struct snd_azf3328 *chip = card->private_data; - int reg; + unsigned reg; snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); - + snd_pcm_suspend_all(chip->pcm); - for (reg = 0; reg < AZF_IO_SIZE_MIXER_PM / 2; reg++) - chip->saved_regs_mixer[reg] = inw(chip->mixer_port + reg * 2); + for (reg = 0; reg < AZF_IO_SIZE_MIXER_PM / 2; ++reg) + chip->saved_regs_mixer[reg] = inw(chip->mixer_io + reg * 2); /* make sure to disable master volume etc. to prevent looping sound */ snd_azf3328_mixer_set_mute(chip, IDX_MIXER_PLAY_MASTER, 1); snd_azf3328_mixer_set_mute(chip, IDX_MIXER_WAVEOUT, 1); - - for (reg = 0; reg < AZF_IO_SIZE_CODEC_PM / 2; reg++) - chip->saved_regs_codec[reg] = inw(chip->codec_port + reg * 2); - for (reg = 0; reg < AZF_IO_SIZE_IO2_PM / 2; reg++) - chip->saved_regs_io2[reg] = inw(chip->io2_port + reg * 2); - for (reg = 0; reg < AZF_IO_SIZE_MPU_PM / 2; reg++) - chip->saved_regs_mpu[reg] = inw(chip->mpu_port + reg * 2); - for (reg = 0; reg < AZF_IO_SIZE_SYNTH_PM / 2; reg++) - chip->saved_regs_synth[reg] = inw(chip->synth_port + reg * 2); + + for (reg = 0; reg < AZF_IO_SIZE_CODEC_PM / 2; ++reg) + chip->saved_regs_codec[reg] = inw(chip->codec_io + reg * 2); + for (reg = 0; reg < AZF_IO_SIZE_GAME_PM / 2; ++reg) + chip->saved_regs_game[reg] = inw(chip->game_io + reg * 2); + for (reg = 0; reg < AZF_IO_SIZE_MPU_PM / 2; ++reg) + chip->saved_regs_mpu[reg] = inw(chip->mpu_io + reg * 2); + for (reg = 0; reg < AZF_IO_SIZE_OPL3_PM / 2; ++reg) + chip->saved_regs_opl3[reg] = inw(chip->opl3_io + reg * 2); pci_disable_device(pci); pci_save_state(pci); @@ -1927,7 +2329,7 @@ snd_azf3328_resume(struct pci_dev *pci) { struct snd_card *card = pci_get_drvdata(pci); struct snd_azf3328 *chip = card->private_data; - int reg; + unsigned reg; pci_set_power_state(pci, PCI_D0); pci_restore_state(pci); @@ -1939,23 +2341,21 @@ snd_azf3328_resume(struct pci_dev *pci) } pci_set_master(pci); - for (reg = 0; reg < AZF_IO_SIZE_IO2_PM / 2; reg++) - outw(chip->saved_regs_io2[reg], chip->io2_port + reg * 2); - for (reg = 0; reg < AZF_IO_SIZE_MPU_PM / 2; reg++) - outw(chip->saved_regs_mpu[reg], chip->mpu_port + reg * 2); - for (reg = 0; reg < AZF_IO_SIZE_SYNTH_PM / 2; reg++) - outw(chip->saved_regs_synth[reg], chip->synth_port + reg * 2); - for (reg = 0; reg < AZF_IO_SIZE_MIXER_PM / 2; reg++) - outw(chip->saved_regs_mixer[reg], chip->mixer_port + reg * 2); - for (reg = 0; reg < AZF_IO_SIZE_CODEC_PM / 2; reg++) - outw(chip->saved_regs_codec[reg], chip->codec_port + reg * 2); + for (reg = 0; reg < AZF_IO_SIZE_GAME_PM / 2; ++reg) + outw(chip->saved_regs_game[reg], chip->game_io + reg * 2); + for (reg = 0; reg < AZF_IO_SIZE_MPU_PM / 2; ++reg) + outw(chip->saved_regs_mpu[reg], chip->mpu_io + reg * 2); + for (reg = 0; reg < AZF_IO_SIZE_OPL3_PM / 2; ++reg) + outw(chip->saved_regs_opl3[reg], chip->opl3_io + reg * 2); + for (reg = 0; reg < AZF_IO_SIZE_MIXER_PM / 2; ++reg) + outw(chip->saved_regs_mixer[reg], chip->mixer_io + reg * 2); + for (reg = 0; reg < AZF_IO_SIZE_CODEC_PM / 2; ++reg) + outw(chip->saved_regs_codec[reg], chip->codec_io + reg * 2); snd_power_change_state(card, SNDRV_CTL_POWER_D0); return 0; } -#endif - - +#endif /* CONFIG_PM */ static struct pci_driver driver = { diff --git a/sound/pci/azt3328.h b/sound/pci/azt3328.h index 679fa99..3448fd6 100644 --- a/sound/pci/azt3328.h +++ b/sound/pci/azt3328.h @@ -54,7 +54,10 @@ #define SOUNDFORMAT_XTAL1 0x00 #define SOUNDFORMAT_XTAL2 0x01 /* all _SUSPECTED_ values are not used by Windows drivers, so we don't - * have any hard facts, only rough measurements */ + * have any hard facts, only rough measurements. + * All we know is that the crystal used on the board has 24.576MHz, + * like many soundcards (which results in the frequencies below when + * using certain divider values selected by the values below) */ #define SOUNDFORMAT_FREQ_SUSPECTED_4000 0x0c | SOUNDFORMAT_XTAL1 #define SOUNDFORMAT_FREQ_SUSPECTED_4800 0x0a | SOUNDFORMAT_XTAL1 #define SOUNDFORMAT_FREQ_5510 0x0c | SOUNDFORMAT_XTAL2 @@ -72,6 +75,26 @@ #define SOUNDFORMAT_FLAG_16BIT 0x0010 #define SOUNDFORMAT_FLAG_2CHANNELS 0x0020 +/* define frequency helpers, for maximum value safety */ +enum { +#define AZF_FREQ(rate) AZF_FREQ_##rate = rate + AZF_FREQ(4000), + AZF_FREQ(4800), + AZF_FREQ(5512), + AZF_FREQ(6620), + AZF_FREQ(8000), + AZF_FREQ(9600), + AZF_FREQ(11025), + AZF_FREQ(13240), + AZF_FREQ(16000), + AZF_FREQ(22050), + AZF_FREQ(32000), + AZF_FREQ(44100), + AZF_FREQ(48000), + AZF_FREQ(66200), +#undef AZF_FREQ +} AZF_FREQUENCIES; + /** recording area (see also: playback bit flag definitions) **/ #define IDX_IO_REC_FLAGS 0x20 /* ??, PU:0x0000 */ #define IDX_IO_REC_IRQTYPE 0x22 /* ??, PU:0x0000 */ @@ -97,40 +120,164 @@ /** DirectX timer, main interrupt area (FIXME: and something else?) **/ #define IDX_IO_TIMER_VALUE 0x60 /* found this timer area by pure luck :-) */ - #define TIMER_VALUE_MASK 0x000fffffUL /* timer countdown value; triggers IRQ when timer is finished */ - #define TIMER_ENABLE_COUNTDOWN 0x01000000UL /* activate the timer countdown */ - #define TIMER_ENABLE_IRQ 0x02000000UL /* trigger timer IRQ on zero transition */ - #define TIMER_ACK_IRQ 0x04000000UL /* being set in IRQ handler in case port 0x00 (hmm, not port 0x64!?!?) had 0x0020 set upon IRQ handler */ + /* timer countdown value; triggers IRQ when timer is finished */ + #define TIMER_VALUE_MASK 0x000fffffUL + /* activate timer countdown */ + #define TIMER_COUNTDOWN_ENABLE 0x01000000UL + /* trigger timer IRQ on zero transition */ + #define TIMER_IRQ_ENABLE 0x02000000UL + /* being set in IRQ handler in case port 0x00 (hmm, not port 0x64!?!?) + * had 0x0020 set upon IRQ handler */ + #define TIMER_IRQ_ACK 0x04000000UL #define IDX_IO_IRQSTATUS 0x64 - #define IRQ_PLAYBACK 0x0001 - #define IRQ_RECORDING 0x0002 - #define IRQ_MPU401 0x0010 - #define IRQ_TIMER 0x0020 /* DirectX timer */ - #define IRQ_UNKNOWN1 0x0040 /* probably unused, or possibly I2S port? or gameport IRQ? */ - #define IRQ_UNKNOWN2 0x0080 /* probably unused, or possibly I2S port? or gameport IRQ? */ + /* some IRQ bit in here might also be used to signal a power-management timer + * timeout, to request shutdown of the chip (e.g. AD1815JS has such a thing). + * Some OPL3 hardware (e.g. in LM4560) has some special timer hardware which + * can trigger an OPL3 timer IRQ, so maybe there's such a thing as well... */ + + #define IRQ_PLAYBACK 0x0001 + #define IRQ_RECORDING 0x0002 + #define IRQ_UNKNOWN1 0x0004 /* most probably I2S port */ + #define IRQ_GAMEPORT 0x0008 /* Interrupt of Digital(ly) Enhanced Game Port */ + #define IRQ_MPU401 0x0010 + #define IRQ_TIMER 0x0020 /* DirectX timer */ + #define IRQ_UNKNOWN2 0x0040 /* probably unused, or possibly I2S port? */ + #define IRQ_UNKNOWN3 0x0080 /* probably unused, or possibly I2S port? */ #define IDX_IO_66H 0x66 /* writing 0xffff returns 0x0000 */ -#define IDX_IO_SOME_VALUE 0x68 /* this is set to e.g. 0x3ff or 0x300, and writable; maybe some buffer limit, but I couldn't find out more, PU:0x00ff */ -#define IDX_IO_6AH 0x6A /* this WORD can be set to have bits 0x0028 activated (FIXME: correct??); actually inhibits PCM playback!!! maybe power management?? */ - #define IO_6A_PAUSE_PLAYBACK 0x0200 /* bit 9; sure, this pauses playback, but what the heck is this really about?? */ -#define IDX_IO_6CH 0x6C -#define IDX_IO_6EH 0x6E /* writing 0xffff returns 0x83fe */ -/* further I/O indices not saved/restored, so probably not used */ + /* this is set to e.g. 0x3ff or 0x300, and writable; + * maybe some buffer limit, but I couldn't find out more, PU:0x00ff: */ +#define IDX_IO_SOME_VALUE 0x68 + #define IO_68_RANDOM_TOGGLE1 0x0100 /* toggles randomly */ + #define IO_68_RANDOM_TOGGLE2 0x0200 /* toggles randomly */ + /* umm, nope, behaviour of these bits changes depending on what we wrote + * to 0x6b!! */ + +/* this WORD can be set to have bits 0x0028 activated (FIXME: correct??); + * actually inhibits PCM playback!!! maybe power management??: */ +#define IDX_IO_6AH 0x6A + /* bit 5: enabling this will activate permanent counting of bytes 2/3 + * at gameport I/O (0xb402/3) (equal values each) and cause + * gameport legacy I/O at 0x0200 to be _DISABLED_! + * Is this Digital Enhanced Game Port Enable??? Or maybe it's Testmode + * for Enhanced Digital Gameport (see 4D Wave DX card): */ + #define IO_6A_SOMETHING1_GAMEPORT 0x0020 + /* bit 8; sure, this _pauses_ playback (later resumes at same spot!), + * but what the heck is this really about??: */ + #define IO_6A_PAUSE_PLAYBACK_BIT8 0x0100 + /* bit 9; sure, this _pauses_ playback (later resumes at same spot!), + * but what the heck is this really about??: */ + #define IO_6A_PAUSE_PLAYBACK_BIT9 0x0200 + /* BIT8 and BIT9 are _NOT_ able to affect OPL3 MIDI playback, + * thus it suggests influence on PCM only!! + * However OTOH there seems to be no bit anywhere around here + * which is able to disable OPL3... */ + /* bit 10: enabling this actually changes values at legacy gameport + * I/O address (0x200); is this enabling of the Digital Enhanced Game Port??? + * Or maybe this simply switches off the NE558 circuit, since enabling this + * still lets us evaluate button states, but not axis states */ + #define IO_6A_SOMETHING2_GAMEPORT 0x0400 + /* writing 0x0300: causes quite some crackling during + * PC activity such as switching windows (PCI traffic?? + * --> FIFO/timing settings???) */ + /* writing 0x0100 plus/or 0x0200 inhibits playback */ + /* since the Windows .INF file has Flag_Enable_JoyStick and + * Flag_Enable_SB_DOS_Emulation directly together, it stands to reason + * that some other bit in this same register might be responsible + * for SB DOS Emulation activation (note that the file did NOT define + * a switch for OPL3!) */ +#define IDX_IO_6CH 0x6C /* unknown; fully read-writable */ +#define IDX_IO_6EH 0x6E + /* writing 0xffff returns 0x83fe (or 0x03fe only). + * writing 0x83 (and only 0x83!!) to 0x6f will cause 0x6c to switch + * from 0000 to ffff. */ +/* further I/O indices not saved/restored and not readable after writing, + * so probably not used */ -/*** I/O 2 area port indices ***/ + +/*** Gameport area port indices ***/ /* (only 0x06 of 0x08 bytes saved/restored by Windows driver) */ -#define AZF_IO_SIZE_IO2 0x08 -#define AZF_IO_SIZE_IO2_PM 0x06 +#define AZF_IO_SIZE_GAME 0x08 +#define AZF_IO_SIZE_GAME_PM 0x06 + +enum { + AZF_GAME_LEGACY_IO_PORT = 0x200 +} AZF_GAME_CONFIGS; + +#define IDX_GAME_LEGACY_COMPATIBLE 0x00 + /* in some operation mode, writing anything to this port + * triggers an interrupt: + * yup, that's in case IDX_GAME_01H has one of the + * axis measurement bits enabled + * (and of course one needs to have GAME_HWCFG_IRQ_ENABLE, too) */ + +#define IDX_GAME_AXES_CONFIG 0x01 + /* NOTE: layout of this register awfully similar (read: "identical??") + * to AD1815JS.pdf (p.29) */ + + /* enables axis 1 (X axis) measurement: */ + #define GAME_AXES_ENABLE_1 0x01 + /* enables axis 2 (Y axis) measurement: */ + #define GAME_AXES_ENABLE_2 0x02 + /* enables axis 3 (X axis) measurement: */ + #define GAME_AXES_ENABLE_3 0x04 + /* enables axis 4 (Y axis) measurement: */ + #define GAME_AXES_ENABLE_4 0x08 + /* selects the current axis to read the measured value of + * (at IDX_GAME_AXIS_VALUE): + * 00 = axis 1, 01 = axis 2, 10 = axis 3, 11 = axis 4: */ + #define GAME_AXES_READ_MASK 0x30 + /* enable to have the latch continuously accept ADC values + * (and continuously cause interrupts in case interrupts are enabled); + * AD1815JS.pdf says it's ~16ms interval there: */ + #define GAME_AXES_LATCH_ENABLE 0x40 + /* joystick data (measured axes) ready for reading: */ + #define GAME_AXES_SAMPLING_READY 0x80 + + /* NOTE: other card specs (SiS960 and others!) state that the + * game position latches should be frozen when reading and be freed + * (== reset?) after reading!!! + * Freezing most likely means disabling 0x40 (GAME_AXES_LATCH_ENABLE), + * but how to free the value? */ + /* An internet search for "gameport latch ADC" should provide some insight + * into how to program such a gameport system. */ + + /* writing 0xf0 to 01H once reset both counters to 0, in some special mode!? + * yup, in case 6AH 0x20 is not enabled + * (and 0x40 is sufficient, 0xf0 is not needed) */ + +#define IDX_GAME_AXIS_VALUE 0x02 + /* R: value of currently configured axis (word value!); + * W: trigger axis measurement */ + +#define IDX_GAME_HWCONFIG 0x04 + /* note: bits 4 to 7 are never set (== 0) when reading! + * --> reserved bits? */ + /* enables IRQ notification upon axes measurement ready: */ + #define GAME_HWCFG_IRQ_ENABLE 0x01 + /* these bits choose a different frequency for the + * internal ADC counter increment. + * hmm, seems to be a combo of bits: + * 00 --> standard frequency + * 10 --> 1/2 + * 01 --> 1/20 + * 11 --> 1/200: */ + #define GAME_HWCFG_ADC_COUNTER_FREQ_MASK 0x06 -#define IDX_IO2_LEGACY_ADDR 0x04 - #define LEGACY_SOMETHING 0x01 /* OPL3?? */ - #define LEGACY_JOY 0x08 + /* enable gameport legacy I/O address (0x200) + * I was unable to locate any configurability for a different address: */ + #define GAME_HWCFG_LEGACY_ADDRESS_ENABLE 0x08 +/*** MPU401 ***/ #define AZF_IO_SIZE_MPU 0x04 #define AZF_IO_SIZE_MPU_PM 0x04 -#define AZF_IO_SIZE_SYNTH 0x08 -#define AZF_IO_SIZE_SYNTH_PM 0x06 +/*** OPL3 synth ***/ +#define AZF_IO_SIZE_OPL3 0x08 +#define AZF_IO_SIZE_OPL3_PM 0x06 +/* hmm, given that a standard OPL3 has 4 registers only, + * there might be some enhanced functionality lurking at the end + * (especially since register 0x04 has a "non-empty" value 0xfe) */ /*** mixer I/O area port indices ***/ /* (only 0x22 of 0x40 bytes saved/restored by Windows driver) -- cgit v0.10.2 From 9ad593f6d326e7a4664e3856520f6c042f82a37f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 16 May 2008 12:34:47 +0200 Subject: [ALSA] hda - Fix DMA position inaccuracy Many HD-audio controllers seem inaccurate about the IRQ timing of PCM period updates. This has caused problems on audio quality; e.g. JACK doesn't work with two periods. This patch fixes the problem by checking the current DMA position at IRQ handler and delays the period-update via a workq if it's inaccurate. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b3a618e..6ba7ac0 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -285,6 +285,7 @@ struct azx_dev { u32 *posbuf; /* position buffer pointer */ unsigned int bufsize; /* size of the play buffer in bytes */ + unsigned int period_bytes; /* size of the period in bytes */ unsigned int frags; /* number for period in the play buffer */ unsigned int fifo_size; /* FIFO size */ @@ -301,11 +302,10 @@ struct azx_dev { */ unsigned char stream_tag; /* assigned stream */ unsigned char index; /* stream index */ - /* for sanity check of position buffer */ - unsigned int period_intr; unsigned int opened :1; unsigned int running :1; + unsigned int irq_pending: 1; }; /* CORB/RIRB */ @@ -369,6 +369,9 @@ struct azx { /* for debugging */ unsigned int last_cmd; /* last issued command (to sync) */ + + /* for pending irqs */ + struct work_struct irq_pending_work; }; /* driver types */ @@ -908,6 +911,8 @@ static void azx_init_pci(struct azx *chip) } +static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev); + /* * interrupt handler */ @@ -930,11 +935,18 @@ static irqreturn_t azx_interrupt(int irq, void *dev_id) azx_dev = &chip->azx_dev[i]; if (status & azx_dev->sd_int_sta_mask) { azx_sd_writeb(azx_dev, SD_STS, SD_INT_MASK); - if (azx_dev->substream && azx_dev->running) { - azx_dev->period_intr++; + if (!azx_dev->substream || !azx_dev->running) + continue; + /* check whether this IRQ is really acceptable */ + if (azx_position_ok(chip, azx_dev)) { + azx_dev->irq_pending = 0; spin_unlock(&chip->reg_lock); snd_pcm_period_elapsed(azx_dev->substream); spin_lock(&chip->reg_lock); + } else { + /* bogus IRQ, process it later */ + azx_dev->irq_pending = 1; + schedule_work(&chip->irq_pending_work); } } } @@ -973,6 +985,7 @@ static int azx_setup_periods(struct snd_pcm_substream *substream, azx_sd_writel(azx_dev, SD_BDLPU, 0); period_bytes = snd_pcm_lib_period_bytes(substream); + azx_dev->period_bytes = period_bytes; periods = azx_dev->bufsize / period_bytes; /* program the initial BDL entries */ @@ -1421,27 +1434,16 @@ static int azx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) return 0; } -static snd_pcm_uframes_t azx_pcm_pointer(struct snd_pcm_substream *substream) +static unsigned int azx_get_position(struct azx *chip, + struct azx_dev *azx_dev) { - struct azx_pcm *apcm = snd_pcm_substream_chip(substream); - struct azx *chip = apcm->chip; - struct azx_dev *azx_dev = get_azx_dev(substream); unsigned int pos; if (chip->position_fix == POS_FIX_POSBUF || chip->position_fix == POS_FIX_AUTO) { /* use the position buffer */ pos = le32_to_cpu(*azx_dev->posbuf); - if (chip->position_fix == POS_FIX_AUTO && - azx_dev->period_intr == 1 && !pos) { - printk(KERN_WARNING - "hda-intel: Invalid position buffer, " - "using LPIB read method instead.\n"); - chip->position_fix = POS_FIX_NONE; - goto read_lpib; - } } else { - read_lpib: /* read LPIB */ pos = azx_sd_readl(azx_dev, SD_LPIB); if (chip->position_fix == POS_FIX_FIFO) @@ -1449,7 +1451,90 @@ static snd_pcm_uframes_t azx_pcm_pointer(struct snd_pcm_substream *substream) } if (pos >= azx_dev->bufsize) pos = 0; - return bytes_to_frames(substream->runtime, pos); + return pos; +} + +static snd_pcm_uframes_t azx_pcm_pointer(struct snd_pcm_substream *substream) +{ + struct azx_pcm *apcm = snd_pcm_substream_chip(substream); + struct azx *chip = apcm->chip; + struct azx_dev *azx_dev = get_azx_dev(substream); + return bytes_to_frames(substream->runtime, + azx_get_position(chip, azx_dev)); +} + +/* + * Check whether the current DMA position is acceptable for updating + * periods. Returns non-zero if it's OK. + * + * Many HD-audio controllers appear pretty inaccurate about + * the update-IRQ timing. The IRQ is issued before actually the + * data is processed. So, we need to process it afterwords in a + * workqueue. + */ +static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) +{ + unsigned int pos; + + pos = azx_get_position(chip, azx_dev); + if (chip->position_fix == POS_FIX_AUTO) { + if (!pos) { + printk(KERN_WARNING + "hda-intel: Invalid position buffer, " + "using LPIB read method instead.\n"); + chip->position_fix = POS_FIX_NONE; + pos = azx_get_position(chip, azx_dev); + } else + chip->position_fix = POS_FIX_POSBUF; + } + + if (pos % azx_dev->period_bytes > azx_dev->period_bytes / 2) + return 0; /* NG - it's below the period boundary */ + return 1; /* OK, it's fine */ +} + +/* + * The work for pending PCM period updates. + */ +static void azx_irq_pending_work(struct work_struct *work) +{ + struct azx *chip = container_of(work, struct azx, irq_pending_work); + int i, pending; + + for (;;) { + pending = 0; + spin_lock_irq(&chip->reg_lock); + for (i = 0; i < chip->num_streams; i++) { + struct azx_dev *azx_dev = &chip->azx_dev[i]; + if (!azx_dev->irq_pending || + !azx_dev->substream || + !azx_dev->running) + continue; + if (azx_position_ok(chip, azx_dev)) { + azx_dev->irq_pending = 0; + spin_unlock(&chip->reg_lock); + snd_pcm_period_elapsed(azx_dev->substream); + spin_lock(&chip->reg_lock); + } else + pending++; + } + spin_unlock_irq(&chip->reg_lock); + if (!pending) + return; + cond_resched(); + } +} + +/* clear irq_pending flags and assure no on-going workq */ +static void azx_clear_irq_pending(struct azx *chip) +{ + int i; + + spin_lock_irq(&chip->reg_lock); + for (i = 0; i < chip->num_streams; i++) + chip->azx_dev[i].irq_pending = 0; + spin_unlock_irq(&chip->reg_lock); + flush_scheduled_work(); } static struct snd_pcm_ops azx_pcm_ops = { @@ -1676,6 +1761,7 @@ static int azx_suspend(struct pci_dev *pci, pm_message_t state) int i; snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); + azx_clear_irq_pending(chip); for (i = 0; i < AZX_MAX_PCMS; i++) snd_pcm_suspend_all(chip->pcm[i]); if (chip->initialized) @@ -1732,6 +1818,7 @@ static int azx_free(struct azx *chip) int i; if (chip->initialized) { + azx_clear_irq_pending(chip); for (i = 0; i < chip->num_streams; i++) azx_stream_stop(chip, &chip->azx_dev[i]); azx_stop_chip(chip); @@ -1857,6 +1944,7 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci, chip->irq = -1; chip->driver_type = driver_type; chip->msi = enable_msi; + INIT_WORK(&chip->irq_pending_work, azx_irq_pending_work); chip->position_fix = check_position_fix(chip, position_fix[dev]); check_probe_mask(chip, dev); -- cgit v0.10.2 From b2efbbfba24efc8456d68d5af42d082ab1c2febc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 19 May 2008 12:30:58 +0200 Subject: [ALSA] ASoC: Remove in-code changelogs The overwhelming majority just say 'initial version' anyway. Signed-off-by: Mark Brown Acked-by: Ben Dooks Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c index 2a1ffe3..7bf2081 100644 --- a/sound/soc/codecs/ac97.c +++ b/sound/soc/codecs/ac97.c @@ -10,9 +10,6 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * - * Revision history - * 17th Oct 2005 Initial version. - * * Generic AC97 support. */ diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index 76c1e2d..89efe40 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -9,9 +9,6 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. - * - * Revision history - * 4th Feb 2006 Initial version. */ #include diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index 1f24116..9e6b2fd 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -10,9 +10,6 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * - * Revision history - * 4th Feb 2006 Initial version. - * * Features:- * * o Support for AC97 Codec, Voice DAC and Aux DAC diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c index 852f16d..edeea63 100644 --- a/sound/soc/pxa/corgi.c +++ b/sound/soc/pxa/corgi.c @@ -11,10 +11,6 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. - * - * Revision history - * 30th Nov 2005 Initial version. - * */ #include diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c index 4250710..35090c2 100644 --- a/sound/soc/pxa/pxa2xx-i2s.c +++ b/sound/soc/pxa/pxa2xx-i2s.c @@ -9,9 +9,6 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. - * - * Revision history - * 12th Aug 2005 Initial version. */ #include diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c index 73a6df8..092b5c7 100644 --- a/sound/soc/pxa/spitz.c +++ b/sound/soc/pxa/spitz.c @@ -12,9 +12,6 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * - * Revision history - * 30th Nov 2005 Initial version. - * */ #include diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c index fda2aa0..465ff0f 100644 --- a/sound/soc/pxa/tosa.c +++ b/sound/soc/pxa/tosa.c @@ -12,9 +12,6 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * - * Revision history - * 30th Nov 2005 Initial version. - * * GPIO's * 1 - Jack Insertion * 5 - Hookswitch (headset answer/hang up switch) diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c index 79c5027..c1a0161 100644 --- a/sound/soc/s3c24xx/neo1973_wm8753.c +++ b/sound/soc/s3c24xx/neo1973_wm8753.c @@ -10,10 +10,6 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * - * Revision history - * 20th Jan 2007 Initial version. - * 05th Feb 2007 Rename all to Neo1973 - * */ #include diff --git a/sound/soc/s3c24xx/s3c2443-ac97.c b/sound/soc/s3c24xx/s3c2443-ac97.c index e81d9a6..0eed140 100644 --- a/sound/soc/s3c24xx/s3c2443-ac97.c +++ b/sound/soc/s3c24xx/s3c2443-ac97.c @@ -10,9 +10,6 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * Revision history - * 21st Mar 2007 Initial Version */ #include diff --git a/sound/soc/s3c24xx/s3c24xx-i2s.c b/sound/soc/s3c24xx/s3c24xx-i2s.c index 1ed6afd..ddf8724 100644 --- a/sound/soc/s3c24xx/s3c24xx-i2s.c +++ b/sound/soc/s3c24xx/s3c24xx-i2s.c @@ -12,11 +12,6 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. - * - * - * Revision history - * 11th Dec 2006 Merged with Simtec driver - * 10th Nov 2006 Initial version. */ #include diff --git a/sound/soc/s3c24xx/s3c24xx-pcm.c b/sound/soc/s3c24xx/s3c24xx-pcm.c index 7806ae6..ef59974 100644 --- a/sound/soc/s3c24xx/s3c24xx-pcm.c +++ b/sound/soc/s3c24xx/s3c24xx-pcm.c @@ -12,10 +12,6 @@ * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. - * - * Revision history - * 11th Dec 2006 Merged with Simtec driver - * 10th Nov 2006 Initial version. */ #include diff --git a/sound/soc/s3c24xx/smdk2443_wm9710.c b/sound/soc/s3c24xx/smdk2443_wm9710.c index b4a5630..8515d6f 100644 --- a/sound/soc/s3c24xx/smdk2443_wm9710.c +++ b/sound/soc/s3c24xx/smdk2443_wm9710.c @@ -10,9 +10,6 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * - * Revision history - * 8th Mar 2007 Initial version. - * */ #include diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index e148db9..0318d8a 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -14,10 +14,6 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * - * Revision history - * 12th Aug 2005 Initial version. - * 25th Oct 2005 Working Codec, Interface and Platform registration. - * * TODO: * o Add hw rules to enforce rates, etc. * o More testing with other codecs/machines. diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 1ef6d94..8a3192b 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -10,11 +10,6 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * - * Revision history - * 12th Aug 2005 Initial version. - * 25th Oct 2005 Implemented path power domain. - * 18th Dec 2005 Implemented machine and stream level power domain. - * * Features: * o Changes power status of internal codec blocks depending on the * dynamic configuration of codec internal audio paths and active -- cgit v0.10.2 From 1ef6ab75c7deef931d6308af282ed7d8e480e77f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 19 May 2008 12:31:55 +0200 Subject: [ALSA] ASoC: Make CPU and codec DAI operations have same type The CPU and codec DAI operations differ only in the presence of the digital mute operation for the codec so they may as well be the same type. Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/include/sound/soc.h b/include/sound/soc.h index d3c8c03..73accbc 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -272,9 +272,9 @@ struct snd_soc_ops { int (*trigger)(struct snd_pcm_substream *, int); }; -/* ASoC codec DAI ops */ -struct snd_soc_codec_ops { - /* codec DAI clocking configuration */ +/* ASoC DAI ops */ +struct snd_soc_dai_ops { + /* DAI clocking configuration */ int (*set_sysclk)(struct snd_soc_codec_dai *codec_dai, int clk_id, unsigned int freq, int dir); int (*set_pll)(struct snd_soc_codec_dai *codec_dai, @@ -282,7 +282,7 @@ struct snd_soc_codec_ops { int (*set_clkdiv)(struct snd_soc_codec_dai *codec_dai, int div_id, int div); - /* CPU DAI format configuration */ + /* DAI format configuration */ int (*set_fmt)(struct snd_soc_codec_dai *codec_dai, unsigned int fmt); int (*set_tdm_slot)(struct snd_soc_codec_dai *codec_dai, @@ -293,24 +293,6 @@ struct snd_soc_codec_ops { int (*digital_mute)(struct snd_soc_codec_dai *, int mute); }; -/* ASoC cpu DAI ops */ -struct snd_soc_cpu_ops { - /* CPU DAI clocking configuration */ - int (*set_sysclk)(struct snd_soc_cpu_dai *cpu_dai, - int clk_id, unsigned int freq, int dir); - int (*set_clkdiv)(struct snd_soc_cpu_dai *cpu_dai, - int div_id, int div); - int (*set_pll)(struct snd_soc_cpu_dai *cpu_dai, - int pll_id, unsigned int freq_in, unsigned int freq_out); - - /* CPU DAI format configuration */ - int (*set_fmt)(struct snd_soc_cpu_dai *cpu_dai, - unsigned int fmt); - int (*set_tdm_slot)(struct snd_soc_cpu_dai *cpu_dai, - unsigned int mask, int slots); - int (*set_tristate)(struct snd_soc_cpu_dai *, int tristate); -}; - /* SoC Codec DAI */ struct snd_soc_codec_dai { char *name; @@ -328,7 +310,7 @@ struct snd_soc_codec_dai { /* ops */ struct snd_soc_ops ops; - struct snd_soc_codec_ops dai_ops; + struct snd_soc_dai_ops dai_ops; /* DAI private data */ void *private_data; @@ -352,7 +334,7 @@ struct snd_soc_cpu_dai { /* ops */ struct snd_soc_ops ops; - struct snd_soc_cpu_ops dai_ops; + struct snd_soc_dai_ops dai_ops; /* DAI capabilities */ struct snd_soc_pcm_stream capture; -- cgit v0.10.2 From 1a189b97190d3f0f8cf0379a799d3555b2d648bb Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 13 Apr 2008 21:41:55 +0100 Subject: [ARM] pxa: Add bare bones PWM API Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b786e68..c274dbb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -22,6 +22,9 @@ config ARM Europe. There is an ARM Linux project with a web page at . +config HAVE_PWM + bool + config SYS_SUPPORTS_APM_EMULATION bool diff --git a/include/linux/pwm.h b/include/linux/pwm.h new file mode 100644 index 0000000..3945f80 --- /dev/null +++ b/include/linux/pwm.h @@ -0,0 +1,31 @@ +#ifndef __LINUX_PWM_H +#define __LINUX_PWM_H + +struct pwm_device; + +/* + * pwm_request - request a PWM device + */ +struct pwm_device *pwm_request(int pwm_id, const char *label); + +/* + * pwm_free - free a PWM device + */ +void pwm_free(struct pwm_device *pwm); + +/* + * pwm_config - change a PWM device configuration + */ +int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns); + +/* + * pwm_enable - start a PWM output toggling + */ +int pwm_enable(struct pwm_device *pwm); + +/* + * pwm_disable - stop a PWM output toggling + */ +void pwm_disable(struct pwm_device *pwm); + +#endif /* __ASM_ARCH_PWM_H */ -- cgit v0.10.2 From 75540c1ac3c7bd72ac8e092058f9714875239995 Mon Sep 17 00:00:00 2001 From: eric miao Date: Sun, 13 Apr 2008 21:44:04 +0100 Subject: [ARM] pxa: Add PXA support for PWM API Patch mainly from Eric Miao, with minor edits by rmk. Note: PWM0 and PWM2 share the same register I/O space and clock gating on pxa{27x, 3xx}, thus PWM2 is treated in the driver as a child PWM of PWM0. And this is also true for PWM1/3. Signed-off-by: eric miao Signed-off-by: Russell King diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index 5da7a68..9c01505 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -273,4 +273,9 @@ config PXA_SSP tristate help Enable support for PXA2xx SSP ports + +config PXA_PWM + tristate + help + Enable support for PXA2xx/PXA3xx PWM controllers endif diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile index 0e6d05b..02cbc3c 100644 --- a/arch/arm/mach-pxa/Makefile +++ b/arch/arm/mach-pxa/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CPU_FREQ) += cpu-pxa.o # Generic drivers that other drivers may depend upon obj-$(CONFIG_PXA_SSP) += ssp.o +obj-$(CONFIG_PXA_PWM) += pwm.o # SoC-specific code obj-$(CONFIG_PXA25x) += mfp-pxa2xx.o pxa25x.o diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index d6c05b6..794a107 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -280,6 +280,36 @@ struct platform_device pxa_device_rtc = { #ifdef CONFIG_PXA25x +static struct resource pxa25x_resource_pwm0[] = { + [0] = { + .start = 0x40b00000, + .end = 0x40b0000f, + .flags = IORESOURCE_MEM, + }, +}; + +struct platform_device pxa25x_device_pwm0 = { + .name = "pxa25x-pwm", + .id = 0, + .resource = pxa25x_resource_pwm0, + .num_resources = ARRAY_SIZE(pxa25x_resource_pwm0), +}; + +static struct resource pxa25x_resource_pwm1[] = { + [0] = { + .start = 0x40c00000, + .end = 0x40c0000f, + .flags = IORESOURCE_MEM, + }, +}; + +struct platform_device pxa25x_device_pwm1 = { + .name = "pxa25x-pwm", + .id = 1, + .resource = pxa25x_resource_pwm1, + .num_resources = ARRAY_SIZE(pxa25x_resource_pwm1), +}; + static u64 pxa25x_ssp_dma_mask = DMA_BIT_MASK(32); static struct resource pxa25x_resource_ssp[] = { @@ -568,6 +598,36 @@ struct platform_device pxa27x_device_ssp3 = { .num_resources = ARRAY_SIZE(pxa27x_resource_ssp3), }; +static struct resource pxa27x_resource_pwm0[] = { + [0] = { + .start = 0x40b00000, + .end = 0x40b0001f, + .flags = IORESOURCE_MEM, + }, +}; + +struct platform_device pxa27x_device_pwm0 = { + .name = "pxa27x-pwm", + .id = 0, + .resource = pxa27x_resource_pwm0, + .num_resources = ARRAY_SIZE(pxa27x_resource_pwm0), +}; + +static struct resource pxa27x_resource_pwm1[] = { + [0] = { + .start = 0x40c00000, + .end = 0x40c0001f, + .flags = IORESOURCE_MEM, + }, +}; + +struct platform_device pxa27x_device_pwm1 = { + .name = "pxa27x-pwm", + .id = 1, + .resource = pxa27x_resource_pwm1, + .num_resources = ARRAY_SIZE(pxa27x_resource_pwm1), +}; + static struct resource pxa27x_resource_camera[] = { [0] = { .start = 0x50000000, diff --git a/arch/arm/mach-pxa/devices.h b/arch/arm/mach-pxa/devices.h index fcab017..e620a33 100644 --- a/arch/arm/mach-pxa/devices.h +++ b/arch/arm/mach-pxa/devices.h @@ -24,4 +24,9 @@ extern struct platform_device pxa27x_device_ssp2; extern struct platform_device pxa27x_device_ssp3; extern struct platform_device pxa3xx_device_ssp4; +extern struct platform_device pxa25x_device_pwm0; +extern struct platform_device pxa25x_device_pwm1; +extern struct platform_device pxa27x_device_pwm0; +extern struct platform_device pxa27x_device_pwm1; + void __init pxa_register_device(struct platform_device *dev, void *data); diff --git a/arch/arm/mach-pxa/pwm.c b/arch/arm/mach-pxa/pwm.c new file mode 100644 index 0000000..70fb3ca --- /dev/null +++ b/arch/arm/mach-pxa/pwm.c @@ -0,0 +1,299 @@ +/* + * linux/arch/arm/mach-pxa/pwm.c + * + * simple driver for PWM (Pulse Width Modulator) controller + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 2008-02-13 initial version + * eric miao + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* PWM registers and bits definitions */ +#define PWMCR (0x00) +#define PWMDCR (0x04) +#define PWMPCR (0x08) + +#define PWMCR_SD (1 << 6) +#define PWMDCR_FD (1 << 10) + +struct pwm_device { + struct list_head node; + struct platform_device *pdev; + + const char *label; + struct clk *clk; + void __iomem *mmio_base; + + unsigned int use_count; + unsigned int pwm_id; +}; + +/* + * period_ns = 10^9 * (PRESCALE + 1) * (PV + 1) / PWM_CLK_RATE + * duty_ns = 10^9 * (PRESCALE + 1) * DC / PWM_CLK_RATE + */ +int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns) +{ + unsigned long long c; + unsigned long period_cycles, prescale, pv, dc; + + if (pwm == NULL || period_ns == 0 || duty_ns > period_ns) + return -EINVAL; + + c = clk_get_rate(pwm->clk); + c = c * period_ns; + do_div(c, 1000000000); + period_cycles = c; + + if (period_cycles < 0) + period_cycles = 1; + prescale = (period_cycles - 1) / 1024; + pv = period_cycles / (prescale + 1) - 1; + + if (prescale > 63) + return -EINVAL; + + if (duty_ns == period_ns) + dc = PWMDCR_FD; + else + dc = (pv + 1) * duty_ns / period_ns; + + /* NOTE: the clock to PWM has to be enabled first + * before writing to the registers + */ + clk_enable(pwm->clk); + __raw_writel(prescale, pwm->mmio_base + PWMCR); + __raw_writel(dc, pwm->mmio_base + PWMDCR); + __raw_writel(pv, pwm->mmio_base + PWMPCR); + clk_disable(pwm->clk); + + return 0; +} +EXPORT_SYMBOL(pwm_config); + +int pwm_enable(struct pwm_device *pwm) +{ + return clk_enable(pwm->clk); +} +EXPORT_SYMBOL(pwm_enable); + +void pwm_disable(struct pwm_device *pwm) +{ + clk_disable(pwm->clk); +} +EXPORT_SYMBOL(pwm_disable); + +static DEFINE_MUTEX(pwm_lock); +static LIST_HEAD(pwm_list); + +struct pwm_device *pwm_request(int pwm_id, const char *label) +{ + struct pwm_device *pwm; + int found = 0; + + mutex_lock(&pwm_lock); + + list_for_each_entry(pwm, &pwm_list, node) { + if (pwm->pwm_id == pwm_id && pwm->use_count == 0) { + pwm->use_count++; + pwm->label = label; + found = 1; + break; + } + } + + mutex_unlock(&pwm_lock); + + return (found) ? pwm : NULL; +} +EXPORT_SYMBOL(pwm_request); + +void pwm_free(struct pwm_device *pwm) +{ + mutex_lock(&pwm_lock); + + if (pwm->use_count) { + pwm->use_count--; + pwm->label = NULL; + } else + pr_warning("PWM device already freed\n"); + + mutex_unlock(&pwm_lock); +} +EXPORT_SYMBOL(pwm_free); + +static inline void __add_pwm(struct pwm_device *pwm) +{ + mutex_lock(&pwm_lock); + list_add_tail(&pwm->node, &pwm_list); + mutex_unlock(&pwm_lock); +} + +static struct pwm_device *pwm_probe(struct platform_device *pdev, + unsigned int pwm_id, struct pwm_device *parent_pwm) +{ + struct pwm_device *pwm; + struct resource *r; + int ret = 0; + + pwm = kzalloc(sizeof(struct pwm_device), GFP_KERNEL); + if (pwm == NULL) { + dev_err(&pdev->dev, "failed to allocate memory\n"); + return ERR_PTR(-ENOMEM); + } + + pwm->clk = clk_get(&pdev->dev, "PWMCLK"); + if (IS_ERR(pwm->clk)) { + ret = PTR_ERR(pwm->clk); + goto err_free; + } + + pwm->use_count = 0; + pwm->pwm_id = pwm_id; + pwm->pdev = pdev; + + if (parent_pwm != NULL) { + /* registers for the second PWM has offset of 0x10 */ + pwm->mmio_base = parent_pwm->mmio_base + 0x10; + __add_pwm(pwm); + return pwm; + } + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (r == NULL) { + dev_err(&pdev->dev, "no memory resource defined\n"); + ret = -ENODEV; + goto err_free_clk; + } + + r = request_mem_region(r->start, r->end - r->start + 1, pdev->name); + if (r == NULL) { + dev_err(&pdev->dev, "failed to request memory resource\n"); + ret = -EBUSY; + goto err_free_clk; + } + + pwm->mmio_base = ioremap(r->start, r->end - r->start + 1); + if (pwm->mmio_base == NULL) { + dev_err(&pdev->dev, "failed to ioremap() registers\n"); + ret = -ENODEV; + goto err_free_mem; + } + + __add_pwm(pwm); + platform_set_drvdata(pdev, pwm); + return pwm; + +err_free_mem: + release_mem_region(r->start, r->end - r->start + 1); +err_free_clk: + clk_put(pwm->clk); +err_free: + kfree(pwm); + return ERR_PTR(ret); +} + +static int __devinit pxa25x_pwm_probe(struct platform_device *pdev) +{ + struct pwm_device *pwm = pwm_probe(pdev, pdev->id, NULL); + + if (IS_ERR(pwm)) + return PTR_ERR(pwm); + + return 0; +} + +static int __devinit pxa27x_pwm_probe(struct platform_device *pdev) +{ + struct pwm_device *pwm; + + pwm = pwm_probe(pdev, pdev->id * 2, NULL); + if (IS_ERR(pwm)) + return PTR_ERR(pwm); + + pwm = pwm_probe(pdev, pdev->id * 2 + 1, pwm); + if (IS_ERR(pwm)) + return PTR_ERR(pwm); + + return 0; +} + +static int __devexit pwm_remove(struct platform_device *pdev) +{ + struct pwm_device *pwm; + struct resource *r; + + pwm = platform_get_drvdata(pdev); + if (pwm == NULL) + return -ENODEV; + + mutex_lock(&pwm_lock); + list_del(&pwm->node); + mutex_unlock(&pwm_lock); + + iounmap(pwm->mmio_base); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(r->start, r->end - r->start + 1); + + clk_put(pwm->clk); + kfree(pwm); + return 0; +} + +static struct platform_driver pxa25x_pwm_driver = { + .driver = { + .name = "pxa25x-pwm", + }, + .probe = pxa25x_pwm_probe, + .remove = __devexit_p(pwm_remove), +}; + +static struct platform_driver pxa27x_pwm_driver = { + .driver = { + .name = "pxa27x-pwm", + }, + .probe = pxa27x_pwm_probe, + .remove = __devexit_p(pwm_remove), +}; + +static int __init pwm_init(void) +{ + int ret = 0; + + ret = platform_driver_register(&pxa25x_pwm_driver); + if (ret) { + printk(KERN_ERR "failed to register pxa25x_pwm_driver\n"); + return ret; + } + + ret = platform_driver_register(&pxa27x_pwm_driver); + if (ret) { + printk(KERN_ERR "failed to register pxa27x_pwm_driver\n"); + return ret; + } + + return ret; +} +arch_initcall(pwm_init); + +static void __exit pwm_exit(void) +{ + platform_driver_unregister(&pxa25x_pwm_driver); + platform_driver_unregister(&pxa27x_pwm_driver); +} +module_exit(pwm_exit); diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index e5b417d..2bed3f9 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -129,12 +129,12 @@ static struct clk pxa25x_clks[] = { INIT_CKEN("SSPCLK", SSP, 3686400, 0, &pxa25x_device_ssp.dev), INIT_CKEN("SSPCLK", NSSP, 3686400, 0, &pxa25x_device_nssp.dev), INIT_CKEN("SSPCLK", ASSP, 3686400, 0, &pxa25x_device_assp.dev), + INIT_CKEN("PWMCLK", PWM0, 3686400, 0, &pxa25x_device_pwm0.dev), + INIT_CKEN("PWMCLK", PWM1, 3686400, 0, &pxa25x_device_pwm1.dev), INIT_CKEN("AC97CLK", AC97, 24576000, 0, NULL), /* - INIT_CKEN("PWMCLK", PWM0, 3686400, 0, NULL), - INIT_CKEN("PWMCLK", PWM0, 3686400, 0, NULL), INIT_CKEN("I2SCLK", I2S, 14745600, 0, NULL), */ INIT_CKEN("FICPCLK", FICP, 47923000, 0, NULL), @@ -269,6 +269,8 @@ static struct platform_device *pxa25x_devices[] __initdata = { &pxa25x_device_ssp, &pxa25x_device_nssp, &pxa25x_device_assp, + &pxa25x_device_pwm0, + &pxa25x_device_pwm1, }; static struct sys_device pxa25x_sysdev[] = { diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index 7e94583..bc2e80f 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -157,12 +157,13 @@ static struct clk pxa27x_clks[] = { INIT_CKEN("SSPCLK", SSP1, 13000000, 0, &pxa27x_device_ssp1.dev), INIT_CKEN("SSPCLK", SSP2, 13000000, 0, &pxa27x_device_ssp2.dev), INIT_CKEN("SSPCLK", SSP3, 13000000, 0, &pxa27x_device_ssp3.dev), + INIT_CKEN("PWMCLK", PWM0, 13000000, 0, &pxa27x_device_pwm0.dev), + INIT_CKEN("PWMCLK", PWM1, 13000000, 0, &pxa27x_device_pwm1.dev), INIT_CKEN("AC97CLK", AC97, 24576000, 0, NULL), INIT_CKEN("AC97CONFCLK", AC97CONF, 24576000, 0, NULL), /* - INIT_CKEN("PWMCLK", PWM0, 13000000, 0, NULL), INIT_CKEN("MSLCLK", MSL, 48000000, 0, NULL), INIT_CKEN("USIMCLK", USIM, 48000000, 0, NULL), INIT_CKEN("MSTKCLK", MEMSTK, 19500000, 0, NULL), @@ -363,6 +364,8 @@ static struct platform_device *devices[] __initdata = { &pxa27x_device_ssp1, &pxa27x_device_ssp2, &pxa27x_device_ssp3, + &pxa27x_device_pwm0, + &pxa27x_device_pwm1, }; static struct sys_device pxa27x_sysdev[] = { diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c index 644550b..0f717df 100644 --- a/arch/arm/mach-pxa/pxa3xx.c +++ b/arch/arm/mach-pxa/pxa3xx.c @@ -239,6 +239,8 @@ static struct clk pxa3xx_clks[] = { PXA3xx_CKEN("SSPCLK", SSP2, 13000000, 0, &pxa27x_device_ssp2.dev), PXA3xx_CKEN("SSPCLK", SSP3, 13000000, 0, &pxa27x_device_ssp3.dev), PXA3xx_CKEN("SSPCLK", SSP4, 13000000, 0, &pxa3xx_device_ssp4.dev), + PXA3xx_CKEN("PWMCLK", PWM0, 13000000, 0, &pxa27x_device_pwm0.dev), + PXA3xx_CKEN("PWMCLK", PWM1, 13000000, 0, &pxa27x_device_pwm1.dev), PXA3xx_CKEN("MMCCLK", MMC1, 19500000, 0, &pxa_device_mci.dev), PXA3xx_CKEN("MMCCLK", MMC2, 19500000, 0, &pxa3xx_device_mci2.dev), @@ -530,6 +532,8 @@ static struct platform_device *devices[] __initdata = { &pxa27x_device_ssp2, &pxa27x_device_ssp3, &pxa3xx_device_ssp4, + &pxa27x_device_pwm0, + &pxa27x_device_pwm1, }; static struct sys_device pxa3xx_sysdev[] = { -- cgit v0.10.2 From 02a8e76979f9b439642e67955edb865c112926f6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 17:15:32 +0100 Subject: [ARM] pxa: corgibl_limit_intensity build errors If CONFIG_BACKLIGHT_CORGI is not selected, then corgibl_limit_intensity() is not present. However, both corgi_pm.c and sharp_pm.c reference this symbol, resulting in a link error. Wrap the references with the relevant ifdefs, and avoid the resulting NULL pointer dereference by making the code in sharpsl_pm.c also conditional on the config symbol. Signed-off-by: Russell King diff --git a/arch/arm/common/sharpsl_pm.c b/arch/arm/common/sharpsl_pm.c index 5bba525..5736c98 100644 --- a/arch/arm/common/sharpsl_pm.c +++ b/arch/arm/common/sharpsl_pm.c @@ -157,6 +157,7 @@ static void sharpsl_battery_thread(struct work_struct *private_) dev_dbg(sharpsl_pm.dev, "Battery: voltage: %d, status: %d, percentage: %d, time: %ld\n", voltage, sharpsl_pm.battstat.mainbat_status, sharpsl_pm.battstat.mainbat_percent, jiffies); +#ifdef CONFIG_BACKLIGHT_CORGI /* If battery is low. limit backlight intensity to save power. */ if ((sharpsl_pm.battstat.ac_status != APM_AC_ONLINE) && ((sharpsl_pm.battstat.mainbat_status == APM_BATTERY_STATUS_LOW) || @@ -169,6 +170,7 @@ static void sharpsl_battery_thread(struct work_struct *private_) sharpsl_pm.machinfo->backlight_limit(0); sharpsl_pm.flags &= ~SHARPSL_BL_LIMIT; } +#endif /* Suspend if critical battery level */ if ((sharpsl_pm.battstat.ac_status != APM_AC_ONLINE) diff --git a/arch/arm/mach-pxa/corgi_pm.c b/arch/arm/mach-pxa/corgi_pm.c index 0a85f70..ae91c4b 100644 --- a/arch/arm/mach-pxa/corgi_pm.c +++ b/arch/arm/mach-pxa/corgi_pm.c @@ -204,7 +204,9 @@ static struct sharpsl_charger_machinfo corgi_pm_machinfo = { .read_devdata = corgipm_read_devdata, .charger_wakeup = corgi_charger_wakeup, .should_wakeup = corgi_should_wakeup, +#ifdef CONFIG_BACKLIGHT_CORGI .backlight_limit = corgibl_limit_intensity, +#endif .charge_on_volt = SHARPSL_CHARGE_ON_VOLT, .charge_on_temp = SHARPSL_CHARGE_ON_TEMP, .charge_acin_high = SHARPSL_CHARGE_ON_ACIN_HIGH, diff --git a/arch/arm/mach-pxa/spitz_pm.c b/arch/arm/mach-pxa/spitz_pm.c index 23f050f..48396f4 100644 --- a/arch/arm/mach-pxa/spitz_pm.c +++ b/arch/arm/mach-pxa/spitz_pm.c @@ -207,7 +207,9 @@ struct sharpsl_charger_machinfo spitz_pm_machinfo = { .read_devdata = spitzpm_read_devdata, .charger_wakeup = spitz_charger_wakeup, .should_wakeup = spitz_should_wakeup, +#ifdef CONFIG_BACKLIGHT_CORGI .backlight_limit = corgibl_limit_intensity, +#endif .charge_on_volt = SHARPSL_CHARGE_ON_VOLT, .charge_on_temp = SHARPSL_CHARGE_ON_TEMP, .charge_acin_high = SHARPSL_CHARGE_ON_ACIN_HIGH, -- cgit v0.10.2 From 284d115ec9b70d7c38752d10ad393a198db07a4b Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 17:32:16 +0100 Subject: [ARM] pxa: separate PXA25x and PXA27x UDC register definitions The PXA25x and PXA27x USB device controller register definitions are different. Currently, they live side by side in pxa-regs.h, but only one set is available depending on the setting of PXA25x or PXA27x. This means that if we build to support both PXA25x and PXA27x, the PXA27x definitions are unavailable, even to PXA27x specific code. Remove these definitions from pxa-regs.h, and place them in separate files. Include these files where appropriate. Note: according to the dependencies in drivers/usb/gadget/Kconfig, we do not support the UDC on PXA27x nor PXA3xx CPUs, so remove the platform devices from pxa27x.c and pxa3xx.c. Signed-off-by: Russell King diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index edc4f07..1269ac9 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index 7e94583..cdaf573 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -353,7 +353,7 @@ void __init pxa_set_i2c_power_info(struct i2c_pxa_platform_data *info) } static struct platform_device *devices[] __initdata = { - &pxa_device_udc, +/* &pxa_device_udc, The UDC driver is PXA25x only */ &pxa_device_ffuart, &pxa_device_btuart, &pxa_device_stuart, diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c index 644550b..7fbe786 100644 --- a/arch/arm/mach-pxa/pxa3xx.c +++ b/arch/arm/mach-pxa/pxa3xx.c @@ -520,7 +520,7 @@ void __init pxa3xx_init_irq(void) */ static struct platform_device *devices[] __initdata = { - &pxa_device_udc, +/* &pxa_device_udc, The UDC driver is PXA25x only */ &pxa_device_ffuart, &pxa_device_btuart, &pxa_device_stuart, diff --git a/drivers/usb/gadget/pxa2xx_udc.c b/drivers/usb/gadget/pxa2xx_udc.c index 08f699b..63db96a 100644 --- a/drivers/usb/gadget/pxa2xx_udc.c +++ b/drivers/usb/gadget/pxa2xx_udc.c @@ -46,19 +46,25 @@ #include #include #include +#include #include #include #include -#include #include #include #include -#include #include #include +/* + * This driver is PXA25x only. Grab the right register definitions. + */ +#ifdef CONFIG_ARCH_PXA +#include +#endif + #include diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index 4b2ea1e..68d7428 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -600,418 +600,6 @@ /* - * USB Device Controller - * PXA25x and PXA27x USB device controller registers are different. - */ -#if defined(CONFIG_PXA25x) - -#define UDC_RES1 __REG(0x40600004) /* UDC Undocumented - Reserved1 */ -#define UDC_RES2 __REG(0x40600008) /* UDC Undocumented - Reserved2 */ -#define UDC_RES3 __REG(0x4060000C) /* UDC Undocumented - Reserved3 */ - -#define UDCCR __REG(0x40600000) /* UDC Control Register */ -#define UDCCR_UDE (1 << 0) /* UDC enable */ -#define UDCCR_UDA (1 << 1) /* UDC active */ -#define UDCCR_RSM (1 << 2) /* Device resume */ -#define UDCCR_RESIR (1 << 3) /* Resume interrupt request */ -#define UDCCR_SUSIR (1 << 4) /* Suspend interrupt request */ -#define UDCCR_SRM (1 << 5) /* Suspend/resume interrupt mask */ -#define UDCCR_RSTIR (1 << 6) /* Reset interrupt request */ -#define UDCCR_REM (1 << 7) /* Reset interrupt mask */ - -#define UDCCS0 __REG(0x40600010) /* UDC Endpoint 0 Control/Status Register */ -#define UDCCS0_OPR (1 << 0) /* OUT packet ready */ -#define UDCCS0_IPR (1 << 1) /* IN packet ready */ -#define UDCCS0_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS0_DRWF (1 << 3) /* Device remote wakeup feature */ -#define UDCCS0_SST (1 << 4) /* Sent stall */ -#define UDCCS0_FST (1 << 5) /* Force stall */ -#define UDCCS0_RNE (1 << 6) /* Receive FIFO no empty */ -#define UDCCS0_SA (1 << 7) /* Setup active */ - -/* Bulk IN - Endpoint 1,6,11 */ -#define UDCCS1 __REG(0x40600014) /* UDC Endpoint 1 (IN) Control/Status Register */ -#define UDCCS6 __REG(0x40600028) /* UDC Endpoint 6 (IN) Control/Status Register */ -#define UDCCS11 __REG(0x4060003C) /* UDC Endpoint 11 (IN) Control/Status Register */ - -#define UDCCS_BI_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_BI_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_BI_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_BI_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_BI_SST (1 << 4) /* Sent stall */ -#define UDCCS_BI_FST (1 << 5) /* Force stall */ -#define UDCCS_BI_TSP (1 << 7) /* Transmit short packet */ - -/* Bulk OUT - Endpoint 2,7,12 */ -#define UDCCS2 __REG(0x40600018) /* UDC Endpoint 2 (OUT) Control/Status Register */ -#define UDCCS7 __REG(0x4060002C) /* UDC Endpoint 7 (OUT) Control/Status Register */ -#define UDCCS12 __REG(0x40600040) /* UDC Endpoint 12 (OUT) Control/Status Register */ - -#define UDCCS_BO_RFS (1 << 0) /* Receive FIFO service */ -#define UDCCS_BO_RPC (1 << 1) /* Receive packet complete */ -#define UDCCS_BO_DME (1 << 3) /* DMA enable */ -#define UDCCS_BO_SST (1 << 4) /* Sent stall */ -#define UDCCS_BO_FST (1 << 5) /* Force stall */ -#define UDCCS_BO_RNE (1 << 6) /* Receive FIFO not empty */ -#define UDCCS_BO_RSP (1 << 7) /* Receive short packet */ - -/* Isochronous IN - Endpoint 3,8,13 */ -#define UDCCS3 __REG(0x4060001C) /* UDC Endpoint 3 (IN) Control/Status Register */ -#define UDCCS8 __REG(0x40600030) /* UDC Endpoint 8 (IN) Control/Status Register */ -#define UDCCS13 __REG(0x40600044) /* UDC Endpoint 13 (IN) Control/Status Register */ - -#define UDCCS_II_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_II_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_II_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_II_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_II_TSP (1 << 7) /* Transmit short packet */ - -/* Isochronous OUT - Endpoint 4,9,14 */ -#define UDCCS4 __REG(0x40600020) /* UDC Endpoint 4 (OUT) Control/Status Register */ -#define UDCCS9 __REG(0x40600034) /* UDC Endpoint 9 (OUT) Control/Status Register */ -#define UDCCS14 __REG(0x40600048) /* UDC Endpoint 14 (OUT) Control/Status Register */ - -#define UDCCS_IO_RFS (1 << 0) /* Receive FIFO service */ -#define UDCCS_IO_RPC (1 << 1) /* Receive packet complete */ -#define UDCCS_IO_ROF (1 << 2) /* Receive overflow */ -#define UDCCS_IO_DME (1 << 3) /* DMA enable */ -#define UDCCS_IO_RNE (1 << 6) /* Receive FIFO not empty */ -#define UDCCS_IO_RSP (1 << 7) /* Receive short packet */ - -/* Interrupt IN - Endpoint 5,10,15 */ -#define UDCCS5 __REG(0x40600024) /* UDC Endpoint 5 (Interrupt) Control/Status Register */ -#define UDCCS10 __REG(0x40600038) /* UDC Endpoint 10 (Interrupt) Control/Status Register */ -#define UDCCS15 __REG(0x4060004C) /* UDC Endpoint 15 (Interrupt) Control/Status Register */ - -#define UDCCS_INT_TFS (1 << 0) /* Transmit FIFO service */ -#define UDCCS_INT_TPC (1 << 1) /* Transmit packet complete */ -#define UDCCS_INT_FTF (1 << 2) /* Flush Tx FIFO */ -#define UDCCS_INT_TUR (1 << 3) /* Transmit FIFO underrun */ -#define UDCCS_INT_SST (1 << 4) /* Sent stall */ -#define UDCCS_INT_FST (1 << 5) /* Force stall */ -#define UDCCS_INT_TSP (1 << 7) /* Transmit short packet */ - -#define UFNRH __REG(0x40600060) /* UDC Frame Number Register High */ -#define UFNRL __REG(0x40600064) /* UDC Frame Number Register Low */ -#define UBCR2 __REG(0x40600068) /* UDC Byte Count Reg 2 */ -#define UBCR4 __REG(0x4060006c) /* UDC Byte Count Reg 4 */ -#define UBCR7 __REG(0x40600070) /* UDC Byte Count Reg 7 */ -#define UBCR9 __REG(0x40600074) /* UDC Byte Count Reg 9 */ -#define UBCR12 __REG(0x40600078) /* UDC Byte Count Reg 12 */ -#define UBCR14 __REG(0x4060007c) /* UDC Byte Count Reg 14 */ -#define UDDR0 __REG(0x40600080) /* UDC Endpoint 0 Data Register */ -#define UDDR1 __REG(0x40600100) /* UDC Endpoint 1 Data Register */ -#define UDDR2 __REG(0x40600180) /* UDC Endpoint 2 Data Register */ -#define UDDR3 __REG(0x40600200) /* UDC Endpoint 3 Data Register */ -#define UDDR4 __REG(0x40600400) /* UDC Endpoint 4 Data Register */ -#define UDDR5 __REG(0x406000A0) /* UDC Endpoint 5 Data Register */ -#define UDDR6 __REG(0x40600600) /* UDC Endpoint 6 Data Register */ -#define UDDR7 __REG(0x40600680) /* UDC Endpoint 7 Data Register */ -#define UDDR8 __REG(0x40600700) /* UDC Endpoint 8 Data Register */ -#define UDDR9 __REG(0x40600900) /* UDC Endpoint 9 Data Register */ -#define UDDR10 __REG(0x406000C0) /* UDC Endpoint 10 Data Register */ -#define UDDR11 __REG(0x40600B00) /* UDC Endpoint 11 Data Register */ -#define UDDR12 __REG(0x40600B80) /* UDC Endpoint 12 Data Register */ -#define UDDR13 __REG(0x40600C00) /* UDC Endpoint 13 Data Register */ -#define UDDR14 __REG(0x40600E00) /* UDC Endpoint 14 Data Register */ -#define UDDR15 __REG(0x406000E0) /* UDC Endpoint 15 Data Register */ - -#define UICR0 __REG(0x40600050) /* UDC Interrupt Control Register 0 */ - -#define UICR0_IM0 (1 << 0) /* Interrupt mask ep 0 */ -#define UICR0_IM1 (1 << 1) /* Interrupt mask ep 1 */ -#define UICR0_IM2 (1 << 2) /* Interrupt mask ep 2 */ -#define UICR0_IM3 (1 << 3) /* Interrupt mask ep 3 */ -#define UICR0_IM4 (1 << 4) /* Interrupt mask ep 4 */ -#define UICR0_IM5 (1 << 5) /* Interrupt mask ep 5 */ -#define UICR0_IM6 (1 << 6) /* Interrupt mask ep 6 */ -#define UICR0_IM7 (1 << 7) /* Interrupt mask ep 7 */ - -#define UICR1 __REG(0x40600054) /* UDC Interrupt Control Register 1 */ - -#define UICR1_IM8 (1 << 0) /* Interrupt mask ep 8 */ -#define UICR1_IM9 (1 << 1) /* Interrupt mask ep 9 */ -#define UICR1_IM10 (1 << 2) /* Interrupt mask ep 10 */ -#define UICR1_IM11 (1 << 3) /* Interrupt mask ep 11 */ -#define UICR1_IM12 (1 << 4) /* Interrupt mask ep 12 */ -#define UICR1_IM13 (1 << 5) /* Interrupt mask ep 13 */ -#define UICR1_IM14 (1 << 6) /* Interrupt mask ep 14 */ -#define UICR1_IM15 (1 << 7) /* Interrupt mask ep 15 */ - -#define USIR0 __REG(0x40600058) /* UDC Status Interrupt Register 0 */ - -#define USIR0_IR0 (1 << 0) /* Interrupt request ep 0 */ -#define USIR0_IR1 (1 << 1) /* Interrupt request ep 1 */ -#define USIR0_IR2 (1 << 2) /* Interrupt request ep 2 */ -#define USIR0_IR3 (1 << 3) /* Interrupt request ep 3 */ -#define USIR0_IR4 (1 << 4) /* Interrupt request ep 4 */ -#define USIR0_IR5 (1 << 5) /* Interrupt request ep 5 */ -#define USIR0_IR6 (1 << 6) /* Interrupt request ep 6 */ -#define USIR0_IR7 (1 << 7) /* Interrupt request ep 7 */ - -#define USIR1 __REG(0x4060005C) /* UDC Status Interrupt Register 1 */ - -#define USIR1_IR8 (1 << 0) /* Interrupt request ep 8 */ -#define USIR1_IR9 (1 << 1) /* Interrupt request ep 9 */ -#define USIR1_IR10 (1 << 2) /* Interrupt request ep 10 */ -#define USIR1_IR11 (1 << 3) /* Interrupt request ep 11 */ -#define USIR1_IR12 (1 << 4) /* Interrupt request ep 12 */ -#define USIR1_IR13 (1 << 5) /* Interrupt request ep 13 */ -#define USIR1_IR14 (1 << 6) /* Interrupt request ep 14 */ -#define USIR1_IR15 (1 << 7) /* Interrupt request ep 15 */ - -#elif defined(CONFIG_PXA27x) - -#define UDCCR __REG(0x40600000) /* UDC Control Register */ -#define UDCCR_OEN (1 << 31) /* On-the-Go Enable */ -#define UDCCR_AALTHNP (1 << 30) /* A-device Alternate Host Negotiation - Protocol Port Support */ -#define UDCCR_AHNP (1 << 29) /* A-device Host Negotiation Protocol - Support */ -#define UDCCR_BHNP (1 << 28) /* B-device Host Negotiation Protocol - Enable */ -#define UDCCR_DWRE (1 << 16) /* Device Remote Wake-up Enable */ -#define UDCCR_ACN (0x03 << 11) /* Active UDC configuration Number */ -#define UDCCR_ACN_S 11 -#define UDCCR_AIN (0x07 << 8) /* Active UDC interface Number */ -#define UDCCR_AIN_S 8 -#define UDCCR_AAISN (0x07 << 5) /* Active UDC Alternate Interface - Setting Number */ -#define UDCCR_AAISN_S 5 -#define UDCCR_SMAC (1 << 4) /* Switch Endpoint Memory to Active - Configuration */ -#define UDCCR_EMCE (1 << 3) /* Endpoint Memory Configuration - Error */ -#define UDCCR_UDR (1 << 2) /* UDC Resume */ -#define UDCCR_UDA (1 << 1) /* UDC Active */ -#define UDCCR_UDE (1 << 0) /* UDC Enable */ - -#define UDCICR0 __REG(0x40600004) /* UDC Interrupt Control Register0 */ -#define UDCICR1 __REG(0x40600008) /* UDC Interrupt Control Register1 */ -#define UDCICR_FIFOERR (1 << 1) /* FIFO Error interrupt for EP */ -#define UDCICR_PKTCOMPL (1 << 0) /* Packet Complete interrupt for EP */ - -#define UDC_INT_FIFOERROR (0x2) -#define UDC_INT_PACKETCMP (0x1) - -#define UDCICR_INT(n,intr) (((intr) & 0x03) << (((n) & 0x0F) * 2)) -#define UDCICR1_IECC (1 << 31) /* IntEn - Configuration Change */ -#define UDCICR1_IESOF (1 << 30) /* IntEn - Start of Frame */ -#define UDCICR1_IERU (1 << 29) /* IntEn - Resume */ -#define UDCICR1_IESU (1 << 28) /* IntEn - Suspend */ -#define UDCICR1_IERS (1 << 27) /* IntEn - Reset */ - -#define UDCISR0 __REG(0x4060000C) /* UDC Interrupt Status Register 0 */ -#define UDCISR1 __REG(0x40600010) /* UDC Interrupt Status Register 1 */ -#define UDCISR_INT(n,intr) (((intr) & 0x03) << (((n) & 0x0F) * 2)) -#define UDCISR1_IRCC (1 << 31) /* IntReq - Configuration Change */ -#define UDCISR1_IRSOF (1 << 30) /* IntReq - Start of Frame */ -#define UDCISR1_IRRU (1 << 29) /* IntReq - Resume */ -#define UDCISR1_IRSU (1 << 28) /* IntReq - Suspend */ -#define UDCISR1_IRRS (1 << 27) /* IntReq - Reset */ - -#define UDCFNR __REG(0x40600014) /* UDC Frame Number Register */ -#define UDCOTGICR __REG(0x40600018) /* UDC On-The-Go interrupt control */ -#define UDCOTGICR_IESF (1 << 24) /* OTG SET_FEATURE command recvd */ -#define UDCOTGICR_IEXR (1 << 17) /* Extra Transciever Interrupt - Rising Edge Interrupt Enable */ -#define UDCOTGICR_IEXF (1 << 16) /* Extra Transciever Interrupt - Falling Edge Interrupt Enable */ -#define UDCOTGICR_IEVV40R (1 << 9) /* OTG Vbus Valid 4.0V Rising Edge - Interrupt Enable */ -#define UDCOTGICR_IEVV40F (1 << 8) /* OTG Vbus Valid 4.0V Falling Edge - Interrupt Enable */ -#define UDCOTGICR_IEVV44R (1 << 7) /* OTG Vbus Valid 4.4V Rising Edge - Interrupt Enable */ -#define UDCOTGICR_IEVV44F (1 << 6) /* OTG Vbus Valid 4.4V Falling Edge - Interrupt Enable */ -#define UDCOTGICR_IESVR (1 << 5) /* OTG Session Valid Rising Edge - Interrupt Enable */ -#define UDCOTGICR_IESVF (1 << 4) /* OTG Session Valid Falling Edge - Interrupt Enable */ -#define UDCOTGICR_IESDR (1 << 3) /* OTG A-Device SRP Detect Rising - Edge Interrupt Enable */ -#define UDCOTGICR_IESDF (1 << 2) /* OTG A-Device SRP Detect Falling - Edge Interrupt Enable */ -#define UDCOTGICR_IEIDR (1 << 1) /* OTG ID Change Rising Edge - Interrupt Enable */ -#define UDCOTGICR_IEIDF (1 << 0) /* OTG ID Change Falling Edge - Interrupt Enable */ - -#define UP2OCR __REG(0x40600020) /* USB Port 2 Output Control register */ - -#define UP2OCR_CPVEN (1 << 0) /* Charge Pump Vbus Enable */ -#define UP2OCR_CPVPE (1 << 1) /* Charge Pump Vbus Pulse Enable */ -#define UP2OCR_DPPDE (1 << 2) /* Host Port 2 Transceiver D+ Pull Down Enable */ -#define UP2OCR_DMPDE (1 << 3) /* Host Port 2 Transceiver D- Pull Down Enable */ -#define UP2OCR_DPPUE (1 << 4) /* Host Port 2 Transceiver D+ Pull Up Enable */ -#define UP2OCR_DMPUE (1 << 5) /* Host Port 2 Transceiver D- Pull Up Enable */ -#define UP2OCR_DPPUBE (1 << 6) /* Host Port 2 Transceiver D+ Pull Up Bypass Enable */ -#define UP2OCR_DMPUBE (1 << 7) /* Host Port 2 Transceiver D- Pull Up Bypass Enable */ -#define UP2OCR_EXSP (1 << 8) /* External Transceiver Speed Control */ -#define UP2OCR_EXSUS (1 << 9) /* External Transceiver Speed Enable */ -#define UP2OCR_IDON (1 << 10) /* OTG ID Read Enable */ -#define UP2OCR_HXS (1 << 16) /* Host Port 2 Transceiver Output Select */ -#define UP2OCR_HXOE (1 << 17) /* Host Port 2 Transceiver Output Enable */ -#define UP2OCR_SEOS (1 << 24) /* Single-Ended Output Select */ - -#define UDCCSN(x) __REG2(0x40600100, (x) << 2) -#define UDCCSR0 __REG(0x40600100) /* UDC Control/Status register - Endpoint 0 */ -#define UDCCSR0_SA (1 << 7) /* Setup Active */ -#define UDCCSR0_RNE (1 << 6) /* Receive FIFO Not Empty */ -#define UDCCSR0_FST (1 << 5) /* Force Stall */ -#define UDCCSR0_SST (1 << 4) /* Sent Stall */ -#define UDCCSR0_DME (1 << 3) /* DMA Enable */ -#define UDCCSR0_FTF (1 << 2) /* Flush Transmit FIFO */ -#define UDCCSR0_IPR (1 << 1) /* IN Packet Ready */ -#define UDCCSR0_OPC (1 << 0) /* OUT Packet Complete */ - -#define UDCCSRA __REG(0x40600104) /* UDC Control/Status register - Endpoint A */ -#define UDCCSRB __REG(0x40600108) /* UDC Control/Status register - Endpoint B */ -#define UDCCSRC __REG(0x4060010C) /* UDC Control/Status register - Endpoint C */ -#define UDCCSRD __REG(0x40600110) /* UDC Control/Status register - Endpoint D */ -#define UDCCSRE __REG(0x40600114) /* UDC Control/Status register - Endpoint E */ -#define UDCCSRF __REG(0x40600118) /* UDC Control/Status register - Endpoint F */ -#define UDCCSRG __REG(0x4060011C) /* UDC Control/Status register - Endpoint G */ -#define UDCCSRH __REG(0x40600120) /* UDC Control/Status register - Endpoint H */ -#define UDCCSRI __REG(0x40600124) /* UDC Control/Status register - Endpoint I */ -#define UDCCSRJ __REG(0x40600128) /* UDC Control/Status register - Endpoint J */ -#define UDCCSRK __REG(0x4060012C) /* UDC Control/Status register - Endpoint K */ -#define UDCCSRL __REG(0x40600130) /* UDC Control/Status register - Endpoint L */ -#define UDCCSRM __REG(0x40600134) /* UDC Control/Status register - Endpoint M */ -#define UDCCSRN __REG(0x40600138) /* UDC Control/Status register - Endpoint N */ -#define UDCCSRP __REG(0x4060013C) /* UDC Control/Status register - Endpoint P */ -#define UDCCSRQ __REG(0x40600140) /* UDC Control/Status register - Endpoint Q */ -#define UDCCSRR __REG(0x40600144) /* UDC Control/Status register - Endpoint R */ -#define UDCCSRS __REG(0x40600148) /* UDC Control/Status register - Endpoint S */ -#define UDCCSRT __REG(0x4060014C) /* UDC Control/Status register - Endpoint T */ -#define UDCCSRU __REG(0x40600150) /* UDC Control/Status register - Endpoint U */ -#define UDCCSRV __REG(0x40600154) /* UDC Control/Status register - Endpoint V */ -#define UDCCSRW __REG(0x40600158) /* UDC Control/Status register - Endpoint W */ -#define UDCCSRX __REG(0x4060015C) /* UDC Control/Status register - Endpoint X */ - -#define UDCCSR_DPE (1 << 9) /* Data Packet Error */ -#define UDCCSR_FEF (1 << 8) /* Flush Endpoint FIFO */ -#define UDCCSR_SP (1 << 7) /* Short Packet Control/Status */ -#define UDCCSR_BNE (1 << 6) /* Buffer Not Empty (IN endpoints) */ -#define UDCCSR_BNF (1 << 6) /* Buffer Not Full (OUT endpoints) */ -#define UDCCSR_FST (1 << 5) /* Force STALL */ -#define UDCCSR_SST (1 << 4) /* Sent STALL */ -#define UDCCSR_DME (1 << 3) /* DMA Enable */ -#define UDCCSR_TRN (1 << 2) /* Tx/Rx NAK */ -#define UDCCSR_PC (1 << 1) /* Packet Complete */ -#define UDCCSR_FS (1 << 0) /* FIFO needs service */ - -#define UDCBCN(x) __REG2(0x40600200, (x)<<2) -#define UDCBCR0 __REG(0x40600200) /* Byte Count Register - EP0 */ -#define UDCBCRA __REG(0x40600204) /* Byte Count Register - EPA */ -#define UDCBCRB __REG(0x40600208) /* Byte Count Register - EPB */ -#define UDCBCRC __REG(0x4060020C) /* Byte Count Register - EPC */ -#define UDCBCRD __REG(0x40600210) /* Byte Count Register - EPD */ -#define UDCBCRE __REG(0x40600214) /* Byte Count Register - EPE */ -#define UDCBCRF __REG(0x40600218) /* Byte Count Register - EPF */ -#define UDCBCRG __REG(0x4060021C) /* Byte Count Register - EPG */ -#define UDCBCRH __REG(0x40600220) /* Byte Count Register - EPH */ -#define UDCBCRI __REG(0x40600224) /* Byte Count Register - EPI */ -#define UDCBCRJ __REG(0x40600228) /* Byte Count Register - EPJ */ -#define UDCBCRK __REG(0x4060022C) /* Byte Count Register - EPK */ -#define UDCBCRL __REG(0x40600230) /* Byte Count Register - EPL */ -#define UDCBCRM __REG(0x40600234) /* Byte Count Register - EPM */ -#define UDCBCRN __REG(0x40600238) /* Byte Count Register - EPN */ -#define UDCBCRP __REG(0x4060023C) /* Byte Count Register - EPP */ -#define UDCBCRQ __REG(0x40600240) /* Byte Count Register - EPQ */ -#define UDCBCRR __REG(0x40600244) /* Byte Count Register - EPR */ -#define UDCBCRS __REG(0x40600248) /* Byte Count Register - EPS */ -#define UDCBCRT __REG(0x4060024C) /* Byte Count Register - EPT */ -#define UDCBCRU __REG(0x40600250) /* Byte Count Register - EPU */ -#define UDCBCRV __REG(0x40600254) /* Byte Count Register - EPV */ -#define UDCBCRW __REG(0x40600258) /* Byte Count Register - EPW */ -#define UDCBCRX __REG(0x4060025C) /* Byte Count Register - EPX */ - -#define UDCDN(x) __REG2(0x40600300, (x)<<2) -#define PHYS_UDCDN(x) (0x40600300 + ((x)<<2)) -#define PUDCDN(x) (volatile u32 *)(io_p2v(PHYS_UDCDN((x)))) -#define UDCDR0 __REG(0x40600300) /* Data Register - EP0 */ -#define UDCDRA __REG(0x40600304) /* Data Register - EPA */ -#define UDCDRB __REG(0x40600308) /* Data Register - EPB */ -#define UDCDRC __REG(0x4060030C) /* Data Register - EPC */ -#define UDCDRD __REG(0x40600310) /* Data Register - EPD */ -#define UDCDRE __REG(0x40600314) /* Data Register - EPE */ -#define UDCDRF __REG(0x40600318) /* Data Register - EPF */ -#define UDCDRG __REG(0x4060031C) /* Data Register - EPG */ -#define UDCDRH __REG(0x40600320) /* Data Register - EPH */ -#define UDCDRI __REG(0x40600324) /* Data Register - EPI */ -#define UDCDRJ __REG(0x40600328) /* Data Register - EPJ */ -#define UDCDRK __REG(0x4060032C) /* Data Register - EPK */ -#define UDCDRL __REG(0x40600330) /* Data Register - EPL */ -#define UDCDRM __REG(0x40600334) /* Data Register - EPM */ -#define UDCDRN __REG(0x40600338) /* Data Register - EPN */ -#define UDCDRP __REG(0x4060033C) /* Data Register - EPP */ -#define UDCDRQ __REG(0x40600340) /* Data Register - EPQ */ -#define UDCDRR __REG(0x40600344) /* Data Register - EPR */ -#define UDCDRS __REG(0x40600348) /* Data Register - EPS */ -#define UDCDRT __REG(0x4060034C) /* Data Register - EPT */ -#define UDCDRU __REG(0x40600350) /* Data Register - EPU */ -#define UDCDRV __REG(0x40600354) /* Data Register - EPV */ -#define UDCDRW __REG(0x40600358) /* Data Register - EPW */ -#define UDCDRX __REG(0x4060035C) /* Data Register - EPX */ - -#define UDCCN(x) __REG2(0x40600400, (x)<<2) -#define UDCCRA __REG(0x40600404) /* Configuration register EPA */ -#define UDCCRB __REG(0x40600408) /* Configuration register EPB */ -#define UDCCRC __REG(0x4060040C) /* Configuration register EPC */ -#define UDCCRD __REG(0x40600410) /* Configuration register EPD */ -#define UDCCRE __REG(0x40600414) /* Configuration register EPE */ -#define UDCCRF __REG(0x40600418) /* Configuration register EPF */ -#define UDCCRG __REG(0x4060041C) /* Configuration register EPG */ -#define UDCCRH __REG(0x40600420) /* Configuration register EPH */ -#define UDCCRI __REG(0x40600424) /* Configuration register EPI */ -#define UDCCRJ __REG(0x40600428) /* Configuration register EPJ */ -#define UDCCRK __REG(0x4060042C) /* Configuration register EPK */ -#define UDCCRL __REG(0x40600430) /* Configuration register EPL */ -#define UDCCRM __REG(0x40600434) /* Configuration register EPM */ -#define UDCCRN __REG(0x40600438) /* Configuration register EPN */ -#define UDCCRP __REG(0x4060043C) /* Configuration register EPP */ -#define UDCCRQ __REG(0x40600440) /* Configuration register EPQ */ -#define UDCCRR __REG(0x40600444) /* Configuration register EPR */ -#define UDCCRS __REG(0x40600448) /* Configuration register EPS */ -#define UDCCRT __REG(0x4060044C) /* Configuration register EPT */ -#define UDCCRU __REG(0x40600450) /* Configuration register EPU */ -#define UDCCRV __REG(0x40600454) /* Configuration register EPV */ -#define UDCCRW __REG(0x40600458) /* Configuration register EPW */ -#define UDCCRX __REG(0x4060045C) /* Configuration register EPX */ - -#define UDCCONR_CN (0x03 << 25) /* Configuration Number */ -#define UDCCONR_CN_S (25) -#define UDCCONR_IN (0x07 << 22) /* Interface Number */ -#define UDCCONR_IN_S (22) -#define UDCCONR_AISN (0x07 << 19) /* Alternate Interface Number */ -#define UDCCONR_AISN_S (19) -#define UDCCONR_EN (0x0f << 15) /* Endpoint Number */ -#define UDCCONR_EN_S (15) -#define UDCCONR_ET (0x03 << 13) /* Endpoint Type: */ -#define UDCCONR_ET_S (13) -#define UDCCONR_ET_INT (0x03 << 13) /* Interrupt */ -#define UDCCONR_ET_BULK (0x02 << 13) /* Bulk */ -#define UDCCONR_ET_ISO (0x01 << 13) /* Isochronous */ -#define UDCCONR_ET_NU (0x00 << 13) /* Not used */ -#define UDCCONR_ED (1 << 12) /* Endpoint Direction */ -#define UDCCONR_MPS (0x3ff << 2) /* Maximum Packet Size */ -#define UDCCONR_MPS_S (2) -#define UDCCONR_DE (1 << 1) /* Double Buffering Enable */ -#define UDCCONR_EE (1 << 0) /* Endpoint Enable */ - - -#define UDC_INT_FIFOERROR (0x2) -#define UDC_INT_PACKETCMP (0x1) - -#define UDC_FNR_MASK (0x7ff) - -#define UDCCSR_WR_MASK (UDCCSR_DME|UDCCSR_FST) -#define UDC_BCR_MASK (0x3ff) -#endif - -/* * Fast Infrared Communication Port */ diff --git a/include/asm-arm/arch-pxa/pxa25x-udc.h b/include/asm-arm/arch-pxa/pxa25x-udc.h new file mode 100644 index 0000000..8403059 --- /dev/null +++ b/include/asm-arm/arch-pxa/pxa25x-udc.h @@ -0,0 +1,163 @@ +#ifndef _ASM_ARCH_PXA25X_UDC_H +#define _ASM_ARCH_PXA25X_UDC_H + +#ifdef _ASM_ARCH_PXA27X_UDC_H +#error You can't include both PXA25x and PXA27x UDC support +#endif + +#define UDC_RES1 __REG(0x40600004) /* UDC Undocumented - Reserved1 */ +#define UDC_RES2 __REG(0x40600008) /* UDC Undocumented - Reserved2 */ +#define UDC_RES3 __REG(0x4060000C) /* UDC Undocumented - Reserved3 */ + +#define UDCCR __REG(0x40600000) /* UDC Control Register */ +#define UDCCR_UDE (1 << 0) /* UDC enable */ +#define UDCCR_UDA (1 << 1) /* UDC active */ +#define UDCCR_RSM (1 << 2) /* Device resume */ +#define UDCCR_RESIR (1 << 3) /* Resume interrupt request */ +#define UDCCR_SUSIR (1 << 4) /* Suspend interrupt request */ +#define UDCCR_SRM (1 << 5) /* Suspend/resume interrupt mask */ +#define UDCCR_RSTIR (1 << 6) /* Reset interrupt request */ +#define UDCCR_REM (1 << 7) /* Reset interrupt mask */ + +#define UDCCS0 __REG(0x40600010) /* UDC Endpoint 0 Control/Status Register */ +#define UDCCS0_OPR (1 << 0) /* OUT packet ready */ +#define UDCCS0_IPR (1 << 1) /* IN packet ready */ +#define UDCCS0_FTF (1 << 2) /* Flush Tx FIFO */ +#define UDCCS0_DRWF (1 << 3) /* Device remote wakeup feature */ +#define UDCCS0_SST (1 << 4) /* Sent stall */ +#define UDCCS0_FST (1 << 5) /* Force stall */ +#define UDCCS0_RNE (1 << 6) /* Receive FIFO no empty */ +#define UDCCS0_SA (1 << 7) /* Setup active */ + +/* Bulk IN - Endpoint 1,6,11 */ +#define UDCCS1 __REG(0x40600014) /* UDC Endpoint 1 (IN) Control/Status Register */ +#define UDCCS6 __REG(0x40600028) /* UDC Endpoint 6 (IN) Control/Status Register */ +#define UDCCS11 __REG(0x4060003C) /* UDC Endpoint 11 (IN) Control/Status Register */ + +#define UDCCS_BI_TFS (1 << 0) /* Transmit FIFO service */ +#define UDCCS_BI_TPC (1 << 1) /* Transmit packet complete */ +#define UDCCS_BI_FTF (1 << 2) /* Flush Tx FIFO */ +#define UDCCS_BI_TUR (1 << 3) /* Transmit FIFO underrun */ +#define UDCCS_BI_SST (1 << 4) /* Sent stall */ +#define UDCCS_BI_FST (1 << 5) /* Force stall */ +#define UDCCS_BI_TSP (1 << 7) /* Transmit short packet */ + +/* Bulk OUT - Endpoint 2,7,12 */ +#define UDCCS2 __REG(0x40600018) /* UDC Endpoint 2 (OUT) Control/Status Register */ +#define UDCCS7 __REG(0x4060002C) /* UDC Endpoint 7 (OUT) Control/Status Register */ +#define UDCCS12 __REG(0x40600040) /* UDC Endpoint 12 (OUT) Control/Status Register */ + +#define UDCCS_BO_RFS (1 << 0) /* Receive FIFO service */ +#define UDCCS_BO_RPC (1 << 1) /* Receive packet complete */ +#define UDCCS_BO_DME (1 << 3) /* DMA enable */ +#define UDCCS_BO_SST (1 << 4) /* Sent stall */ +#define UDCCS_BO_FST (1 << 5) /* Force stall */ +#define UDCCS_BO_RNE (1 << 6) /* Receive FIFO not empty */ +#define UDCCS_BO_RSP (1 << 7) /* Receive short packet */ + +/* Isochronous IN - Endpoint 3,8,13 */ +#define UDCCS3 __REG(0x4060001C) /* UDC Endpoint 3 (IN) Control/Status Register */ +#define UDCCS8 __REG(0x40600030) /* UDC Endpoint 8 (IN) Control/Status Register */ +#define UDCCS13 __REG(0x40600044) /* UDC Endpoint 13 (IN) Control/Status Register */ + +#define UDCCS_II_TFS (1 << 0) /* Transmit FIFO service */ +#define UDCCS_II_TPC (1 << 1) /* Transmit packet complete */ +#define UDCCS_II_FTF (1 << 2) /* Flush Tx FIFO */ +#define UDCCS_II_TUR (1 << 3) /* Transmit FIFO underrun */ +#define UDCCS_II_TSP (1 << 7) /* Transmit short packet */ + +/* Isochronous OUT - Endpoint 4,9,14 */ +#define UDCCS4 __REG(0x40600020) /* UDC Endpoint 4 (OUT) Control/Status Register */ +#define UDCCS9 __REG(0x40600034) /* UDC Endpoint 9 (OUT) Control/Status Register */ +#define UDCCS14 __REG(0x40600048) /* UDC Endpoint 14 (OUT) Control/Status Register */ + +#define UDCCS_IO_RFS (1 << 0) /* Receive FIFO service */ +#define UDCCS_IO_RPC (1 << 1) /* Receive packet complete */ +#define UDCCS_IO_ROF (1 << 2) /* Receive overflow */ +#define UDCCS_IO_DME (1 << 3) /* DMA enable */ +#define UDCCS_IO_RNE (1 << 6) /* Receive FIFO not empty */ +#define UDCCS_IO_RSP (1 << 7) /* Receive short packet */ + +/* Interrupt IN - Endpoint 5,10,15 */ +#define UDCCS5 __REG(0x40600024) /* UDC Endpoint 5 (Interrupt) Control/Status Register */ +#define UDCCS10 __REG(0x40600038) /* UDC Endpoint 10 (Interrupt) Control/Status Register */ +#define UDCCS15 __REG(0x4060004C) /* UDC Endpoint 15 (Interrupt) Control/Status Register */ + +#define UDCCS_INT_TFS (1 << 0) /* Transmit FIFO service */ +#define UDCCS_INT_TPC (1 << 1) /* Transmit packet complete */ +#define UDCCS_INT_FTF (1 << 2) /* Flush Tx FIFO */ +#define UDCCS_INT_TUR (1 << 3) /* Transmit FIFO underrun */ +#define UDCCS_INT_SST (1 << 4) /* Sent stall */ +#define UDCCS_INT_FST (1 << 5) /* Force stall */ +#define UDCCS_INT_TSP (1 << 7) /* Transmit short packet */ + +#define UFNRH __REG(0x40600060) /* UDC Frame Number Register High */ +#define UFNRL __REG(0x40600064) /* UDC Frame Number Register Low */ +#define UBCR2 __REG(0x40600068) /* UDC Byte Count Reg 2 */ +#define UBCR4 __REG(0x4060006c) /* UDC Byte Count Reg 4 */ +#define UBCR7 __REG(0x40600070) /* UDC Byte Count Reg 7 */ +#define UBCR9 __REG(0x40600074) /* UDC Byte Count Reg 9 */ +#define UBCR12 __REG(0x40600078) /* UDC Byte Count Reg 12 */ +#define UBCR14 __REG(0x4060007c) /* UDC Byte Count Reg 14 */ +#define UDDR0 __REG(0x40600080) /* UDC Endpoint 0 Data Register */ +#define UDDR1 __REG(0x40600100) /* UDC Endpoint 1 Data Register */ +#define UDDR2 __REG(0x40600180) /* UDC Endpoint 2 Data Register */ +#define UDDR3 __REG(0x40600200) /* UDC Endpoint 3 Data Register */ +#define UDDR4 __REG(0x40600400) /* UDC Endpoint 4 Data Register */ +#define UDDR5 __REG(0x406000A0) /* UDC Endpoint 5 Data Register */ +#define UDDR6 __REG(0x40600600) /* UDC Endpoint 6 Data Register */ +#define UDDR7 __REG(0x40600680) /* UDC Endpoint 7 Data Register */ +#define UDDR8 __REG(0x40600700) /* UDC Endpoint 8 Data Register */ +#define UDDR9 __REG(0x40600900) /* UDC Endpoint 9 Data Register */ +#define UDDR10 __REG(0x406000C0) /* UDC Endpoint 10 Data Register */ +#define UDDR11 __REG(0x40600B00) /* UDC Endpoint 11 Data Register */ +#define UDDR12 __REG(0x40600B80) /* UDC Endpoint 12 Data Register */ +#define UDDR13 __REG(0x40600C00) /* UDC Endpoint 13 Data Register */ +#define UDDR14 __REG(0x40600E00) /* UDC Endpoint 14 Data Register */ +#define UDDR15 __REG(0x406000E0) /* UDC Endpoint 15 Data Register */ + +#define UICR0 __REG(0x40600050) /* UDC Interrupt Control Register 0 */ + +#define UICR0_IM0 (1 << 0) /* Interrupt mask ep 0 */ +#define UICR0_IM1 (1 << 1) /* Interrupt mask ep 1 */ +#define UICR0_IM2 (1 << 2) /* Interrupt mask ep 2 */ +#define UICR0_IM3 (1 << 3) /* Interrupt mask ep 3 */ +#define UICR0_IM4 (1 << 4) /* Interrupt mask ep 4 */ +#define UICR0_IM5 (1 << 5) /* Interrupt mask ep 5 */ +#define UICR0_IM6 (1 << 6) /* Interrupt mask ep 6 */ +#define UICR0_IM7 (1 << 7) /* Interrupt mask ep 7 */ + +#define UICR1 __REG(0x40600054) /* UDC Interrupt Control Register 1 */ + +#define UICR1_IM8 (1 << 0) /* Interrupt mask ep 8 */ +#define UICR1_IM9 (1 << 1) /* Interrupt mask ep 9 */ +#define UICR1_IM10 (1 << 2) /* Interrupt mask ep 10 */ +#define UICR1_IM11 (1 << 3) /* Interrupt mask ep 11 */ +#define UICR1_IM12 (1 << 4) /* Interrupt mask ep 12 */ +#define UICR1_IM13 (1 << 5) /* Interrupt mask ep 13 */ +#define UICR1_IM14 (1 << 6) /* Interrupt mask ep 14 */ +#define UICR1_IM15 (1 << 7) /* Interrupt mask ep 15 */ + +#define USIR0 __REG(0x40600058) /* UDC Status Interrupt Register 0 */ + +#define USIR0_IR0 (1 << 0) /* Interrupt request ep 0 */ +#define USIR0_IR1 (1 << 1) /* Interrupt request ep 1 */ +#define USIR0_IR2 (1 << 2) /* Interrupt request ep 2 */ +#define USIR0_IR3 (1 << 3) /* Interrupt request ep 3 */ +#define USIR0_IR4 (1 << 4) /* Interrupt request ep 4 */ +#define USIR0_IR5 (1 << 5) /* Interrupt request ep 5 */ +#define USIR0_IR6 (1 << 6) /* Interrupt request ep 6 */ +#define USIR0_IR7 (1 << 7) /* Interrupt request ep 7 */ + +#define USIR1 __REG(0x4060005C) /* UDC Status Interrupt Register 1 */ + +#define USIR1_IR8 (1 << 0) /* Interrupt request ep 8 */ +#define USIR1_IR9 (1 << 1) /* Interrupt request ep 9 */ +#define USIR1_IR10 (1 << 2) /* Interrupt request ep 10 */ +#define USIR1_IR11 (1 << 3) /* Interrupt request ep 11 */ +#define USIR1_IR12 (1 << 4) /* Interrupt request ep 12 */ +#define USIR1_IR13 (1 << 5) /* Interrupt request ep 13 */ +#define USIR1_IR14 (1 << 6) /* Interrupt request ep 14 */ +#define USIR1_IR15 (1 << 7) /* Interrupt request ep 15 */ + +#endif diff --git a/include/asm-arm/arch-pxa/pxa27x-udc.h b/include/asm-arm/arch-pxa/pxa27x-udc.h new file mode 100644 index 0000000..9cf0b1f --- /dev/null +++ b/include/asm-arm/arch-pxa/pxa27x-udc.h @@ -0,0 +1,256 @@ +#ifndef _ASM_ARCH_PXA27X_UDC_H +#define _ASM_ARCH_PXA27X_UDC_H + +#ifdef _ASM_ARCH_PXA25X_UDC_H +#error You can't include both PXA25x and PXA27x UDC support +#endif + +#define UDCCR __REG(0x40600000) /* UDC Control Register */ +#define UDCCR_OEN (1 << 31) /* On-the-Go Enable */ +#define UDCCR_AALTHNP (1 << 30) /* A-device Alternate Host Negotiation + Protocol Port Support */ +#define UDCCR_AHNP (1 << 29) /* A-device Host Negotiation Protocol + Support */ +#define UDCCR_BHNP (1 << 28) /* B-device Host Negotiation Protocol + Enable */ +#define UDCCR_DWRE (1 << 16) /* Device Remote Wake-up Enable */ +#define UDCCR_ACN (0x03 << 11) /* Active UDC configuration Number */ +#define UDCCR_ACN_S 11 +#define UDCCR_AIN (0x07 << 8) /* Active UDC interface Number */ +#define UDCCR_AIN_S 8 +#define UDCCR_AAISN (0x07 << 5) /* Active UDC Alternate Interface + Setting Number */ +#define UDCCR_AAISN_S 5 +#define UDCCR_SMAC (1 << 4) /* Switch Endpoint Memory to Active + Configuration */ +#define UDCCR_EMCE (1 << 3) /* Endpoint Memory Configuration + Error */ +#define UDCCR_UDR (1 << 2) /* UDC Resume */ +#define UDCCR_UDA (1 << 1) /* UDC Active */ +#define UDCCR_UDE (1 << 0) /* UDC Enable */ + +#define UDCICR0 __REG(0x40600004) /* UDC Interrupt Control Register0 */ +#define UDCICR1 __REG(0x40600008) /* UDC Interrupt Control Register1 */ +#define UDCICR_FIFOERR (1 << 1) /* FIFO Error interrupt for EP */ +#define UDCICR_PKTCOMPL (1 << 0) /* Packet Complete interrupt for EP */ + +#define UDC_INT_FIFOERROR (0x2) +#define UDC_INT_PACKETCMP (0x1) + +#define UDCICR_INT(n,intr) (((intr) & 0x03) << (((n) & 0x0F) * 2)) +#define UDCICR1_IECC (1 << 31) /* IntEn - Configuration Change */ +#define UDCICR1_IESOF (1 << 30) /* IntEn - Start of Frame */ +#define UDCICR1_IERU (1 << 29) /* IntEn - Resume */ +#define UDCICR1_IESU (1 << 28) /* IntEn - Suspend */ +#define UDCICR1_IERS (1 << 27) /* IntEn - Reset */ + +#define UDCISR0 __REG(0x4060000C) /* UDC Interrupt Status Register 0 */ +#define UDCISR1 __REG(0x40600010) /* UDC Interrupt Status Register 1 */ +#define UDCISR_INT(n,intr) (((intr) & 0x03) << (((n) & 0x0F) * 2)) +#define UDCISR1_IRCC (1 << 31) /* IntReq - Configuration Change */ +#define UDCISR1_IRSOF (1 << 30) /* IntReq - Start of Frame */ +#define UDCISR1_IRRU (1 << 29) /* IntReq - Resume */ +#define UDCISR1_IRSU (1 << 28) /* IntReq - Suspend */ +#define UDCISR1_IRRS (1 << 27) /* IntReq - Reset */ + +#define UDCFNR __REG(0x40600014) /* UDC Frame Number Register */ +#define UDCOTGICR __REG(0x40600018) /* UDC On-The-Go interrupt control */ +#define UDCOTGICR_IESF (1 << 24) /* OTG SET_FEATURE command recvd */ +#define UDCOTGICR_IEXR (1 << 17) /* Extra Transciever Interrupt + Rising Edge Interrupt Enable */ +#define UDCOTGICR_IEXF (1 << 16) /* Extra Transciever Interrupt + Falling Edge Interrupt Enable */ +#define UDCOTGICR_IEVV40R (1 << 9) /* OTG Vbus Valid 4.0V Rising Edge + Interrupt Enable */ +#define UDCOTGICR_IEVV40F (1 << 8) /* OTG Vbus Valid 4.0V Falling Edge + Interrupt Enable */ +#define UDCOTGICR_IEVV44R (1 << 7) /* OTG Vbus Valid 4.4V Rising Edge + Interrupt Enable */ +#define UDCOTGICR_IEVV44F (1 << 6) /* OTG Vbus Valid 4.4V Falling Edge + Interrupt Enable */ +#define UDCOTGICR_IESVR (1 << 5) /* OTG Session Valid Rising Edge + Interrupt Enable */ +#define UDCOTGICR_IESVF (1 << 4) /* OTG Session Valid Falling Edge + Interrupt Enable */ +#define UDCOTGICR_IESDR (1 << 3) /* OTG A-Device SRP Detect Rising + Edge Interrupt Enable */ +#define UDCOTGICR_IESDF (1 << 2) /* OTG A-Device SRP Detect Falling + Edge Interrupt Enable */ +#define UDCOTGICR_IEIDR (1 << 1) /* OTG ID Change Rising Edge + Interrupt Enable */ +#define UDCOTGICR_IEIDF (1 << 0) /* OTG ID Change Falling Edge + Interrupt Enable */ + +#define UP2OCR __REG(0x40600020) /* USB Port 2 Output Control register */ + +#define UP2OCR_CPVEN (1 << 0) /* Charge Pump Vbus Enable */ +#define UP2OCR_CPVPE (1 << 1) /* Charge Pump Vbus Pulse Enable */ +#define UP2OCR_DPPDE (1 << 2) /* Host Port 2 Transceiver D+ Pull Down Enable */ +#define UP2OCR_DMPDE (1 << 3) /* Host Port 2 Transceiver D- Pull Down Enable */ +#define UP2OCR_DPPUE (1 << 4) /* Host Port 2 Transceiver D+ Pull Up Enable */ +#define UP2OCR_DMPUE (1 << 5) /* Host Port 2 Transceiver D- Pull Up Enable */ +#define UP2OCR_DPPUBE (1 << 6) /* Host Port 2 Transceiver D+ Pull Up Bypass Enable */ +#define UP2OCR_DMPUBE (1 << 7) /* Host Port 2 Transceiver D- Pull Up Bypass Enable */ +#define UP2OCR_EXSP (1 << 8) /* External Transceiver Speed Control */ +#define UP2OCR_EXSUS (1 << 9) /* External Transceiver Speed Enable */ +#define UP2OCR_IDON (1 << 10) /* OTG ID Read Enable */ +#define UP2OCR_HXS (1 << 16) /* Host Port 2 Transceiver Output Select */ +#define UP2OCR_HXOE (1 << 17) /* Host Port 2 Transceiver Output Enable */ +#define UP2OCR_SEOS (1 << 24) /* Single-Ended Output Select */ + +#define UDCCSN(x) __REG2(0x40600100, (x) << 2) +#define UDCCSR0 __REG(0x40600100) /* UDC Control/Status register - Endpoint 0 */ +#define UDCCSR0_SA (1 << 7) /* Setup Active */ +#define UDCCSR0_RNE (1 << 6) /* Receive FIFO Not Empty */ +#define UDCCSR0_FST (1 << 5) /* Force Stall */ +#define UDCCSR0_SST (1 << 4) /* Sent Stall */ +#define UDCCSR0_DME (1 << 3) /* DMA Enable */ +#define UDCCSR0_FTF (1 << 2) /* Flush Transmit FIFO */ +#define UDCCSR0_IPR (1 << 1) /* IN Packet Ready */ +#define UDCCSR0_OPC (1 << 0) /* OUT Packet Complete */ + +#define UDCCSRA __REG(0x40600104) /* UDC Control/Status register - Endpoint A */ +#define UDCCSRB __REG(0x40600108) /* UDC Control/Status register - Endpoint B */ +#define UDCCSRC __REG(0x4060010C) /* UDC Control/Status register - Endpoint C */ +#define UDCCSRD __REG(0x40600110) /* UDC Control/Status register - Endpoint D */ +#define UDCCSRE __REG(0x40600114) /* UDC Control/Status register - Endpoint E */ +#define UDCCSRF __REG(0x40600118) /* UDC Control/Status register - Endpoint F */ +#define UDCCSRG __REG(0x4060011C) /* UDC Control/Status register - Endpoint G */ +#define UDCCSRH __REG(0x40600120) /* UDC Control/Status register - Endpoint H */ +#define UDCCSRI __REG(0x40600124) /* UDC Control/Status register - Endpoint I */ +#define UDCCSRJ __REG(0x40600128) /* UDC Control/Status register - Endpoint J */ +#define UDCCSRK __REG(0x4060012C) /* UDC Control/Status register - Endpoint K */ +#define UDCCSRL __REG(0x40600130) /* UDC Control/Status register - Endpoint L */ +#define UDCCSRM __REG(0x40600134) /* UDC Control/Status register - Endpoint M */ +#define UDCCSRN __REG(0x40600138) /* UDC Control/Status register - Endpoint N */ +#define UDCCSRP __REG(0x4060013C) /* UDC Control/Status register - Endpoint P */ +#define UDCCSRQ __REG(0x40600140) /* UDC Control/Status register - Endpoint Q */ +#define UDCCSRR __REG(0x40600144) /* UDC Control/Status register - Endpoint R */ +#define UDCCSRS __REG(0x40600148) /* UDC Control/Status register - Endpoint S */ +#define UDCCSRT __REG(0x4060014C) /* UDC Control/Status register - Endpoint T */ +#define UDCCSRU __REG(0x40600150) /* UDC Control/Status register - Endpoint U */ +#define UDCCSRV __REG(0x40600154) /* UDC Control/Status register - Endpoint V */ +#define UDCCSRW __REG(0x40600158) /* UDC Control/Status register - Endpoint W */ +#define UDCCSRX __REG(0x4060015C) /* UDC Control/Status register - Endpoint X */ + +#define UDCCSR_DPE (1 << 9) /* Data Packet Error */ +#define UDCCSR_FEF (1 << 8) /* Flush Endpoint FIFO */ +#define UDCCSR_SP (1 << 7) /* Short Packet Control/Status */ +#define UDCCSR_BNE (1 << 6) /* Buffer Not Empty (IN endpoints) */ +#define UDCCSR_BNF (1 << 6) /* Buffer Not Full (OUT endpoints) */ +#define UDCCSR_FST (1 << 5) /* Force STALL */ +#define UDCCSR_SST (1 << 4) /* Sent STALL */ +#define UDCCSR_DME (1 << 3) /* DMA Enable */ +#define UDCCSR_TRN (1 << 2) /* Tx/Rx NAK */ +#define UDCCSR_PC (1 << 1) /* Packet Complete */ +#define UDCCSR_FS (1 << 0) /* FIFO needs service */ + +#define UDCBCN(x) __REG2(0x40600200, (x)<<2) +#define UDCBCR0 __REG(0x40600200) /* Byte Count Register - EP0 */ +#define UDCBCRA __REG(0x40600204) /* Byte Count Register - EPA */ +#define UDCBCRB __REG(0x40600208) /* Byte Count Register - EPB */ +#define UDCBCRC __REG(0x4060020C) /* Byte Count Register - EPC */ +#define UDCBCRD __REG(0x40600210) /* Byte Count Register - EPD */ +#define UDCBCRE __REG(0x40600214) /* Byte Count Register - EPE */ +#define UDCBCRF __REG(0x40600218) /* Byte Count Register - EPF */ +#define UDCBCRG __REG(0x4060021C) /* Byte Count Register - EPG */ +#define UDCBCRH __REG(0x40600220) /* Byte Count Register - EPH */ +#define UDCBCRI __REG(0x40600224) /* Byte Count Register - EPI */ +#define UDCBCRJ __REG(0x40600228) /* Byte Count Register - EPJ */ +#define UDCBCRK __REG(0x4060022C) /* Byte Count Register - EPK */ +#define UDCBCRL __REG(0x40600230) /* Byte Count Register - EPL */ +#define UDCBCRM __REG(0x40600234) /* Byte Count Register - EPM */ +#define UDCBCRN __REG(0x40600238) /* Byte Count Register - EPN */ +#define UDCBCRP __REG(0x4060023C) /* Byte Count Register - EPP */ +#define UDCBCRQ __REG(0x40600240) /* Byte Count Register - EPQ */ +#define UDCBCRR __REG(0x40600244) /* Byte Count Register - EPR */ +#define UDCBCRS __REG(0x40600248) /* Byte Count Register - EPS */ +#define UDCBCRT __REG(0x4060024C) /* Byte Count Register - EPT */ +#define UDCBCRU __REG(0x40600250) /* Byte Count Register - EPU */ +#define UDCBCRV __REG(0x40600254) /* Byte Count Register - EPV */ +#define UDCBCRW __REG(0x40600258) /* Byte Count Register - EPW */ +#define UDCBCRX __REG(0x4060025C) /* Byte Count Register - EPX */ + +#define UDCDN(x) __REG2(0x40600300, (x)<<2) +#define PHYS_UDCDN(x) (0x40600300 + ((x)<<2)) +#define PUDCDN(x) (volatile u32 *)(io_p2v(PHYS_UDCDN((x)))) +#define UDCDR0 __REG(0x40600300) /* Data Register - EP0 */ +#define UDCDRA __REG(0x40600304) /* Data Register - EPA */ +#define UDCDRB __REG(0x40600308) /* Data Register - EPB */ +#define UDCDRC __REG(0x4060030C) /* Data Register - EPC */ +#define UDCDRD __REG(0x40600310) /* Data Register - EPD */ +#define UDCDRE __REG(0x40600314) /* Data Register - EPE */ +#define UDCDRF __REG(0x40600318) /* Data Register - EPF */ +#define UDCDRG __REG(0x4060031C) /* Data Register - EPG */ +#define UDCDRH __REG(0x40600320) /* Data Register - EPH */ +#define UDCDRI __REG(0x40600324) /* Data Register - EPI */ +#define UDCDRJ __REG(0x40600328) /* Data Register - EPJ */ +#define UDCDRK __REG(0x4060032C) /* Data Register - EPK */ +#define UDCDRL __REG(0x40600330) /* Data Register - EPL */ +#define UDCDRM __REG(0x40600334) /* Data Register - EPM */ +#define UDCDRN __REG(0x40600338) /* Data Register - EPN */ +#define UDCDRP __REG(0x4060033C) /* Data Register - EPP */ +#define UDCDRQ __REG(0x40600340) /* Data Register - EPQ */ +#define UDCDRR __REG(0x40600344) /* Data Register - EPR */ +#define UDCDRS __REG(0x40600348) /* Data Register - EPS */ +#define UDCDRT __REG(0x4060034C) /* Data Register - EPT */ +#define UDCDRU __REG(0x40600350) /* Data Register - EPU */ +#define UDCDRV __REG(0x40600354) /* Data Register - EPV */ +#define UDCDRW __REG(0x40600358) /* Data Register - EPW */ +#define UDCDRX __REG(0x4060035C) /* Data Register - EPX */ + +#define UDCCN(x) __REG2(0x40600400, (x)<<2) +#define UDCCRA __REG(0x40600404) /* Configuration register EPA */ +#define UDCCRB __REG(0x40600408) /* Configuration register EPB */ +#define UDCCRC __REG(0x4060040C) /* Configuration register EPC */ +#define UDCCRD __REG(0x40600410) /* Configuration register EPD */ +#define UDCCRE __REG(0x40600414) /* Configuration register EPE */ +#define UDCCRF __REG(0x40600418) /* Configuration register EPF */ +#define UDCCRG __REG(0x4060041C) /* Configuration register EPG */ +#define UDCCRH __REG(0x40600420) /* Configuration register EPH */ +#define UDCCRI __REG(0x40600424) /* Configuration register EPI */ +#define UDCCRJ __REG(0x40600428) /* Configuration register EPJ */ +#define UDCCRK __REG(0x4060042C) /* Configuration register EPK */ +#define UDCCRL __REG(0x40600430) /* Configuration register EPL */ +#define UDCCRM __REG(0x40600434) /* Configuration register EPM */ +#define UDCCRN __REG(0x40600438) /* Configuration register EPN */ +#define UDCCRP __REG(0x4060043C) /* Configuration register EPP */ +#define UDCCRQ __REG(0x40600440) /* Configuration register EPQ */ +#define UDCCRR __REG(0x40600444) /* Configuration register EPR */ +#define UDCCRS __REG(0x40600448) /* Configuration register EPS */ +#define UDCCRT __REG(0x4060044C) /* Configuration register EPT */ +#define UDCCRU __REG(0x40600450) /* Configuration register EPU */ +#define UDCCRV __REG(0x40600454) /* Configuration register EPV */ +#define UDCCRW __REG(0x40600458) /* Configuration register EPW */ +#define UDCCRX __REG(0x4060045C) /* Configuration register EPX */ + +#define UDCCONR_CN (0x03 << 25) /* Configuration Number */ +#define UDCCONR_CN_S (25) +#define UDCCONR_IN (0x07 << 22) /* Interface Number */ +#define UDCCONR_IN_S (22) +#define UDCCONR_AISN (0x07 << 19) /* Alternate Interface Number */ +#define UDCCONR_AISN_S (19) +#define UDCCONR_EN (0x0f << 15) /* Endpoint Number */ +#define UDCCONR_EN_S (15) +#define UDCCONR_ET (0x03 << 13) /* Endpoint Type: */ +#define UDCCONR_ET_S (13) +#define UDCCONR_ET_INT (0x03 << 13) /* Interrupt */ +#define UDCCONR_ET_BULK (0x02 << 13) /* Bulk */ +#define UDCCONR_ET_ISO (0x01 << 13) /* Isochronous */ +#define UDCCONR_ET_NU (0x00 << 13) /* Not used */ +#define UDCCONR_ED (1 << 12) /* Endpoint Direction */ +#define UDCCONR_MPS (0x3ff << 2) /* Maximum Packet Size */ +#define UDCCONR_MPS_S (2) +#define UDCCONR_DE (1 << 1) /* Double Buffering Enable */ +#define UDCCONR_EE (1 << 0) /* Endpoint Enable */ + + +#define UDC_INT_FIFOERROR (0x2) +#define UDC_INT_PACKETCMP (0x1) + +#define UDC_FNR_MASK (0x7ff) + +#define UDCCSR_WR_MASK (UDCCSR_DME|UDCCSR_FST) +#define UDC_BCR_MASK (0x3ff) + +#endif -- cgit v0.10.2 From 0be9898adb6f58fee44f0fec0bbc0eac997ea9eb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 19 May 2008 12:31:28 +0200 Subject: [ALSA] ASoC: Clarify API for bias configuration Currently the ASoC core configures the bias levels in the system using a callback on codecs and machines called 'dapm_event', passing it PCI style power levels as SNDRV_CTL_POWER_ constants. This is more obscure than it needs to be and has caused confusion to driver authors, especially given that DAPM is also performing power management. Address this by renaming the callback function to 'set_bias_level' and using constants explicitly representing the off, standby, pre-on and on states which DAPM transitions through. Also unexport the API for setting bias level: there are currently no in-tree users of this API other than the core itself and it is likely that the core would need to be extended to cater for any users. Signed-off-by: Mark Brown Cc: Jarkko Nikula Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index bf4cf0c..f8223fa 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -221,7 +221,8 @@ int snd_soc_dapm_add_routes(struct snd_soc_codec *codec, /* dapm events */ int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, char *stream, int event); -int snd_soc_dapm_device_event(struct snd_soc_device *socdev, int event); +int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev, + enum snd_soc_bias_level level); /* dapm sys fs - used by the core */ int snd_soc_dapm_sys_add(struct device *dev); diff --git a/include/sound/soc.h b/include/sound/soc.h index 73accbc..bca9538 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -103,6 +103,24 @@ .private_value = (unsigned long)&xenum } /* + * Bias levels + * + * @ON: Bias is fully on for audio playback and capture operations. + * @PREPARE: Prepare for audio operations. Called before DAPM switching for + * stream start and stop operations. + * @STANDBY: Low power standby state when no playback/capture operations are + * in progress. NOTE: The transition time between STANDBY and ON + * should be as fast as possible and no longer than 10ms. + * @OFF: Power Off. No restrictions on transition times. + */ +enum snd_soc_bias_level { + SND_SOC_BIAS_ON, + SND_SOC_BIAS_PREPARE, + SND_SOC_BIAS_STANDBY, + SND_SOC_BIAS_OFF, +}; + +/* * Digital Audio Interface (DAI) types */ #define SND_SOC_DAI_AC97 0x1 @@ -356,7 +374,8 @@ struct snd_soc_codec { struct mutex mutex; /* callbacks */ - int (*dapm_event)(struct snd_soc_codec *codec, int event); + int (*set_bias_level)(struct snd_soc_codec *, + enum snd_soc_bias_level level); /* runtime */ struct snd_card *card; @@ -378,8 +397,8 @@ struct snd_soc_codec { /* dapm */ struct list_head dapm_widgets; struct list_head dapm_paths; - unsigned int dapm_state; - unsigned int suspend_dapm_state; + enum snd_soc_bias_level bias_level; + enum snd_soc_bias_level suspend_bias_level; struct delayed_work delayed_work; /* codec DAI's */ @@ -449,7 +468,8 @@ struct snd_soc_machine { int (*resume_post)(struct platform_device *pdev); /* callbacks */ - int (*dapm_event)(struct snd_soc_machine *, int event); + int (*set_bias_level)(struct snd_soc_machine *, + enum snd_soc_bias_level level); /* CPU <--> Codec DAI links */ struct snd_soc_dai_link *dai_link; diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index cb8365a..dc8a38d 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -847,13 +847,14 @@ static int aic3x_set_dai_fmt(struct snd_soc_codec_dai *codec_dai, return 0; } -static int aic3x_dapm_event(struct snd_soc_codec *codec, int event) +static int aic3x_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { struct aic3x_priv *aic3x = codec->private_data; u8 reg; - switch (event) { - case SNDRV_CTL_POWER_D0: + switch (level) { + case SND_SOC_BIAS_ON: /* all power is driven by DAPM system */ if (aic3x->master) { /* enable pll */ @@ -862,10 +863,9 @@ static int aic3x_dapm_event(struct snd_soc_codec *codec, int event) reg | PLL_ENABLE); } break; - case SNDRV_CTL_POWER_D1: - case SNDRV_CTL_POWER_D2: + case SND_SOC_BIAS_PREPARE: break; - case SNDRV_CTL_POWER_D3hot: + case SND_SOC_BIAS_STANDBY: /* * all power is driven by DAPM system, * so output power is safe if bypass was set @@ -877,7 +877,7 @@ static int aic3x_dapm_event(struct snd_soc_codec *codec, int event) reg & ~PLL_ENABLE); } break; - case SNDRV_CTL_POWER_D3cold: + case SND_SOC_BIAS_OFF: /* force all power off */ reg = aic3x_read_reg_cache(codec, LINE1L_2_LADC_CTRL); aic3x_write(codec, LINE1L_2_LADC_CTRL, reg & ~LADC_PWR_ON); @@ -913,7 +913,7 @@ static int aic3x_dapm_event(struct snd_soc_codec *codec, int event) } break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -979,7 +979,7 @@ static int aic3x_suspend(struct platform_device *pdev, pm_message_t state) struct snd_soc_device *socdev = platform_get_drvdata(pdev); struct snd_soc_codec *codec = socdev->codec; - aic3x_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + aic3x_set_bias_level(codec, SND_SOC_BIAS_OFF); return 0; } @@ -999,7 +999,7 @@ static int aic3x_resume(struct platform_device *pdev) codec->hw_write(codec->control_data, data, 2); } - aic3x_dapm_event(codec, codec->suspend_dapm_state); + aic3x_set_bias_level(codec, codec->suspend_bias_level); return 0; } @@ -1018,7 +1018,7 @@ static int aic3x_init(struct snd_soc_device *socdev) codec->owner = THIS_MODULE; codec->read = aic3x_read_reg_cache; codec->write = aic3x_write; - codec->dapm_event = aic3x_dapm_event; + codec->set_bias_level = aic3x_set_bias_level; codec->dai = &aic3x_dai; codec->num_dai = 1; codec->reg_cache_size = sizeof(aic3x_reg); @@ -1100,7 +1100,7 @@ static int aic3x_init(struct snd_soc_device *socdev) aic3x_write(codec, LINE2R_2_MONOLOPM_VOL, DEFAULT_VOL); /* off, with power on */ - aic3x_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + aic3x_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* setup GPIO functions */ aic3x_write(codec, AIC3X_GPIO1_REG, (setup->gpio_func[0] & 0xf) << 4); @@ -1271,7 +1271,7 @@ static int aic3x_remove(struct platform_device *pdev) /* power down chip */ if (codec->control_data) - aic3x_dapm_event(codec, SNDRV_CTL_POWER_D3); + aic3x_set_bias_level(codec, SND_SOC_BIAS_OFF); snd_soc_free_pcms(socdev); snd_soc_dapm_free(socdev); diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c index 0cf9265..0f28aa4 100644 --- a/sound/soc/codecs/wm8731.c +++ b/sound/soc/codecs/wm8731.c @@ -435,29 +435,29 @@ static int wm8731_set_dai_fmt(struct snd_soc_codec_dai *codec_dai, return 0; } -static int wm8731_dapm_event(struct snd_soc_codec *codec, int event) +static int wm8731_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { u16 reg = wm8731_read_reg_cache(codec, WM8731_PWR) & 0xff7f; - switch (event) { - case SNDRV_CTL_POWER_D0: /* full On */ + switch (level) { + case SND_SOC_BIAS_ON: /* vref/mid, osc on, dac unmute */ wm8731_write(codec, WM8731_PWR, reg); break; - case SNDRV_CTL_POWER_D1: /* partial On */ - case SNDRV_CTL_POWER_D2: /* partial On */ + case SND_SOC_BIAS_PREPARE: break; - case SNDRV_CTL_POWER_D3hot: /* Off, with power */ + case SND_SOC_BIAS_STANDBY: /* everything off except vref/vmid, */ wm8731_write(codec, WM8731_PWR, reg | 0x0040); break; - case SNDRV_CTL_POWER_D3cold: /* Off, without power */ + case SND_SOC_BIAS_OFF: /* everything off, dac mute, inactive */ wm8731_write(codec, WM8731_ACTIVE, 0x0); wm8731_write(codec, WM8731_PWR, 0xffff); break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -503,7 +503,7 @@ static int wm8731_suspend(struct platform_device *pdev, pm_message_t state) struct snd_soc_codec *codec = socdev->codec; wm8731_write(codec, WM8731_ACTIVE, 0x0); - wm8731_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8731_set_bias_level(codec, SND_SOC_BIAS_OFF); return 0; } @@ -521,8 +521,8 @@ static int wm8731_resume(struct platform_device *pdev) data[1] = cache[i] & 0x00ff; codec->hw_write(codec->control_data, data, 2); } - wm8731_dapm_event(codec, SNDRV_CTL_POWER_D3hot); - wm8731_dapm_event(codec, codec->suspend_dapm_state); + wm8731_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + wm8731_set_bias_level(codec, codec->suspend_bias_level); return 0; } @@ -539,7 +539,7 @@ static int wm8731_init(struct snd_soc_device *socdev) codec->owner = THIS_MODULE; codec->read = wm8731_read_reg_cache; codec->write = wm8731_write; - codec->dapm_event = wm8731_dapm_event; + codec->set_bias_level = wm8731_set_bias_level; codec->dai = &wm8731_dai; codec->num_dai = 1; codec->reg_cache_size = sizeof(wm8731_reg); @@ -557,7 +557,7 @@ static int wm8731_init(struct snd_soc_device *socdev) } /* power on device */ - wm8731_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm8731_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* set the update bits */ reg = wm8731_read_reg_cache(codec, WM8731_LOUT1V); @@ -730,7 +730,7 @@ static int wm8731_remove(struct platform_device *pdev) struct snd_soc_codec *codec = socdev->codec; if (codec->control_data) - wm8731_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8731_set_bias_level(codec, SND_SOC_BIAS_OFF); snd_soc_free_pcms(socdev); snd_soc_dapm_free(socdev); diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c index 16cd5d4..62423f4 100644 --- a/sound/soc/codecs/wm8750.c +++ b/sound/soc/codecs/wm8750.c @@ -686,29 +686,29 @@ static int wm8750_mute(struct snd_soc_codec_dai *dai, int mute) return 0; } -static int wm8750_dapm_event(struct snd_soc_codec *codec, int event) +static int wm8750_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { u16 pwr_reg = wm8750_read_reg_cache(codec, WM8750_PWR1) & 0xfe3e; - switch (event) { - case SNDRV_CTL_POWER_D0: /* full On */ + switch (level) { + case SND_SOC_BIAS_ON: /* set vmid to 50k and unmute dac */ wm8750_write(codec, WM8750_PWR1, pwr_reg | 0x00c0); break; - case SNDRV_CTL_POWER_D1: /* partial On */ - case SNDRV_CTL_POWER_D2: /* partial On */ + case SND_SOC_BIAS_PREPARE: /* set vmid to 5k for quick power up */ wm8750_write(codec, WM8750_PWR1, pwr_reg | 0x01c1); break; - case SNDRV_CTL_POWER_D3hot: /* Off, with power */ + case SND_SOC_BIAS_STANDBY: /* mute dac and set vmid to 500k, enable VREF */ wm8750_write(codec, WM8750_PWR1, pwr_reg | 0x0141); break; - case SNDRV_CTL_POWER_D3cold: /* Off, without power */ + case SND_SOC_BIAS_OFF: wm8750_write(codec, WM8750_PWR1, 0x0001); break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -748,7 +748,7 @@ static void wm8750_work(struct work_struct *work) { struct snd_soc_codec *codec = container_of(work, struct snd_soc_codec, delayed_work.work); - wm8750_dapm_event(codec, codec->dapm_state); + wm8750_set_bias_level(codec, codec->bias_level); } static int wm8750_suspend(struct platform_device *pdev, pm_message_t state) @@ -756,7 +756,7 @@ static int wm8750_suspend(struct platform_device *pdev, pm_message_t state) struct snd_soc_device *socdev = platform_get_drvdata(pdev); struct snd_soc_codec *codec = socdev->codec; - wm8750_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8750_set_bias_level(codec, SND_SOC_BIAS_OFF); return 0; } @@ -777,12 +777,12 @@ static int wm8750_resume(struct platform_device *pdev) codec->hw_write(codec->control_data, data, 2); } - wm8750_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm8750_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* charge wm8750 caps */ - if (codec->suspend_dapm_state == SNDRV_CTL_POWER_D0) { - wm8750_dapm_event(codec, SNDRV_CTL_POWER_D2); - codec->dapm_state = SNDRV_CTL_POWER_D0; + if (codec->suspend_bias_level == SND_SOC_BIAS_ON) { + wm8750_set_bias_level(codec, SND_SOC_BIAS_PREPARE); + codec->bias_level = SND_SOC_BIAS_ON; schedule_delayed_work(&codec->delayed_work, msecs_to_jiffies(1000)); } @@ -803,7 +803,7 @@ static int wm8750_init(struct snd_soc_device *socdev) codec->owner = THIS_MODULE; codec->read = wm8750_read_reg_cache; codec->write = wm8750_write; - codec->dapm_event = wm8750_dapm_event; + codec->set_bias_level = wm8750_set_bias_level; codec->dai = &wm8750_dai; codec->num_dai = 1; codec->reg_cache_size = sizeof(wm8750_reg); @@ -821,8 +821,8 @@ static int wm8750_init(struct snd_soc_device *socdev) } /* charge output caps */ - wm8750_dapm_event(codec, SNDRV_CTL_POWER_D2); - codec->dapm_state = SNDRV_CTL_POWER_D3hot; + wm8750_set_bias_level(codec, SND_SOC_BIAS_PREPARE); + codec->bias_level = SND_SOC_BIAS_STANDBY; schedule_delayed_work(&codec->delayed_work, msecs_to_jiffies(1000)); /* set the update bits */ @@ -1021,7 +1021,7 @@ static int wm8750_remove(struct platform_device *pdev) struct snd_soc_codec *codec = socdev->codec; if (codec->control_data) - wm8750_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8750_set_bias_level(codec, SND_SOC_BIAS_OFF); run_delayed_work(&codec->delayed_work); snd_soc_free_pcms(socdev); snd_soc_dapm_free(socdev); diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c index fb41826..9032b0c 100644 --- a/sound/soc/codecs/wm8753.c +++ b/sound/soc/codecs/wm8753.c @@ -1274,29 +1274,29 @@ static int wm8753_mute(struct snd_soc_codec_dai *dai, int mute) return 0; } -static int wm8753_dapm_event(struct snd_soc_codec *codec, int event) +static int wm8753_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { u16 pwr_reg = wm8753_read_reg_cache(codec, WM8753_PWR1) & 0xfe3e; - switch (event) { - case SNDRV_CTL_POWER_D0: /* full On */ + switch (level) { + case SND_SOC_BIAS_ON: /* set vmid to 50k and unmute dac */ wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x00c0); break; - case SNDRV_CTL_POWER_D1: /* partial On */ - case SNDRV_CTL_POWER_D2: /* partial On */ + case SND_SOC_BIAS_PREPARE: /* set vmid to 5k for quick power up */ wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x01c1); break; - case SNDRV_CTL_POWER_D3hot: /* Off, with power */ + case SND_SOC_BIAS_STANDBY: /* mute dac and set vmid to 500k, enable VREF */ wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x0141); break; - case SNDRV_CTL_POWER_D3cold: /* Off, without power */ + case SND_SOC_BIAS_OFF: wm8753_write(codec, WM8753_PWR1, 0x0001); break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -1500,7 +1500,7 @@ static void wm8753_work(struct work_struct *work) { struct snd_soc_codec *codec = container_of(work, struct snd_soc_codec, delayed_work.work); - wm8753_dapm_event(codec, codec->dapm_state); + wm8753_set_bias_level(codec, codec->bias_level); } static int wm8753_suspend(struct platform_device *pdev, pm_message_t state) @@ -1512,7 +1512,7 @@ static int wm8753_suspend(struct platform_device *pdev, pm_message_t state) if (!codec->card) return 0; - wm8753_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8753_set_bias_level(codec, SND_SOC_BIAS_OFF); return 0; } @@ -1537,12 +1537,12 @@ static int wm8753_resume(struct platform_device *pdev) codec->hw_write(codec->control_data, data, 2); } - wm8753_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm8753_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* charge wm8753 caps */ - if (codec->suspend_dapm_state == SNDRV_CTL_POWER_D0) { - wm8753_dapm_event(codec, SNDRV_CTL_POWER_D2); - codec->dapm_state = SNDRV_CTL_POWER_D0; + if (codec->suspend_bias_level == SND_SOC_BIAS_ON) { + wm8753_set_bias_level(codec, SND_SOC_BIAS_PREPARE); + codec->bias_level = SND_SOC_BIAS_ON; schedule_delayed_work(&codec->delayed_work, msecs_to_jiffies(caps_charge)); } @@ -1563,7 +1563,7 @@ static int wm8753_init(struct snd_soc_device *socdev) codec->owner = THIS_MODULE; codec->read = wm8753_read_reg_cache; codec->write = wm8753_write; - codec->dapm_event = wm8753_dapm_event; + codec->set_bias_level = wm8753_set_bias_level; codec->dai = wm8753_dai; codec->num_dai = 2; codec->reg_cache_size = sizeof(wm8753_reg); @@ -1584,8 +1584,8 @@ static int wm8753_init(struct snd_soc_device *socdev) } /* charge output caps */ - wm8753_dapm_event(codec, SNDRV_CTL_POWER_D2); - codec->dapm_state = SNDRV_CTL_POWER_D3hot; + wm8753_set_bias_level(codec, SND_SOC_BIAS_PREPARE); + codec->bias_level = SND_SOC_BIAS_STANDBY; schedule_delayed_work(&codec->delayed_work, msecs_to_jiffies(caps_charge)); @@ -1792,7 +1792,7 @@ static int wm8753_remove(struct platform_device *pdev) struct snd_soc_codec *codec = socdev->codec; if (codec->control_data) - wm8753_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm8753_set_bias_level(codec, SND_SOC_BIAS_OFF); run_delayed_work(&codec->delayed_work); snd_soc_free_pcms(socdev); snd_soc_dapm_free(socdev); diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index 89efe40..e26cfcf 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -571,23 +571,23 @@ struct snd_soc_codec_dai wm9712_dai[] = { }; EXPORT_SYMBOL_GPL(wm9712_dai); -static int wm9712_dapm_event(struct snd_soc_codec *codec, int event) +static int wm9712_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { - switch (event) { - case SNDRV_CTL_POWER_D0: /* full On */ - case SNDRV_CTL_POWER_D1: /* partial On */ - case SNDRV_CTL_POWER_D2: /* partial On */ + switch (level) { + case SND_SOC_BIAS_ON: + case SND_SOC_BIAS_PREPARE: break; - case SNDRV_CTL_POWER_D3hot: /* Off, with power */ + case SND_SOC_BIAS_STANDBY: ac97_write(codec, AC97_POWERDOWN, 0x0000); break; - case SNDRV_CTL_POWER_D3cold: /* Off, without power */ + case SND_SOC_BIAS_OFF: /* disable everything including AC link */ ac97_write(codec, AC97_EXTENDED_MSTATUS, 0xffff); ac97_write(codec, AC97_POWERDOWN, 0xffff); break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -615,7 +615,7 @@ static int wm9712_soc_suspend(struct platform_device *pdev, struct snd_soc_device *socdev = platform_get_drvdata(pdev); struct snd_soc_codec *codec = socdev->codec; - wm9712_dapm_event(codec, SNDRV_CTL_POWER_D3cold); + wm9712_set_bias_level(codec, SND_SOC_BIAS_OFF); return 0; } @@ -632,7 +632,7 @@ static int wm9712_soc_resume(struct platform_device *pdev) return ret; } - wm9712_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm9712_set_bias_level(codec, SND_SOC_BIAS_STANDBY); if (ret == 0) { /* Sync reg_cache with the hardware after cold reset */ @@ -644,8 +644,8 @@ static int wm9712_soc_resume(struct platform_device *pdev) } } - if (codec->suspend_dapm_state == SNDRV_CTL_POWER_D0) - wm9712_dapm_event(codec, SNDRV_CTL_POWER_D0); + if (codec->suspend_bias_level == SND_SOC_BIAS_ON) + wm9712_set_bias_level(codec, SND_SOC_BIAS_ON); return ret; } @@ -679,7 +679,7 @@ static int wm9712_soc_probe(struct platform_device *pdev) codec->num_dai = ARRAY_SIZE(wm9712_dai); codec->write = ac97_write; codec->read = ac97_read; - codec->dapm_event = wm9712_dapm_event; + codec->set_bias_level = wm9712_set_bias_level; INIT_LIST_HEAD(&codec->dapm_widgets); INIT_LIST_HEAD(&codec->dapm_paths); @@ -703,7 +703,7 @@ static int wm9712_soc_probe(struct platform_device *pdev) /* set alc mux to none */ ac97_write(codec, AC97_VIDEO, ac97_read(codec, AC97_VIDEO) | 0x3000); - wm9712_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm9712_set_bias_level(codec, SND_SOC_BIAS_STANDBY); wm9712_add_controls(codec); wm9712_add_widgets(codec); ret = snd_soc_register_card(socdev); diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index 9e6b2fd..4863636 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -1094,33 +1094,33 @@ int wm9713_reset(struct snd_soc_codec *codec, int try_warm) } EXPORT_SYMBOL_GPL(wm9713_reset); -static int wm9713_dapm_event(struct snd_soc_codec *codec, int event) +static int wm9713_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) { u16 reg; - switch (event) { - case SNDRV_CTL_POWER_D0: /* full On */ + switch (level) { + case SND_SOC_BIAS_ON: /* enable thermal shutdown */ reg = ac97_read(codec, AC97_EXTENDED_MID) & 0x1bff; ac97_write(codec, AC97_EXTENDED_MID, reg); break; - case SNDRV_CTL_POWER_D1: /* partial On */ - case SNDRV_CTL_POWER_D2: /* partial On */ + case SND_SOC_BIAS_PREPARE: break; - case SNDRV_CTL_POWER_D3hot: /* Off, with power */ + case SND_SOC_BIAS_STANDBY: /* enable master bias and vmid */ reg = ac97_read(codec, AC97_EXTENDED_MID) & 0x3bff; ac97_write(codec, AC97_EXTENDED_MID, reg); ac97_write(codec, AC97_POWERDOWN, 0x0000); break; - case SNDRV_CTL_POWER_D3cold: /* Off, without power */ + case SND_SOC_BIAS_OFF: /* disable everything including AC link */ ac97_write(codec, AC97_EXTENDED_MID, 0xffff); ac97_write(codec, AC97_EXTENDED_MSTATUS, 0xffff); ac97_write(codec, AC97_POWERDOWN, 0xffff); break; } - codec->dapm_state = event; + codec->bias_level = level; return 0; } @@ -1157,7 +1157,7 @@ static int wm9713_soc_resume(struct platform_device *pdev) return ret; } - wm9713_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm9713_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* do we need to re-start the PLL ? */ if (wm9713->pll_out) @@ -1173,8 +1173,8 @@ static int wm9713_soc_resume(struct platform_device *pdev) } } - if (codec->suspend_dapm_state == SNDRV_CTL_POWER_D0) - wm9713_dapm_event(codec, SNDRV_CTL_POWER_D0); + if (codec->suspend_bias_level == SND_SOC_BIAS_ON) + wm9713_set_bias_level(codec, SND_SOC_BIAS_ON); return ret; } @@ -1213,7 +1213,7 @@ static int wm9713_soc_probe(struct platform_device *pdev) codec->num_dai = ARRAY_SIZE(wm9713_dai); codec->write = ac97_write; codec->read = ac97_read; - codec->dapm_event = wm9713_dapm_event; + codec->set_bias_level = wm9713_set_bias_level; INIT_LIST_HEAD(&codec->dapm_widgets); INIT_LIST_HEAD(&codec->dapm_paths); @@ -1235,7 +1235,7 @@ static int wm9713_soc_probe(struct platform_device *pdev) goto reset_err; } - wm9713_dapm_event(codec, SNDRV_CTL_POWER_D3hot); + wm9713_set_bias_level(codec, SND_SOC_BIAS_STANDBY); /* unmute the adc - move to kcontrol */ reg = ac97_read(codec, AC97_CD) & 0x7fff; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 0318d8a..a05b345 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -283,12 +283,12 @@ static void close_delayed_work(struct work_struct *work) /* are we waiting on this codec DAI stream */ if (codec_dai->pop_wait == 1) { - /* power down the codec to D1 if no longer active */ + /* Reduce power if no longer active */ if (codec->active == 0) { dbg("pop wq D1 %s %s\n", codec->name, codec_dai->playback.stream_name); - snd_soc_dapm_device_event(socdev, - SNDRV_CTL_POWER_D1); + snd_soc_dapm_set_bias_level(socdev, + SND_SOC_BIAS_PREPARE); } codec_dai->pop_wait = 0; @@ -296,12 +296,12 @@ static void close_delayed_work(struct work_struct *work) codec_dai->playback.stream_name, SND_SOC_DAPM_STREAM_STOP); - /* power down the codec power domain if no longer active */ + /* Fall into standby if no longer active */ if (codec->active == 0) { dbg("pop wq D3 %s %s\n", codec->name, codec_dai->playback.stream_name); - snd_soc_dapm_device_event(socdev, - SNDRV_CTL_POWER_D3hot); + snd_soc_dapm_set_bias_level(socdev, + SND_SOC_BIAS_STANDBY); } } } @@ -361,8 +361,8 @@ static int soc_codec_close(struct snd_pcm_substream *substream) SND_SOC_DAPM_STREAM_STOP); if (codec->active == 0 && codec_dai->pop_wait == 0) - snd_soc_dapm_device_event(socdev, - SNDRV_CTL_POWER_D3hot); + snd_soc_dapm_set_bias_level(socdev, + SND_SOC_BIAS_STANDBY); } mutex_unlock(&pcm_mutex); @@ -435,9 +435,10 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream) } } else { /* no delayed work - do we need to power up codec */ - if (codec->dapm_state != SNDRV_CTL_POWER_D0) { + if (codec->bias_level != SND_SOC_BIAS_ON) { - snd_soc_dapm_device_event(socdev, SNDRV_CTL_POWER_D1); + snd_soc_dapm_set_bias_level(socdev, + SND_SOC_BIAS_PREPARE); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) snd_soc_dapm_stream_event(codec, @@ -448,7 +449,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream) codec_dai->capture.stream_name, SND_SOC_DAPM_STREAM_START); - snd_soc_dapm_device_event(socdev, SNDRV_CTL_POWER_D0); + snd_soc_dapm_set_bias_level(socdev, SND_SOC_BIAS_ON); if (codec_dai->dai_ops.digital_mute) codec_dai->dai_ops.digital_mute(codec_dai, 0); @@ -658,7 +659,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) /* close any waiting streams and save state */ run_delayed_work(&socdev->delayed_work); - codec->suspend_dapm_state = codec->dapm_state; + codec->suspend_bias_level = codec->bias_level; for(i = 0; i < codec->num_dai; i++) { char *stream = codec->dai[i].playback.stream_name; diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 8a3192b..728f3ac 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -763,21 +763,18 @@ static ssize_t dapm_widget_show(struct device *dev, } } - switch(codec->dapm_state){ - case SNDRV_CTL_POWER_D0: - state = "D0"; + switch (codec->bias_level) { + case SND_SOC_BIAS_ON: + state = "On"; break; - case SNDRV_CTL_POWER_D1: - state = "D1"; + case SND_SOC_BIAS_PREPARE: + state = "Prepare"; break; - case SNDRV_CTL_POWER_D2: - state = "D2"; + case SND_SOC_BIAS_STANDBY: + state = "Standby"; break; - case SNDRV_CTL_POWER_D3hot: - state = "D3hot"; - break; - case SNDRV_CTL_POWER_D3cold: - state = "D3cold"; + case SND_SOC_BIAS_OFF: + state = "Off"; break; } count += sprintf(buf + count, "PM State: %s\n", state); @@ -1358,27 +1355,28 @@ int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, EXPORT_SYMBOL_GPL(snd_soc_dapm_stream_event); /** - * snd_soc_dapm_device_event - send a device event to the dapm core + * snd_soc_dapm_set_bias_level - set the bias level for the system * @socdev: audio device - * @event: device event + * @level: level to configure * - * Sends a device event to the dapm core. The core then makes any - * necessary machine or codec power changes.. + * Configure the bias (power) levels for the SoC audio device. * * Returns 0 for success else error. */ -int snd_soc_dapm_device_event(struct snd_soc_device *socdev, int event) +int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev, + enum snd_soc_bias_level level) { struct snd_soc_codec *codec = socdev->codec; struct snd_soc_machine *machine = socdev->machine; + int ret = 0; - if (machine->dapm_event) - machine->dapm_event(machine, event); - if (codec->dapm_event) - codec->dapm_event(codec, event); - return 0; + if (machine->set_bias_level) + ret = machine->set_bias_level(machine, level); + if (ret == 0 && codec->set_bias_level) + ret = codec->set_bias_level(codec, level); + + return ret; } -EXPORT_SYMBOL_GPL(snd_soc_dapm_device_event); /** * snd_soc_dapm_set_endpoint - set audio endpoint status -- cgit v0.10.2 From 3ff3f64ba04b3e5a86dce5239b10268798f68ad7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 19 May 2008 12:32:25 +0200 Subject: [ALSA] ASoC: core checkpatch cleanups Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index a05b345..a3f091e 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -108,9 +108,9 @@ static int soc_ac97_dev_register(struct snd_soc_codec *codec) } #endif -static inline const char* get_dai_name(int type) +static inline const char *get_dai_name(int type) { - switch(type) { + switch (type) { case SND_SOC_DAI_AC97_BUS: case SND_SOC_DAI_AC97: return "AC97"; @@ -178,9 +178,11 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) /* Check that the codec and cpu DAI's are compatible */ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { runtime->hw.rate_min = - max(codec_dai->playback.rate_min, cpu_dai->playback.rate_min); + max(codec_dai->playback.rate_min, + cpu_dai->playback.rate_min); runtime->hw.rate_max = - min(codec_dai->playback.rate_max, cpu_dai->playback.rate_max); + min(codec_dai->playback.rate_max, + cpu_dai->playback.rate_max); runtime->hw.channels_min = max(codec_dai->playback.channels_min, cpu_dai->playback.channels_min); @@ -193,9 +195,11 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) codec_dai->playback.rates & cpu_dai->playback.rates; } else { runtime->hw.rate_min = - max(codec_dai->capture.rate_min, cpu_dai->capture.rate_min); + max(codec_dai->capture.rate_min, + cpu_dai->capture.rate_min); runtime->hw.rate_max = - min(codec_dai->capture.rate_max, cpu_dai->capture.rate_max); + min(codec_dai->capture.rate_max, + cpu_dai->capture.rate_max); runtime->hw.channels_min = max(codec_dai->capture.channels_min, cpu_dai->capture.channels_min); @@ -225,7 +229,7 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) goto machine_err; } - dbg("asoc: %s <-> %s info:\n",codec_dai->name, cpu_dai->name); + dbg("asoc: %s <-> %s info:\n", codec_dai->name, cpu_dai->name); dbg("asoc: rate mask 0x%x\n", runtime->hw.rates); dbg("asoc: min ch %d max ch %d\n", runtime->hw.channels_min, runtime->hw.channels_max); @@ -272,7 +276,7 @@ static void close_delayed_work(struct work_struct *work) int i; mutex_lock(&pcm_mutex); - for(i = 0; i < codec->num_dai; i++) { + for (i = 0; i < codec->num_dai; i++) { codec_dai = &codec->dai[i]; dbg("pop wq checking: %s status: %s waiting: %s\n", @@ -511,7 +515,7 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream, if (cpu_dai->ops.hw_params) { ret = cpu_dai->ops.hw_params(substream, params); if (ret < 0) { - printk(KERN_ERR "asoc: can't set interface %s hw params\n", + printk(KERN_ERR "asoc: interface %s hw params failed\n", cpu_dai->name); goto interface_err; } @@ -520,7 +524,7 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream, if (platform->pcm_ops->hw_params) { ret = platform->pcm_ops->hw_params(substream, params); if (ret < 0) { - printk(KERN_ERR "asoc: can't set platform %s hw params\n", + printk(KERN_ERR "asoc: platform %s hw params failed\n", platform->name); goto platform_err; } @@ -539,7 +543,7 @@ interface_err: codec_dai->ops.hw_free(substream); codec_err: - if(machine->ops && machine->ops->hw_free) + if (machine->ops && machine->ops->hw_free) machine->ops->hw_free(substream); mutex_unlock(&pcm_mutex); @@ -628,15 +632,15 @@ static struct snd_pcm_ops soc_pcm_ops = { /* powers down audio subsystem for suspend */ static int soc_suspend(struct platform_device *pdev, pm_message_t state) { - struct snd_soc_device *socdev = platform_get_drvdata(pdev); - struct snd_soc_machine *machine = socdev->machine; - struct snd_soc_platform *platform = socdev->platform; - struct snd_soc_codec_device *codec_dev = socdev->codec_dev; + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_machine *machine = socdev->machine; + struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_codec_device *codec_dev = socdev->codec_dev; struct snd_soc_codec *codec = socdev->codec; int i; /* mute any active DAC's */ - for(i = 0; i < machine->num_links; i++) { + for (i = 0; i < machine->num_links; i++) { struct snd_soc_codec_dai *dai = machine->dai_link[i].codec_dai; if (dai->dai_ops.digital_mute && dai->playback.active) dai->dai_ops.digital_mute(dai, 1); @@ -649,7 +653,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) if (machine->suspend_pre) machine->suspend_pre(pdev, state); - for(i = 0; i < machine->num_links; i++) { + for (i = 0; i < machine->num_links; i++) { struct snd_soc_cpu_dai *cpu_dai = machine->dai_link[i].cpu_dai; if (cpu_dai->suspend && cpu_dai->type != SND_SOC_DAI_AC97) cpu_dai->suspend(pdev, cpu_dai); @@ -661,7 +665,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) run_delayed_work(&socdev->delayed_work); codec->suspend_bias_level = codec->bias_level; - for(i = 0; i < codec->num_dai; i++) { + for (i = 0; i < codec->num_dai; i++) { char *stream = codec->dai[i].playback.stream_name; if (stream != NULL) snd_soc_dapm_stream_event(codec, stream, @@ -675,7 +679,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) if (codec_dev->suspend) codec_dev->suspend(pdev, state); - for(i = 0; i < machine->num_links; i++) { + for (i = 0; i < machine->num_links; i++) { struct snd_soc_cpu_dai *cpu_dai = machine->dai_link[i].cpu_dai; if (cpu_dai->suspend && cpu_dai->type == SND_SOC_DAI_AC97) cpu_dai->suspend(pdev, cpu_dai); @@ -690,17 +694,17 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) /* powers up audio subsystem after a suspend */ static int soc_resume(struct platform_device *pdev) { - struct snd_soc_device *socdev = platform_get_drvdata(pdev); - struct snd_soc_machine *machine = socdev->machine; - struct snd_soc_platform *platform = socdev->platform; - struct snd_soc_codec_device *codec_dev = socdev->codec_dev; + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_machine *machine = socdev->machine; + struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_codec_device *codec_dev = socdev->codec_dev; struct snd_soc_codec *codec = socdev->codec; int i; if (machine->resume_pre) machine->resume_pre(pdev); - for(i = 0; i < machine->num_links; i++) { + for (i = 0; i < machine->num_links; i++) { struct snd_soc_cpu_dai *cpu_dai = machine->dai_link[i].cpu_dai; if (cpu_dai->resume && cpu_dai->type == SND_SOC_DAI_AC97) cpu_dai->resume(pdev, cpu_dai); @@ -709,8 +713,8 @@ static int soc_resume(struct platform_device *pdev) if (codec_dev->resume) codec_dev->resume(pdev); - for(i = 0; i < codec->num_dai; i++) { - char* stream = codec->dai[i].playback.stream_name; + for (i = 0; i < codec->num_dai; i++) { + char *stream = codec->dai[i].playback.stream_name; if (stream != NULL) snd_soc_dapm_stream_event(codec, stream, SND_SOC_DAPM_STREAM_RESUME); @@ -720,14 +724,14 @@ static int soc_resume(struct platform_device *pdev) SND_SOC_DAPM_STREAM_RESUME); } - /* unmute any active DAC's */ - for(i = 0; i < machine->num_links; i++) { + /* unmute any active DACs */ + for (i = 0; i < machine->num_links; i++) { struct snd_soc_codec_dai *dai = machine->dai_link[i].codec_dai; if (dai->dai_ops.digital_mute && dai->playback.active) dai->dai_ops.digital_mute(dai, 0); } - for(i = 0; i < machine->num_links; i++) { + for (i = 0; i < machine->num_links; i++) { struct snd_soc_cpu_dai *cpu_dai = machine->dai_link[i].cpu_dai; if (cpu_dai->resume && cpu_dai->type != SND_SOC_DAI_AC97) cpu_dai->resume(pdev, cpu_dai); @@ -757,7 +761,7 @@ static int soc_probe(struct platform_device *pdev) if (machine->probe) { ret = machine->probe(pdev); - if(ret < 0) + if (ret < 0) return ret; } @@ -765,20 +769,20 @@ static int soc_probe(struct platform_device *pdev) struct snd_soc_cpu_dai *cpu_dai = machine->dai_link[i].cpu_dai; if (cpu_dai->probe) { ret = cpu_dai->probe(pdev); - if(ret < 0) + if (ret < 0) goto cpu_dai_err; } } if (codec_dev->probe) { ret = codec_dev->probe(pdev); - if(ret < 0) + if (ret < 0) goto cpu_dai_err; } if (platform->probe) { ret = platform->probe(pdev); - if(ret < 0) + if (ret < 0) goto platform_err; } @@ -865,7 +869,7 @@ static int soc_new_pcm(struct snd_soc_device *socdev, codec_dai->codec = socdev->codec; /* check client and interface hw capabilities */ - sprintf(new_name, "%s %s-%s-%d",dai_link->stream_name, codec_dai->name, + sprintf(new_name, "%s %s-%s-%d", dai_link->stream_name, codec_dai->name, get_dai_name(cpu_dai->type), num); if (codec_dai->playback.channels_min) @@ -876,7 +880,8 @@ static int soc_new_pcm(struct snd_soc_device *socdev, ret = snd_pcm_new(codec->card, new_name, codec->pcm_devs++, playback, capture, &pcm); if (ret < 0) { - printk(KERN_ERR "asoc: can't create pcm for codec %s\n", codec->name); + printk(KERN_ERR "asoc: can't create pcm for codec %s\n", + codec->name); kfree(rtd); return ret; } @@ -925,8 +930,9 @@ static ssize_t codec_reg_show(struct device *dev, step = codec->reg_cache_step; count += sprintf(buf, "%s registers\n", codec->name); - for(i = 0; i < codec->reg_cache_size; i += step) - count += sprintf(buf + count, "%2x: %4x\n", i, codec->read(codec, i)); + for (i = 0; i < codec->reg_cache_size; i += step) + count += sprintf(buf + count, "%2x: %4x\n", i, + codec->read(codec, i)); return count; } @@ -1069,7 +1075,7 @@ int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid) strncpy(codec->card->driver, codec->name, sizeof(codec->card->driver)); /* create the pcms */ - for(i = 0; i < machine->num_links; i++) { + for (i = 0; i < machine->num_links; i++) { ret = soc_new_pcm(socdev, &machine->dai_link[i], i); if (ret < 0) { printk(KERN_ERR "asoc: can't create pcm %s\n", @@ -1099,7 +1105,7 @@ int snd_soc_register_card(struct snd_soc_device *socdev) struct snd_soc_machine *machine = socdev->machine; int ret = 0, i, ac97 = 0, err = 0; - for(i = 0; i < machine->num_links; i++) { + for (i = 0; i < machine->num_links; i++) { if (socdev->machine->dai_link[i].init) { err = socdev->machine->dai_link[i].init(codec); if (err < 0) { @@ -1108,7 +1114,7 @@ int snd_soc_register_card(struct snd_soc_device *socdev) continue; } } - if (socdev->machine->dai_link[i].codec_dai->type == + if (socdev->machine->dai_link[i].codec_dai->type == SND_SOC_DAI_AC97_BUS) ac97 = 1; } @@ -1119,7 +1125,7 @@ int snd_soc_register_card(struct snd_soc_device *socdev) ret = snd_card_register(codec->card); if (ret < 0) { - printk(KERN_ERR "asoc: failed to register soundcard for codec %s\n", + printk(KERN_ERR "asoc: failed to register soundcard for %s\n", codec->name); goto out; } @@ -1143,7 +1149,7 @@ int snd_soc_register_card(struct snd_soc_device *socdev) err = device_create_file(socdev->dev, &dev_attr_codec_reg); if (err < 0) - printk(KERN_WARNING "asoc: failed to add codec sysfs entries\n"); + printk(KERN_WARNING "asoc: failed to add codec sysfs files\n"); mutex_unlock(&codec->mutex); @@ -1169,7 +1175,7 @@ void snd_soc_free_pcms(struct snd_soc_device *socdev) mutex_lock(&codec->mutex); #ifdef CONFIG_SND_SOC_AC97_BUS - for(i = 0; i < codec->num_dai; i++) { + for (i = 0; i < codec->num_dai; i++) { codec_dai = &codec->dai[i]; if (codec_dai->type == SND_SOC_DAI_AC97_BUS && codec->ac97) { soc_ac97_dev_unregister(codec); @@ -1279,7 +1285,8 @@ int snd_soc_get_enum_double(struct snd_kcontrol *kcontrol, for (bitmask = 1; bitmask < e->mask; bitmask <<= 1) ; val = snd_soc_read(codec, e->reg); - ucontrol->value.enumerated.item[0] = (val >> e->shift_l) & (bitmask - 1); + ucontrol->value.enumerated.item[0] + = (val >> e->shift_l) & (bitmask - 1); if (e->shift_l != e->shift_r) ucontrol->value.enumerated.item[1] = (val >> e->shift_r) & (bitmask - 1); @@ -1573,7 +1580,8 @@ int snd_soc_put_volsw_2r(struct snd_kcontrol *kcontrol, val = val << shift; val2 = val2 << shift; - if ((err = snd_soc_update_bits(codec, reg, val_mask, val)) < 0) + err = snd_soc_update_bits(codec, reg, val_mask, val); + if (err < 0) return err; err = snd_soc_update_bits(codec, reg2, val_mask, val2); @@ -1589,7 +1597,7 @@ static int __devinit snd_soc_init(void) static void snd_soc_exit(void) { - platform_driver_unregister(&soc_driver); + platform_driver_unregister(&soc_driver); } module_init(snd_soc_init); -- cgit v0.10.2 From a65f0568f6cc8433877fb71dd7d36b551854b0bc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2008 14:54:43 +0200 Subject: [ALSA] soc - Convert Wolfson codec drivers to use bulk DAPM registration Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c index 0f28aa4..5acf43a 100644 --- a/sound/soc/codecs/wm8731.c +++ b/sound/soc/codecs/wm8731.c @@ -193,7 +193,7 @@ SND_SOC_DAPM_INPUT("RLINEIN"), SND_SOC_DAPM_INPUT("LLINEIN"), }; -static const char *intercon[][3] = { +static const struct snd_soc_dapm_route intercon[] = { /* output mixer */ {"Output Mixer", "Line Bypass Switch", "Line Input"}, {"Output Mixer", "HiFi Playback Switch", "DAC"}, @@ -214,22 +214,14 @@ static const char *intercon[][3] = { {"Line Input", NULL, "LLINEIN"}, {"Line Input", NULL, "RLINEIN"}, {"Mic Bias", NULL, "MICIN"}, - - /* terminator */ - {NULL, NULL, NULL}, }; static int wm8731_add_widgets(struct snd_soc_codec *codec) { - int i; - - for (i = 0; i < ARRAY_SIZE(wm8731_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &wm8731_dapm_widgets[i]); + snd_soc_dapm_new_controls(codec, wm8731_dapm_widgets, + ARRAY_SIZE(wm8731_dapm_widgets)); - /* set up audio path interconnects */ - for (i = 0; intercon[i][0] != NULL; i++) - snd_soc_dapm_connect_input(codec, intercon[i][0], - intercon[i][1], intercon[i][2]); + snd_soc_dapm_add_routes(codec, intercon, ARRAY_SIZE(intercon)); snd_soc_dapm_new_widgets(codec); return 0; diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c index 62423f4..1f11ad2 100644 --- a/sound/soc/codecs/wm8750.c +++ b/sound/soc/codecs/wm8750.c @@ -378,7 +378,7 @@ static const struct snd_soc_dapm_widget wm8750_dapm_widgets[] = { SND_SOC_DAPM_INPUT("RINPUT3"), }; -static const char *audio_map[][3] = { +static const struct snd_soc_dapm_route audio_map[] = { /* left mixer */ {"Left Mixer", "Playback Switch", "Left DAC"}, {"Left Mixer", "Left Bypass Switch", "Left Line Mux"}, @@ -470,22 +470,14 @@ static const char *audio_map[][3] = { /* ADC */ {"Left ADC", NULL, "Left ADC Mux"}, {"Right ADC", NULL, "Right ADC Mux"}, - - /* terminator */ - {NULL, NULL, NULL}, }; static int wm8750_add_widgets(struct snd_soc_codec *codec) { - int i; - - for (i = 0; i < ARRAY_SIZE(wm8750_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &wm8750_dapm_widgets[i]); + snd_soc_dapm_new_controls(codec, wm8750_dapm_widgets, + ARRAY_SIZE(wm8750_dapm_widgets)); - /* set up audio path audio_mapnects */ - for (i = 0; audio_map[i][0] != NULL; i++) - snd_soc_dapm_connect_input(codec, audio_map[i][0], - audio_map[i][1], audio_map[i][2]); + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); snd_soc_dapm_new_widgets(codec); return 0; diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c index 9032b0c..c32e632 100644 --- a/sound/soc/codecs/wm8753.c +++ b/sound/soc/codecs/wm8753.c @@ -523,7 +523,7 @@ SND_SOC_DAPM_INPUT("MIC2"), SND_SOC_DAPM_VMID("VREF"), }; -static const char *audio_map[][3] = { +static const struct snd_soc_dapm_route audio_map[] = { /* left mixer */ {"Left Mixer", "Left Playback Switch", "Left DAC"}, {"Left Mixer", "Voice Playback Switch", "Voice DAC"}, @@ -674,23 +674,14 @@ static const char *audio_map[][3] = { /* ACOP */ {"ACOP", NULL, "ALC Mixer"}, - - /* terminator */ - {NULL, NULL, NULL}, }; static int wm8753_add_widgets(struct snd_soc_codec *codec) { - int i; + snd_soc_dapm_new_controls(codec, wm8753_dapm_widgets, + ARRAY_SIZE(wm8753_dapm_widgets)); - for (i = 0; i < ARRAY_SIZE(wm8753_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &wm8753_dapm_widgets[i]); - - /* set up the WM8753 audio map */ - for (i = 0; audio_map[i][0] != NULL; i++) { - snd_soc_dapm_connect_input(codec, audio_map[i][0], - audio_map[i][1], audio_map[i][2]); - } + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); snd_soc_dapm_new_widgets(codec); return 0; diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index e26cfcf..d9789f1 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -348,7 +348,7 @@ SND_SOC_DAPM_INPUT("MIC1"), SND_SOC_DAPM_INPUT("MIC2"), }; -static const char *audio_map[][3] = { +static const struct snd_soc_dapm_route audio_map[] = { /* virtual mixer - mixes left & right channels for spk and mono */ {"AC97 Mixer", NULL, "Left DAC"}, {"AC97 Mixer", NULL, "Right DAC"}, @@ -443,21 +443,14 @@ static const char *audio_map[][3] = { {"Speaker PGA", NULL, "Speaker Mux"}, {"LOUT2", NULL, "Speaker PGA"}, {"ROUT2", NULL, "Speaker PGA"}, - - {NULL, NULL, NULL}, }; static int wm9712_add_widgets(struct snd_soc_codec *codec) { - int i; - - for (i = 0; i < ARRAY_SIZE(wm9712_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &wm9712_dapm_widgets[i]); + snd_soc_dapm_new_controls(codec, wm9712_dapm_widgets, + ARRAY_SIZE(wm9712_dapm_widgets)); - /* set up audio path connects */ - for (i = 0; audio_map[i][0] != NULL; i++) - snd_soc_dapm_connect_input(codec, audio_map[i][0], - audio_map[i][1], audio_map[i][2]); + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); snd_soc_dapm_new_widgets(codec); return 0; diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index 4863636..4f516a5 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -453,7 +453,7 @@ SND_SOC_DAPM_INPUT("MIC2B"), SND_SOC_DAPM_VMID("VMID"), }; -static const char *audio_map[][3] = { +static const struct snd_soc_dapm_route audio_map[] = { /* left HP mixer */ {"Left HP Mixer", "PC Beep Playback Switch", "PCBEEP"}, {"Left HP Mixer", "Voice Playback Switch", "Voice DAC"}, @@ -604,21 +604,14 @@ static const char *audio_map[][3] = { {"Capture Mono Mux", "Stereo", "Capture Mixer"}, {"Capture Mono Mux", "Left", "Left Capture Source"}, {"Capture Mono Mux", "Right", "Right Capture Source"}, - - {NULL, NULL, NULL}, }; static int wm9713_add_widgets(struct snd_soc_codec *codec) { - int i; - - for (i = 0; i < ARRAY_SIZE(wm9713_dapm_widgets); i++) - snd_soc_dapm_new_control(codec, &wm9713_dapm_widgets[i]); + snd_soc_dapm_new_controls(codec, wm9713_dapm_widgets, + ARRAY_SIZE(wm9713_dapm_widgets)); - /* set up audio path audio_mapnects */ - for (i = 0; audio_map[i][0] != NULL; i++) - snd_soc_dapm_connect_input(codec, audio_map[i][0], - audio_map[i][1], audio_map[i][2]); + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); snd_soc_dapm_new_widgets(codec); return 0; -- cgit v0.10.2 From 33e5b22285f63ede858c00456f3ffbc2ea79d6cf Mon Sep 17 00:00:00 2001 From: Werner Almesberger Date: Mon, 14 Apr 2008 14:26:44 +0200 Subject: [ALSA] soc - Fix s3c24xx-i2s LR sync while timer ticks are disabled When timer ticks are disabled when calling sound/soc/s3c24xx/s3c24xx-i2s.c:s3c24xx_snd_lrsync and the LR signal never happens, we loop forever. This has been observed in the following call chain: snd_pcm_common_ioctl1 -> snd_pcm_action_lock_irq -> snd_pcm_action_single -> snd_pcm_do_resume -> soc_pcm_trigger -> s3c24xx_i2s_trigger The patch below changes the timeout mechanism to use udelay, which doesn't need timer ticks. Signed-off-by: Werner Almesberger Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela diff --git a/sound/soc/s3c24xx/s3c24xx-i2s.c b/sound/soc/s3c24xx/s3c24xx-i2s.c index ddf8724..4c52f79 100644 --- a/sound/soc/s3c24xx/s3c24xx-i2s.c +++ b/sound/soc/s3c24xx/s3c24xx-i2s.c @@ -175,7 +175,7 @@ static void s3c24xx_snd_rxctrl(int on) static int s3c24xx_snd_lrsync(void) { u32 iiscon; - unsigned long timeout = jiffies + msecs_to_jiffies(5); + int timeout = 50; /* 5ms */ DBG("Entered %s\n", __func__); @@ -184,8 +184,9 @@ static int s3c24xx_snd_lrsync(void) if (iiscon & S3C2410_IISCON_LRINDEX) break; - if (time_after(jiffies, timeout)) + if (!timeout--) return -ETIMEDOUT; + udelay(100); } return 0; -- cgit v0.10.2 From b9ada4281c48076bdb252813be24ddb57e17cade Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Tue, 20 May 2008 11:01:08 +0100 Subject: x86: reinstate numa remap for SPARSEMEM on x86 NUMA systems Recent kernels have been panic'ing trying to allocate memory early in boot, in __alloc_pages: BUG: unable to handle kernel paging request at 00001568 IP: [] __alloc_pages+0x33/0x2cc *pdpt = 00000000013a5001 *pde = 0000000000000000 Oops: 0000 [#1] SMP Modules linked in: Pid: 1, comm: swapper Not tainted (2.6.25 #78) EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at __alloc_pages+0x33/0x2cc EAX: 00001564 EBX: 000412d0 ECX: 00001564 EDX: 000005c3 ESI: f78012a0 EDI: 00000001 EBP: 00001564 ESP: f7871e50 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 Process swapper (pid: 1, ti=f7870000 task=f786f670 task.ti=f7870000) Stack: 00000000 f786f670 00000010 00000000 0000b700 000412d0 f78012a0 00000001 00000000 c105b64d 00000000 000412d0 f78012a0 f7803120 00000000 c105c1c5 00000010 f7803144 000412d0 00000001 f7803130 f7803120 f78012a0 00000001 Call Trace: [] kmem_getpages+0x94/0x129 [] cache_grow+0x8f/0x123 [] ____cache_alloc_node+0xb9/0xe4 [] kmem_cache_alloc_node+0x92/0xd2 [] build_sched_domains+0x536/0x70d [] do_flush_tlb_all+0x0/0x3f [] do_flush_tlb_all+0x0/0x3f [] interleave_nodes+0x23/0x5a [] alternate_node_alloc+0x43/0x5b [] arch_init_sched_domains+0x46/0x51 [] kernel_init+0x0/0x82 [] sched_init_smp+0x10/0xbb [] kernel_init+0x43/0x82 [] kernel_thread_helper+0x7/0x10 Debugging this showed that the NODE_DATA() for nodes other than node 0 were all NULL. Tracing this back showed that the NODE_DATA() pointers were being initialised to each nodes remap space. However under SPARSEMEM remap is disabled which leads to the pgdat's being placed incorrectly at kernel virtual address 0. Leading to the panic when attempting to allocate memory from these nodes. Numa remap was disabled in the commit below. This occured while fixing problems triggered when attempting to boot x86_32 NUMA SPARSEMEM kernels on non-numa hardware. x86: make NUMA work on 32-bit commit 1b000a5dbeb2f34bc03d45ebdf3f6d24a60c3aed The real problem is believed to be related to other alignment issues in the regions blocked out from the bootmem allocator for small memory systems, and has been fixed separately. Therefore re-enable remap for SPARSMEM, which fixes pgdat allocation issues. Testing confirms that SPARSMEM NUMA kernels will boot correctly with this part of the change reverted. Signed-off-by: Andy Whitcroft Acked-by: Mel Gorman Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 914ccf9..026201f 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -164,16 +164,13 @@ static void __init allocate_pgdat(int nid) } } -#ifdef CONFIG_DISCONTIGMEM /* - * In the discontig memory model, a portion of the kernel virtual area (KVA) - * is reserved and portions of nodes are mapped using it. This is to allow - * node-local memory to be allocated for structures that would normally require - * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers - * should be prepared to allocate from the bootmem allocator instead. This KVA - * mechanism is incompatible with SPARSEMEM as it makes assumptions about the - * layout of memory that are broken if alloc_remap() succeeds for some of the - * map and fails for others + * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel + * virtual address space (KVA) is reserved and portions of nodes are mapped + * using it. This is to allow node-local memory to be allocated for + * structures that would normally require ZONE_NORMAL. The memory is + * allocated with alloc_remap() and callers should be prepared to allocate + * from the bootmem allocator instead. */ static unsigned long node_remap_start_pfn[MAX_NUMNODES]; static void *node_remap_end_vaddr[MAX_NUMNODES]; @@ -290,25 +287,6 @@ static void init_remap_allocator(int nid) (ulong) pfn_to_kaddr(highstart_pfn + node_remap_offset[nid] + node_remap_size[nid])); } -#else -void *alloc_remap(int nid, unsigned long size) -{ - return NULL; -} - -static unsigned long calculate_numa_remap_pages(void) -{ - return 0; -} - -static void init_remap_allocator(int nid) -{ -} - -void __init remap_numa_kva(void) -{ -} -#endif /* CONFIG_DISCONTIGMEM */ extern void setup_bootmem_allocator(void); unsigned long __init setup_memory(void) -- cgit v0.10.2 From 84d6bd0e272e6eace1e7e4c501f32bddfb665bb2 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Tue, 20 May 2008 11:01:19 +0100 Subject: x86: cope with no remap space being allocated for a numa node When allocating the pgdat's for numa nodes on x86_32 we attempt to place them in the numa remap space for that node. However should the node not have any remap space allocated (such as due to having non-ram pages in the remap location in the node) then we will incorrectly place the pgdat at zero. Check we have remap available, falling back to node 0 memory where we do not. Signed-off-by: Andy Whitcroft Acked-by: Mel Gorman Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 026201f..435c343 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -156,7 +156,7 @@ static void __init propagate_e820_map_node(int nid) */ static void __init allocate_pgdat(int nid) { - if (nid && node_has_online_mem(nid)) + if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid]) NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; else { NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); -- cgit v0.10.2 From aea3bfbcfb0453217c8da6cfdc1b2394d214bee5 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 20 May 2008 14:22:44 +0200 Subject: [ALSA] ice1724: fix MIDI The VT1724 MIDI port is not MPU-401 compatible; remove the hacks that try to make the MPU-401 library work with it, and just use some simple device-specific code. Signed-off-by: Clemens Ladisch Tested-by: Pavel Hofman diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index 7e47421..d95fbb2 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -692,7 +692,7 @@ config SND_ICE1712 config SND_ICE1724 tristate "ICE/VT1724/1720 (Envy24HT/PT)" depends on SND - select SND_MPU401_UART + select SND_RAWMIDI select SND_AC97_CODEC select SND_VMASTER help diff --git a/sound/pci/ice1712/envy24ht.h b/sound/pci/ice1712/envy24ht.h index 43b9e3e..a0c5e00 100644 --- a/sound/pci/ice1712/envy24ht.h +++ b/sound/pci/ice1712/envy24ht.h @@ -93,9 +93,13 @@ enum { #define VT1724_REG_MPU_TXFIFO 0x0a /*byte ro. number of bytes in TX fifo*/ #define VT1724_REG_MPU_RXFIFO 0x0b /*byte ro. number of bytes in RX fifo*/ -//are these 2 the wrong way around? they don't seem to be used yet anyway -#define VT1724_REG_MPU_CTRL 0x0c /* byte */ -#define VT1724_REG_MPU_DATA 0x0d /* byte */ +#define VT1724_REG_MPU_DATA 0x0c /* byte */ +#define VT1724_REG_MPU_CTRL 0x0d /* byte */ +#define VT1724_MPU_UART 0x01 +#define VT1724_MPU_TX_EMPTY 0x02 +#define VT1724_MPU_TX_FULL 0x04 +#define VT1724_MPU_RX_EMPTY 0x08 +#define VT1724_MPU_RX_FULL 0x10 #define VT1724_REG_MPU_FIFO_WM 0x0e /*byte set the high/low watermarks for RX/TX fifos*/ #define VT1724_MPU_RX_FIFO 0x20 //1=rx fifo watermark 0=tx fifo watermark diff --git a/sound/pci/ice1712/ice1712.h b/sound/pci/ice1712/ice1712.h index 3208901..762fbd7 100644 --- a/sound/pci/ice1712/ice1712.h +++ b/sound/pci/ice1712/ice1712.h @@ -333,6 +333,8 @@ struct snd_ice1712 { unsigned int has_spdif: 1; /* VT1720/4 - has SPDIF I/O */ unsigned int force_pdma4: 1; /* VT1720/4 - PDMA4 as non-spdif */ unsigned int force_rdma1: 1; /* VT1720/4 - RDMA1 as non-spdif */ + unsigned int midi_output: 1; /* VT1720/4: MIDI output triggered */ + unsigned int midi_input: 1; /* VT1720/4: MIDI input triggered */ unsigned int num_total_dacs; /* total DACs */ unsigned int num_total_adcs; /* total ADCs */ unsigned int cur_rate; /* current rate */ diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index 6735090..e596d77 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include @@ -223,30 +223,153 @@ static unsigned int snd_vt1724_get_gpio_data(struct snd_ice1712 *ice) } /* - * MPU401 accessor + * MIDI */ -static unsigned char snd_vt1724_mpu401_read(struct snd_mpu401 *mpu, - unsigned long addr) + +static void vt1724_midi_clear_rx(struct snd_ice1712 *ice) +{ + unsigned int count; + + for (count = inb(ICEREG1724(ice, MPU_RXFIFO)); count > 0; --count) + inb(ICEREG1724(ice, MPU_DATA)); +} + +static inline struct snd_rawmidi_substream * +get_rawmidi_substream(struct snd_ice1712 *ice, unsigned int stream) { - /* fix status bits to the standard position */ - /* only RX_EMPTY and TX_FULL are checked */ - if (addr == MPU401C(mpu)) - return (inb(addr) & 0x0c) << 4; + return list_first_entry(&ice->rmidi[0]->streams[stream].substreams, + struct snd_rawmidi_substream, list); +} + +static void vt1724_midi_write(struct snd_ice1712 *ice) +{ + struct snd_rawmidi_substream *s; + int count, i; + u8 buffer[32]; + + s = get_rawmidi_substream(ice, SNDRV_RAWMIDI_STREAM_OUTPUT); + count = 31 - inb(ICEREG1724(ice, MPU_TXFIFO)); + if (count > 0) { + count = snd_rawmidi_transmit(s, buffer, count); + for (i = 0; i < count; ++i) + outb(buffer[i], ICEREG1724(ice, MPU_DATA)); + } +} + +static void vt1724_midi_read(struct snd_ice1712 *ice) +{ + struct snd_rawmidi_substream *s; + int count, i; + u8 buffer[32]; + + s = get_rawmidi_substream(ice, SNDRV_RAWMIDI_STREAM_INPUT); + count = inb(ICEREG1724(ice, MPU_RXFIFO)); + if (count > 0) { + count = min(count, 32); + for (i = 0; i < count; ++i) + buffer[i] = inb(ICEREG1724(ice, MPU_DATA)); + snd_rawmidi_receive(s, buffer, count); + } +} + +static void vt1724_enable_midi_irq(struct snd_rawmidi_substream *substream, + u8 flag, int enable) +{ + struct snd_ice1712 *ice = substream->rmidi->private_data; + u8 mask; + + spin_lock_irq(&ice->reg_lock); + mask = inb(ICEREG1724(ice, IRQMASK)); + if (enable) + mask &= ~flag; else - return inb(addr); + mask |= flag; + outb(mask, ICEREG1724(ice, IRQMASK)); + spin_unlock_irq(&ice->reg_lock); } -static void snd_vt1724_mpu401_write(struct snd_mpu401 *mpu, - unsigned char data, unsigned long addr) +static int vt1724_midi_output_open(struct snd_rawmidi_substream *s) { - if (addr == MPU401C(mpu)) { - if (data == MPU401_ENTER_UART) - outb(0x01, addr); - /* what else? */ - } else - outb(data, addr); + vt1724_enable_midi_irq(s, VT1724_IRQ_MPU_TX, 1); + return 0; +} + +static int vt1724_midi_output_close(struct snd_rawmidi_substream *s) +{ + vt1724_enable_midi_irq(s, VT1724_IRQ_MPU_TX, 0); + return 0; } +static void vt1724_midi_output_trigger(struct snd_rawmidi_substream *s, int up) +{ + struct snd_ice1712 *ice = s->rmidi->private_data; + unsigned long flags; + + spin_lock_irqsave(&ice->reg_lock, flags); + if (up) { + ice->midi_output = 1; + vt1724_midi_write(ice); + } else { + ice->midi_output = 0; + } + spin_unlock_irqrestore(&ice->reg_lock, flags); +} + +static void vt1724_midi_output_drain(struct snd_rawmidi_substream *s) +{ + struct snd_ice1712 *ice = s->rmidi->private_data; + unsigned long timeout; + + /* 32 bytes should be transmitted in less than about 12 ms */ + timeout = jiffies + msecs_to_jiffies(15); + do { + if (inb(ICEREG1724(ice, MPU_CTRL)) & VT1724_MPU_TX_EMPTY) + break; + schedule_timeout_uninterruptible(1); + } while (time_after(timeout, jiffies)); +} + +static struct snd_rawmidi_ops vt1724_midi_output_ops = { + .open = vt1724_midi_output_open, + .close = vt1724_midi_output_close, + .trigger = vt1724_midi_output_trigger, + .drain = vt1724_midi_output_drain, +}; + +static int vt1724_midi_input_open(struct snd_rawmidi_substream *s) +{ + vt1724_midi_clear_rx(s->rmidi->private_data); + vt1724_enable_midi_irq(s, VT1724_IRQ_MPU_RX, 1); + return 0; +} + +static int vt1724_midi_input_close(struct snd_rawmidi_substream *s) +{ + vt1724_enable_midi_irq(s, VT1724_IRQ_MPU_RX, 0); + return 0; +} + +static void vt1724_midi_input_trigger(struct snd_rawmidi_substream *s, int up) +{ + struct snd_ice1712 *ice = s->rmidi->private_data; + unsigned long flags; + + spin_lock_irqsave(&ice->reg_lock, flags); + if (up) { + ice->midi_input = 1; + vt1724_midi_read(ice); + } else { + ice->midi_input = 0; + } + spin_unlock_irqrestore(&ice->reg_lock, flags); +} + +static struct snd_rawmidi_ops vt1724_midi_input_ops = { + .open = vt1724_midi_input_open, + .close = vt1724_midi_input_close, + .trigger = vt1724_midi_input_trigger, +}; + /* * Interrupt handler @@ -278,13 +401,10 @@ static irqreturn_t snd_vt1724_interrupt(int irq, void *dev_id) #endif handled = 1; if (status & VT1724_IRQ_MPU_TX) { - if (ice->rmidi[0]) - snd_mpu401_uart_interrupt_tx(irq, - ice->rmidi[0]->private_data); - else /* disable TX to be sure */ - outb(inb(ICEREG1724(ice, IRQMASK)) | - VT1724_IRQ_MPU_TX, - ICEREG1724(ice, IRQMASK)); + spin_lock(&ice->reg_lock); + if (ice->midi_output) + vt1724_midi_write(ice); + spin_unlock(&ice->reg_lock); /* Due to mysterical reasons, MPU_TX is always * generated (and can't be cleared) when a PCM * playback is going. So let's ignore at the @@ -293,13 +413,12 @@ static irqreturn_t snd_vt1724_interrupt(int irq, void *dev_id) status_mask &= ~VT1724_IRQ_MPU_TX; } if (status & VT1724_IRQ_MPU_RX) { - if (ice->rmidi[0]) - snd_mpu401_uart_interrupt(irq, - ice->rmidi[0]->private_data); - else /* disable RX to be sure */ - outb(inb(ICEREG1724(ice, IRQMASK)) | - VT1724_IRQ_MPU_RX, - ICEREG1724(ice, IRQMASK)); + spin_lock(&ice->reg_lock); + if (ice->midi_input) + vt1724_midi_read(ice); + else + vt1724_midi_clear_rx(ice); + spin_unlock(&ice->reg_lock); } /* ack MPU irq */ outb(status, ICEREG1724(ice, IRQSTAT)); @@ -2425,28 +2544,30 @@ static int __devinit snd_vt1724_probe(struct pci_dev *pci, if (! c->no_mpu401) { if (ice->eeprom.data[ICE_EEP2_SYSCONF] & VT1724_CFG_MPU401) { - struct snd_mpu401 *mpu; - if ((err = snd_mpu401_uart_new(card, 0, MPU401_HW_ICE1712, - ICEREG1724(ice, MPU_CTRL), - (MPU401_INFO_INTEGRATED | - MPU401_INFO_NO_ACK | - MPU401_INFO_TX_IRQ), - ice->irq, 0, - &ice->rmidi[0])) < 0) { + struct snd_rawmidi *rmidi; + + err = snd_rawmidi_new(card, "MIDI", 0, 1, 1, &rmidi); + if (err < 0) { snd_card_free(card); return err; } - mpu = ice->rmidi[0]->private_data; - mpu->read = snd_vt1724_mpu401_read; - mpu->write = snd_vt1724_mpu401_write; - /* unmask MPU RX/TX irqs */ - outb(inb(ICEREG1724(ice, IRQMASK)) & - ~(VT1724_IRQ_MPU_RX | VT1724_IRQ_MPU_TX), - ICEREG1724(ice, IRQMASK)); + ice->rmidi[0] = rmidi; + rmidi->private_data = ice; + strcpy(rmidi->name, "ICE1724 MIDI"); + rmidi->info_flags = SNDRV_RAWMIDI_INFO_OUTPUT | + SNDRV_RAWMIDI_INFO_INPUT | + SNDRV_RAWMIDI_INFO_DUPLEX; + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, + &vt1724_midi_output_ops); + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, + &vt1724_midi_input_ops); + /* set watermarks */ outb(VT1724_MPU_RX_FIFO | 0x1, ICEREG1724(ice, MPU_FIFO_WM)); outb(0x1, ICEREG1724(ice, MPU_FIFO_WM)); + /* set UART mode */ + outb(VT1724_MPU_UART, ICEREG1724(ice, MPU_CTRL)); } } -- cgit v0.10.2 From 6536d2891ba2c4e837ba8478dc13bb173ed24a23 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 21 May 2008 10:45:16 -0500 Subject: JFS: skip bad iput() call in error path If jfs_iget() fails, we can't call iput() on the returned error. Thanks to Eric Sesterhenn's fuzzer testing for reporting the problem. Signed-off-by: Dave Kleikamp diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 50ea654..0288e6d 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -499,7 +499,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) inode = jfs_iget(sb, ROOT_I); if (IS_ERR(inode)) { ret = PTR_ERR(inode); - goto out_no_root; + goto out_no_rw; } sb->s_root = d_alloc_root(inode); if (!sb->s_root) @@ -521,9 +521,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) return 0; out_no_root: - jfs_err("jfs_read_super: get root inode failed"); - if (inode) - iput(inode); + jfs_err("jfs_read_super: get root dentry failed"); + iput(inode); out_no_rw: rc = jfs_umount(sb); -- cgit v0.10.2 From 2ba4cc319ab26c56205d4f23724c4748a553c845 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 May 2008 15:37:05 +0200 Subject: rcu: fix nf_conntrack_helper.c build bug Signed-off-by: Ingo Molnar diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 7d1b117..8e0b4c8 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include -- cgit v0.10.2 From 0abbc78a0137fee60ef092f0b20a3d3d7e7e0cc2 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 20 May 2008 16:27:17 +0200 Subject: x86, aperture_64: use symbolic constants Factor-out common aperture_valid code. Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 02f4dba..5373f78 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -109,27 +109,6 @@ static u32 __init allocate_aperture(void) return (u32)__pa(p); } -static int __init aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) -{ - if (!aper_base) - return 0; - - if (aper_base + aper_size > 0x100000000UL) { - printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); - return 0; - } - if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n"); - return 0; - } - if (aper_size < min_size) { - printk(KERN_ERR "Aperture too small (%d MB) than (%d MB)\n", - aper_size>>20, min_size>>20); - return 0; - } - - return 1; -} /* Find a PCI capability */ static __u32 __init find_cap(int bus, int slot, int func, int cap) @@ -344,7 +323,7 @@ out: if (gart_fix_e820 && !fix && aper_enabled) { if (!e820_all_mapped(aper_base, aper_base + aper_size, E820_RESERVED)) { - /* reserved it, so we can resuse it in second kernel */ + /* reserve it, so we can reuse it in second kernel */ printk(KERN_INFO "update e820 for GART\n"); add_memory_region(aper_base, aper_size, E820_RESERVED); update_e820(); diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index e3c7ea0..f5af65a 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -228,24 +228,10 @@ static const struct agp_bridge_driver amd_8151_driver = { }; /* Some basic sanity checks for the aperture. */ -static int __devinit aperture_valid(u64 aper, u32 size) +static int __devinit agp_aperture_valid(u64 aper, u32 size) { - if (aper == 0) { - printk(KERN_ERR PFX "No aperture\n"); + if (!aperture_valid(aper, size, 32*1024*1024)) return 0; - } - if ((u64)aper + size > 0x100000000ULL) { - printk(KERN_ERR PFX "Aperture out of bounds\n"); - return 0; - } - if (e820_any_mapped(aper, aper + size, E820_RAM)) { - printk(KERN_ERR PFX "Aperture pointing to RAM\n"); - return 0; - } - if (size < 32*1024*1024) { - printk(KERN_ERR PFX "Aperture too small (%d MB)\n", size>>20); - return 0; - } /* Request the Aperture. This catches cases when someone else already put a mapping in there - happens with some very broken BIOS @@ -282,7 +268,7 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, nb_order = (nb_order >> 1) & 7; pci_read_config_dword(nb, AMD64_GARTAPERTUREBASE, &nb_base); nb_aper = nb_base << 25; - if (aperture_valid(nb_aper, (32*1024*1024)< + extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); extern int force_iommu, no_iommu; @@ -69,4 +71,26 @@ static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); } +static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) +{ + if (!aper_base) + return 0; + + if (aper_base + aper_size > 0x100000000ULL) { + printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); + return 0; + } + if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { + printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n"); + return 0; + } + if (aper_size < min_size) { + printk(KERN_ERR "Aperture too small (%d MB) than (%d MB)\n", + aper_size>>20, min_size>>20); + return 0; + } + + return 1; +} + #endif -- cgit v0.10.2 From f70c5253b41444fd2779e1f76bfe25811d9b8c23 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 12:22:36 +0100 Subject: [RTC] remove references to asm/mach/time.h asm/mach/time.h is the ARM header file for setting up kernel ticker timekeeping (be that the old jiffy interrupt or the new clocksource.) RTC drivers have no business using this header file, and in fact do not require it. Build tested on at91sam9rl, omap and s3c2410 configurations. Acked-by: Alessandro Zummo Acked-by: Andrew Victor Signed-off-by: Russell King diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 39e64ab..2af1e6f 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -31,8 +31,6 @@ #include #include -#include - #include diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c index 38d8742..f0246ef 100644 --- a/drivers/rtc/rtc-at91sam9.c +++ b/drivers/rtc/rtc-at91sam9.c @@ -19,7 +19,6 @@ #include #include -#include #include #include diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index 58f81c7..eb23d84 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -22,7 +22,6 @@ #include #include -#include /* The OMAP1 RTC is a year/month/day/hours/minutes/seconds BCD clock diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index f26e0ca..2c6e467 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -28,8 +28,6 @@ #include #include -#include - #include /* I have yet to find an S3C implementation with more than one -- cgit v0.10.2 From 2dba8518b7761aee3ba757b298efa15dd34eff18 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 12:08:04 +0100 Subject: [RTC] rtc-pl031: use proper resources, use proper apis, clean up includes Clean up PL031 RTC includes, make driver use proper resource checking, and use amba bus specific accessors. Acked-by: Alessandro Zummo Signed-off-by: Russell King diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index 2fd49ed..08b4610 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -12,23 +12,12 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - -#include #include #include #include -#include #include -#include -#include -#include - #include - -#include -#include -#include -#include +#include /* * Register definitions @@ -142,13 +131,12 @@ static int pl031_remove(struct amba_device *adev) { struct pl031_local *ldata = dev_get_drvdata(&adev->dev); - if (ldata) { - dev_set_drvdata(&adev->dev, NULL); - free_irq(adev->irq[0], ldata->rtc); - rtc_device_unregister(ldata->rtc); - iounmap(ldata->base); - kfree(ldata); - } + amba_set_drvdata(adev, NULL); + free_irq(adev->irq[0], ldata->rtc); + rtc_device_unregister(ldata->rtc); + iounmap(ldata->base); + kfree(ldata); + amba_release_regions(adev); return 0; } @@ -158,13 +146,15 @@ static int pl031_probe(struct amba_device *adev, void *id) int ret; struct pl031_local *ldata; + ret = amba_request_regions(adev, NULL); + if (ret) + goto err_req; ldata = kmalloc(sizeof(struct pl031_local), GFP_KERNEL); if (!ldata) { ret = -ENOMEM; goto out; } - dev_set_drvdata(&adev->dev, ldata); ldata->base = ioremap(adev->res.start, adev->res.end - adev->res.start + 1); @@ -173,6 +163,8 @@ static int pl031_probe(struct amba_device *adev, void *id) goto out_no_remap; } + amba_set_drvdata(adev, ldata); + if (request_irq(adev->irq[0], pl031_interrupt, IRQF_DISABLED, "rtc-pl031", ldata->rtc)) { ret = -EIO; @@ -192,10 +184,12 @@ out_no_rtc: free_irq(adev->irq[0], ldata->rtc); out_no_irq: iounmap(ldata->base); + amba_set_drvdata(adev, NULL); out_no_remap: - dev_set_drvdata(&adev->dev, NULL); kfree(ldata); out: + amba_release_regions(adev); +err_req: return ret; } -- cgit v0.10.2 From a190901c6b5f1f4a31681e8c69d811a4f9426e2b Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 12:08:36 +0100 Subject: [RTC] rtc-pl030: add driver, remove old non-rtc lib driver Convert Integrator PL030 RTC driver to use the RTC class interfaces. Acked-by: Alessandro Zummo Signed-off-by: Russell King diff --git a/arch/arm/mach-integrator/Makefile b/arch/arm/mach-integrator/Makefile index 158daaf..6a5ef8d 100644 --- a/arch/arm/mach-integrator/Makefile +++ b/arch/arm/mach-integrator/Makefile @@ -4,7 +4,7 @@ # Object file lists. -obj-y := clock.o core.o lm.o time.o +obj-y := clock.o core.o lm.o obj-$(CONFIG_ARCH_INTEGRATOR_AP) += integrator_ap.o obj-$(CONFIG_ARCH_INTEGRATOR_CP) += integrator_cp.o diff --git a/arch/arm/mach-integrator/time.c b/arch/arm/mach-integrator/time.c deleted file mode 100644 index 8508a0d..0000000 --- a/arch/arm/mach-integrator/time.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * linux/arch/arm/mach-integrator/time.c - * - * Copyright (C) 2000-2001 Deep Blue Solutions Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#define RTC_DR (0) -#define RTC_MR (4) -#define RTC_STAT (8) -#define RTC_EOI (8) -#define RTC_LR (12) -#define RTC_CR (16) -#define RTC_CR_MIE (1 << 0) - -extern int (*set_rtc)(void); -static void __iomem *rtc_base; - -static int integrator_set_rtc(void) -{ - __raw_writel(xtime.tv_sec, rtc_base + RTC_LR); - return 1; -} - -static int integrator_rtc_read_alarm(struct rtc_wkalrm *alrm) -{ - rtc_time_to_tm(readl(rtc_base + RTC_MR), &alrm->time); - return 0; -} - -static inline int integrator_rtc_set_alarm(struct rtc_wkalrm *alrm) -{ - unsigned long time; - int ret; - - /* - * At the moment, we can only deal with non-wildcarded alarm times. - */ - ret = rtc_valid_tm(&alrm->time); - if (ret == 0) - ret = rtc_tm_to_time(&alrm->time, &time); - if (ret == 0) - writel(time, rtc_base + RTC_MR); - return ret; -} - -static int integrator_rtc_read_time(struct rtc_time *tm) -{ - rtc_time_to_tm(readl(rtc_base + RTC_DR), tm); - return 0; -} - -/* - * Set the RTC time. Unfortunately, we can't accurately set - * the point at which the counter updates. - * - * Also, since RTC_LR is transferred to RTC_CR on next rising - * edge of the 1Hz clock, we must write the time one second - * in advance. - */ -static inline int integrator_rtc_set_time(struct rtc_time *tm) -{ - unsigned long time; - int ret; - - ret = rtc_tm_to_time(tm, &time); - if (ret == 0) - writel(time + 1, rtc_base + RTC_LR); - - return ret; -} - -static struct rtc_ops rtc_ops = { - .owner = THIS_MODULE, - .read_time = integrator_rtc_read_time, - .set_time = integrator_rtc_set_time, - .read_alarm = integrator_rtc_read_alarm, - .set_alarm = integrator_rtc_set_alarm, -}; - -static irqreturn_t arm_rtc_interrupt(int irq, void *dev_id) -{ - writel(0, rtc_base + RTC_EOI); - return IRQ_HANDLED; -} - -static int rtc_probe(struct amba_device *dev, void *id) -{ - int ret; - - if (rtc_base) - return -EBUSY; - - ret = amba_request_regions(dev, NULL); - if (ret) - goto out; - - rtc_base = ioremap(dev->res.start, SZ_4K); - if (!rtc_base) { - ret = -ENOMEM; - goto res_out; - } - - __raw_writel(0, rtc_base + RTC_CR); - __raw_writel(0, rtc_base + RTC_EOI); - - xtime.tv_sec = __raw_readl(rtc_base + RTC_DR); - - /* note that 'dev' is merely used for irq disambiguation; - * it is not actually referenced in the irq handler - */ - ret = request_irq(dev->irq[0], arm_rtc_interrupt, IRQF_DISABLED, - "rtc-pl030", dev); - if (ret) - goto map_out; - - ret = register_rtc(&rtc_ops); - if (ret) - goto irq_out; - - set_rtc = integrator_set_rtc; - return 0; - - irq_out: - free_irq(dev->irq[0], dev); - map_out: - iounmap(rtc_base); - rtc_base = NULL; - res_out: - amba_release_regions(dev); - out: - return ret; -} - -static int rtc_remove(struct amba_device *dev) -{ - set_rtc = NULL; - - writel(0, rtc_base + RTC_CR); - - free_irq(dev->irq[0], dev); - unregister_rtc(&rtc_ops); - - iounmap(rtc_base); - rtc_base = NULL; - amba_release_regions(dev); - - return 0; -} - -static struct timespec rtc_delta; - -static int rtc_suspend(struct amba_device *dev, pm_message_t state) -{ - struct timespec rtc; - - rtc.tv_sec = readl(rtc_base + RTC_DR); - rtc.tv_nsec = 0; - save_time_delta(&rtc_delta, &rtc); - - return 0; -} - -static int rtc_resume(struct amba_device *dev) -{ - struct timespec rtc; - - rtc.tv_sec = readl(rtc_base + RTC_DR); - rtc.tv_nsec = 0; - restore_time_delta(&rtc_delta, &rtc); - - return 0; -} - -static struct amba_id rtc_ids[] = { - { - .id = 0x00041030, - .mask = 0x000fffff, - }, - { 0, 0 }, -}; - -static struct amba_driver rtc_driver = { - .drv = { - .name = "rtc-pl030", - }, - .probe = rtc_probe, - .remove = rtc_remove, - .suspend = rtc_suspend, - .resume = rtc_resume, - .id_table = rtc_ids, -}; - -static int __init integrator_rtc_init(void) -{ - return amba_driver_register(&rtc_driver); -} - -static void __exit integrator_rtc_exit(void) -{ - amba_driver_unregister(&rtc_driver); -} - -module_init(integrator_rtc_init); -module_exit(integrator_rtc_exit); diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 6cc2c03..1b26eeb 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -458,6 +458,16 @@ config RTC_DRV_VR41XX To compile this driver as a module, choose M here: the module will be called rtc-vr41xx. +config RTC_DRV_PL030 + tristate "ARM AMBA PL030 RTC" + depends on ARM_AMBA + help + If you say Y here you will get access to ARM AMBA + PrimeCell PL030 RTC found on certain ARM SOCs. + + To compile this driver as a module, choose M here: the + module will be called rtc-pl030. + config RTC_DRV_PL031 tristate "ARM AMBA PL031 RTC" depends on ARM_AMBA diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 872f1218f..af3ee66 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o +obj-$(CONFIG_RTC_DRV_PL030) += rtc-pl030.o obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c new file mode 100644 index 0000000..8448eeb --- /dev/null +++ b/drivers/rtc/rtc-pl030.c @@ -0,0 +1,217 @@ +/* + * linux/drivers/rtc/rtc-pl030.c + * + * Copyright (C) 2000-2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include + +#define RTC_DR (0) +#define RTC_MR (4) +#define RTC_STAT (8) +#define RTC_EOI (8) +#define RTC_LR (12) +#define RTC_CR (16) +#define RTC_CR_MIE (1 << 0) + +struct pl030_rtc { + struct rtc_device *rtc; + void __iomem *base; +}; + +static irqreturn_t pl030_interrupt(int irq, void *dev_id) +{ + struct pl030_rtc *rtc = dev_id; + writel(0, rtc->base + RTC_EOI); + return IRQ_HANDLED; +} + +static int pl030_open(struct device *dev) +{ + return 0; +} + +static void pl030_release(struct device *dev) +{ +} + +static int pl030_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) +{ + return -ENOIOCTLCMD; +} + +static int pl030_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct pl030_rtc *rtc = dev_get_drvdata(dev); + + rtc_time_to_tm(readl(rtc->base + RTC_MR), &alrm->time); + return 0; +} + +static int pl030_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct pl030_rtc *rtc = dev_get_drvdata(dev); + unsigned long time; + int ret; + + /* + * At the moment, we can only deal with non-wildcarded alarm times. + */ + ret = rtc_valid_tm(&alrm->time); + if (ret == 0) + ret = rtc_tm_to_time(&alrm->time, &time); + if (ret == 0) + writel(time, rtc->base + RTC_MR); + return ret; +} + +static int pl030_read_time(struct device *dev, struct rtc_time *tm) +{ + struct pl030_rtc *rtc = dev_get_drvdata(dev); + + rtc_time_to_tm(readl(rtc->base + RTC_DR), tm); + + return 0; +} + +/* + * Set the RTC time. Unfortunately, we can't accurately set + * the point at which the counter updates. + * + * Also, since RTC_LR is transferred to RTC_CR on next rising + * edge of the 1Hz clock, we must write the time one second + * in advance. + */ +static int pl030_set_time(struct device *dev, struct rtc_time *tm) +{ + struct pl030_rtc *rtc = dev_get_drvdata(dev); + unsigned long time; + int ret; + + ret = rtc_tm_to_time(tm, &time); + if (ret == 0) + writel(time + 1, rtc->base + RTC_LR); + + return ret; +} + +static const struct rtc_class_ops pl030_ops = { + .open = pl030_open, + .release = pl030_release, + .ioctl = pl030_ioctl, + .read_time = pl030_read_time, + .set_time = pl030_set_time, + .read_alarm = pl030_read_alarm, + .set_alarm = pl030_set_alarm, +}; + +static int pl030_probe(struct amba_device *dev, void *id) +{ + struct pl030_rtc *rtc; + int ret; + + ret = amba_request_regions(dev, NULL); + if (ret) + goto err_req; + + rtc = kmalloc(sizeof(*rtc), GFP_KERNEL); + if (!rtc) { + ret = -ENOMEM; + goto err_rtc; + } + + rtc->base = ioremap(dev->res.start, SZ_4K); + if (!rtc->base) { + ret = -ENOMEM; + goto err_map; + } + + __raw_writel(0, rtc->base + RTC_CR); + __raw_writel(0, rtc->base + RTC_EOI); + + amba_set_drvdata(dev, rtc); + + ret = request_irq(dev->irq[0], pl030_interrupt, IRQF_DISABLED, + "rtc-pl030", rtc); + if (ret) + goto err_irq; + + rtc->rtc = rtc_device_register("pl030", &dev->dev, &pl030_ops, + THIS_MODULE); + if (IS_ERR(rtc->rtc)) { + ret = PTR_ERR(rtc->rtc); + goto err_reg; + } + + return 0; + + err_reg: + free_irq(dev->irq[0], rtc); + err_irq: + iounmap(rtc->base); + err_map: + kfree(rtc); + err_rtc: + amba_release_regions(dev); + err_req: + return ret; +} + +static int pl030_remove(struct amba_device *dev) +{ + struct pl030_rtc *rtc = amba_get_drvdata(dev); + + amba_set_drvdata(dev, NULL); + + writel(0, rtc->base + RTC_CR); + + free_irq(dev->irq[0], rtc); + rtc_device_unregister(rtc->rtc); + iounmap(rtc->base); + kfree(rtc); + amba_release_regions(dev); + + return 0; +} + +static struct amba_id pl030_ids[] = { + { + .id = 0x00041030, + .mask = 0x000fffff, + }, + { 0, 0 }, +}; + +static struct amba_driver pl030_driver = { + .drv = { + .name = "rtc-pl030", + }, + .probe = pl030_probe, + .remove = pl030_remove, + .id_table = pl030_ids, +}; + +static int __init pl030_init(void) +{ + return amba_driver_register(&pl030_driver); +} + +static void __exit pl030_exit(void) +{ + amba_driver_unregister(&pl030_driver); +} + +module_init(pl030_init); +module_exit(pl030_exit); + +MODULE_AUTHOR("Russell King "); +MODULE_DESCRIPTION("ARM AMBA PL030 RTC Driver"); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 7da285b626860eb6d35e08ae33eba90f0e83ad58 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 12:26:48 +0100 Subject: [RTC] remove unused asm/rtc.h includes from ARM RTC drivers On ARM, asm/rtc.h only contains definitions for the predecessor to the RTC class support. RTC class drivers should not be including this include. Build tested on at91sam9rl and s3c2410 configurations. Acked-by: Alessandro Zummo Signed-off-by: Russell King diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 2af1e6f..9c3db93 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -29,8 +29,6 @@ #include #include -#include - #include diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 2c6e467..fed86e5 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -26,8 +26,6 @@ #include #include #include -#include - #include /* I have yet to find an S3C implementation with more than one -- cgit v0.10.2 From 797276ec9e4d2ee210e11068a2ce815394fe8c58 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 12:30:41 +0100 Subject: [RTC] rtc-sa1100: remove dependence on asm/rtc.h Move the two functions rtc-sa1100 wants from the old ARM RTC library into the rtc-sa1100 driver. Acked-by: Alessandro Zummo Signed-off-by: Russell King diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c index 82f62d2..e31a687 100644 --- a/drivers/rtc/rtc-sa1100.c +++ b/drivers/rtc/rtc-sa1100.c @@ -33,7 +33,6 @@ #include #include -#include #ifdef CONFIG_ARCH_PXA #include @@ -47,6 +46,42 @@ static unsigned long rtc_freq = 1024; static struct rtc_time rtc_alarm; static DEFINE_SPINLOCK(sa1100_rtc_lock); +static inline int rtc_periodic_alarm(struct rtc_time *tm) +{ + return (tm->tm_year == -1) || + ((unsigned)tm->tm_mon >= 12) || + ((unsigned)(tm->tm_mday - 1) >= 31) || + ((unsigned)tm->tm_hour > 23) || + ((unsigned)tm->tm_min > 59) || + ((unsigned)tm->tm_sec > 59); +} + +/* + * Calculate the next alarm time given the requested alarm time mask + * and the current time. + */ +static void rtc_next_alarm_time(struct rtc_time *next, struct rtc_time *now, struct rtc_time *alrm) +{ + unsigned long next_time; + unsigned long now_time; + + next->tm_year = now->tm_year; + next->tm_mon = now->tm_mon; + next->tm_mday = now->tm_mday; + next->tm_hour = alrm->tm_hour; + next->tm_min = alrm->tm_min; + next->tm_sec = alrm->tm_sec; + + rtc_tm_to_time(now, &now_time); + rtc_tm_to_time(next, &next_time); + + if (next_time < now_time) { + /* Advance one day */ + next_time += 60 * 60 * 24; + rtc_time_to_tm(next_time, next); + } +} + static int rtc_update_alarm(struct rtc_time *alrm) { struct rtc_time alarm_tm, now_tm; -- cgit v0.10.2 From bedd78ca786c1d18c2a2785c7e40593dc9c9870f Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 20 Apr 2008 12:31:29 +0100 Subject: [RTC] remove old ARM rtc library code Now that all drivers using it are gone, remove the old ARM RTC library. Acked-by: Alessandro Zummo Signed-off-by: Russell King diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 3d0b9fa..325e4b6 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -2,7 +2,6 @@ # Makefile for the linux kernel. # -obj-y += rtctime.o obj-$(CONFIG_ARM_GIC) += gic.o obj-$(CONFIG_ARM_VIC) += vic.o obj-$(CONFIG_ICST525) += icst525.o diff --git a/arch/arm/common/rtctime.c b/arch/arm/common/rtctime.c deleted file mode 100644 index aa8f773..0000000 --- a/arch/arm/common/rtctime.c +++ /dev/null @@ -1,434 +0,0 @@ -/* - * linux/arch/arm/common/rtctime.c - * - * Copyright (C) 2003 Deep Blue Solutions Ltd. - * Based on sa1100-rtc.c, Nils Faerber, CIH, Nicolas Pitre. - * Based on rtc.c by Paul Gortmaker - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -static DECLARE_WAIT_QUEUE_HEAD(rtc_wait); -static struct fasync_struct *rtc_async_queue; - -/* - * rtc_lock protects rtc_irq_data - */ -static DEFINE_SPINLOCK(rtc_lock); -static unsigned long rtc_irq_data; - -/* - * rtc_sem protects rtc_inuse and rtc_ops - */ -static DEFINE_MUTEX(rtc_mutex); -static unsigned long rtc_inuse; -static struct rtc_ops *rtc_ops; - -#define rtc_epoch 1900UL - -/* - * Calculate the next alarm time given the requested alarm time mask - * and the current time. - */ -void rtc_next_alarm_time(struct rtc_time *next, struct rtc_time *now, struct rtc_time *alrm) -{ - unsigned long next_time; - unsigned long now_time; - - next->tm_year = now->tm_year; - next->tm_mon = now->tm_mon; - next->tm_mday = now->tm_mday; - next->tm_hour = alrm->tm_hour; - next->tm_min = alrm->tm_min; - next->tm_sec = alrm->tm_sec; - - rtc_tm_to_time(now, &now_time); - rtc_tm_to_time(next, &next_time); - - if (next_time < now_time) { - /* Advance one day */ - next_time += 60 * 60 * 24; - rtc_time_to_tm(next_time, next); - } -} -EXPORT_SYMBOL(rtc_next_alarm_time); - -static inline int rtc_arm_read_time(struct rtc_ops *ops, struct rtc_time *tm) -{ - memset(tm, 0, sizeof(struct rtc_time)); - return ops->read_time(tm); -} - -static inline int rtc_arm_set_time(struct rtc_ops *ops, struct rtc_time *tm) -{ - int ret; - - ret = rtc_valid_tm(tm); - if (ret == 0) - ret = ops->set_time(tm); - - return ret; -} - -static inline int rtc_arm_read_alarm(struct rtc_ops *ops, struct rtc_wkalrm *alrm) -{ - int ret = -EINVAL; - if (ops->read_alarm) { - memset(alrm, 0, sizeof(struct rtc_wkalrm)); - ret = ops->read_alarm(alrm); - } - return ret; -} - -static inline int rtc_arm_set_alarm(struct rtc_ops *ops, struct rtc_wkalrm *alrm) -{ - int ret = -EINVAL; - if (ops->set_alarm) - ret = ops->set_alarm(alrm); - return ret; -} - -void rtc_update(unsigned long num, unsigned long events) -{ - spin_lock(&rtc_lock); - rtc_irq_data = (rtc_irq_data + (num << 8)) | events; - spin_unlock(&rtc_lock); - - wake_up_interruptible(&rtc_wait); - kill_fasync(&rtc_async_queue, SIGIO, POLL_IN); -} -EXPORT_SYMBOL(rtc_update); - - -static ssize_t -rtc_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) -{ - DECLARE_WAITQUEUE(wait, current); - unsigned long data; - ssize_t ret; - - if (count < sizeof(unsigned long)) - return -EINVAL; - - add_wait_queue(&rtc_wait, &wait); - do { - __set_current_state(TASK_INTERRUPTIBLE); - - spin_lock_irq(&rtc_lock); - data = rtc_irq_data; - rtc_irq_data = 0; - spin_unlock_irq(&rtc_lock); - - if (data != 0) { - ret = 0; - break; - } - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - schedule(); - } while (1); - set_current_state(TASK_RUNNING); - remove_wait_queue(&rtc_wait, &wait); - - if (ret == 0) { - ret = put_user(data, (unsigned long __user *)buf); - if (ret == 0) - ret = sizeof(unsigned long); - } - return ret; -} - -static unsigned int rtc_poll(struct file *file, poll_table *wait) -{ - unsigned long data; - - poll_wait(file, &rtc_wait, wait); - - spin_lock_irq(&rtc_lock); - data = rtc_irq_data; - spin_unlock_irq(&rtc_lock); - - return data != 0 ? POLLIN | POLLRDNORM : 0; -} - -static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct rtc_ops *ops = file->private_data; - struct rtc_time tm; - struct rtc_wkalrm alrm; - void __user *uarg = (void __user *)arg; - int ret = -EINVAL; - - switch (cmd) { - case RTC_ALM_READ: - ret = rtc_arm_read_alarm(ops, &alrm); - if (ret) - break; - ret = copy_to_user(uarg, &alrm.time, sizeof(tm)); - if (ret) - ret = -EFAULT; - break; - - case RTC_ALM_SET: - ret = copy_from_user(&alrm.time, uarg, sizeof(tm)); - if (ret) { - ret = -EFAULT; - break; - } - alrm.enabled = 0; - alrm.pending = 0; - alrm.time.tm_mday = -1; - alrm.time.tm_mon = -1; - alrm.time.tm_year = -1; - alrm.time.tm_wday = -1; - alrm.time.tm_yday = -1; - alrm.time.tm_isdst = -1; - ret = rtc_arm_set_alarm(ops, &alrm); - break; - - case RTC_RD_TIME: - ret = rtc_arm_read_time(ops, &tm); - if (ret) - break; - ret = copy_to_user(uarg, &tm, sizeof(tm)); - if (ret) - ret = -EFAULT; - break; - - case RTC_SET_TIME: - if (!capable(CAP_SYS_TIME)) { - ret = -EACCES; - break; - } - ret = copy_from_user(&tm, uarg, sizeof(tm)); - if (ret) { - ret = -EFAULT; - break; - } - ret = rtc_arm_set_time(ops, &tm); - break; - - case RTC_EPOCH_SET: -#ifndef rtc_epoch - /* - * There were no RTC clocks before 1900. - */ - if (arg < 1900) { - ret = -EINVAL; - break; - } - if (!capable(CAP_SYS_TIME)) { - ret = -EACCES; - break; - } - rtc_epoch = arg; - ret = 0; -#endif - break; - - case RTC_EPOCH_READ: - ret = put_user(rtc_epoch, (unsigned long __user *)uarg); - break; - - case RTC_WKALM_SET: - ret = copy_from_user(&alrm, uarg, sizeof(alrm)); - if (ret) { - ret = -EFAULT; - break; - } - ret = rtc_arm_set_alarm(ops, &alrm); - break; - - case RTC_WKALM_RD: - ret = rtc_arm_read_alarm(ops, &alrm); - if (ret) - break; - ret = copy_to_user(uarg, &alrm, sizeof(alrm)); - if (ret) - ret = -EFAULT; - break; - - default: - if (ops->ioctl) - ret = ops->ioctl(cmd, arg); - break; - } - return ret; -} - -static int rtc_open(struct inode *inode, struct file *file) -{ - int ret; - - mutex_lock(&rtc_mutex); - - if (rtc_inuse) { - ret = -EBUSY; - } else if (!rtc_ops || !try_module_get(rtc_ops->owner)) { - ret = -ENODEV; - } else { - file->private_data = rtc_ops; - - ret = rtc_ops->open ? rtc_ops->open() : 0; - if (ret == 0) { - spin_lock_irq(&rtc_lock); - rtc_irq_data = 0; - spin_unlock_irq(&rtc_lock); - - rtc_inuse = 1; - } - } - mutex_unlock(&rtc_mutex); - - return ret; -} - -static int rtc_release(struct inode *inode, struct file *file) -{ - struct rtc_ops *ops = file->private_data; - - if (ops->release) - ops->release(); - - spin_lock_irq(&rtc_lock); - rtc_irq_data = 0; - spin_unlock_irq(&rtc_lock); - - module_put(rtc_ops->owner); - rtc_inuse = 0; - - return 0; -} - -static int rtc_fasync(int fd, struct file *file, int on) -{ - return fasync_helper(fd, file, on, &rtc_async_queue); -} - -static const struct file_operations rtc_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = rtc_read, - .poll = rtc_poll, - .ioctl = rtc_ioctl, - .open = rtc_open, - .release = rtc_release, - .fasync = rtc_fasync, -}; - -static struct miscdevice rtc_miscdev = { - .minor = RTC_MINOR, - .name = "rtc", - .fops = &rtc_fops, -}; - - -static int rtc_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) -{ - struct rtc_ops *ops = data; - struct rtc_wkalrm alrm; - struct rtc_time tm; - char *p = page; - - if (rtc_arm_read_time(ops, &tm) == 0) { - p += sprintf(p, - "rtc_time\t: %02d:%02d:%02d\n" - "rtc_date\t: %04d-%02d-%02d\n" - "rtc_epoch\t: %04lu\n", - tm.tm_hour, tm.tm_min, tm.tm_sec, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - rtc_epoch); - } - - if (rtc_arm_read_alarm(ops, &alrm) == 0) { - p += sprintf(p, "alrm_time\t: "); - if ((unsigned int)alrm.time.tm_hour <= 24) - p += sprintf(p, "%02d:", alrm.time.tm_hour); - else - p += sprintf(p, "**:"); - if ((unsigned int)alrm.time.tm_min <= 59) - p += sprintf(p, "%02d:", alrm.time.tm_min); - else - p += sprintf(p, "**:"); - if ((unsigned int)alrm.time.tm_sec <= 59) - p += sprintf(p, "%02d\n", alrm.time.tm_sec); - else - p += sprintf(p, "**\n"); - - p += sprintf(p, "alrm_date\t: "); - if ((unsigned int)alrm.time.tm_year <= 200) - p += sprintf(p, "%04d-", alrm.time.tm_year + 1900); - else - p += sprintf(p, "****-"); - if ((unsigned int)alrm.time.tm_mon <= 11) - p += sprintf(p, "%02d-", alrm.time.tm_mon + 1); - else - p += sprintf(p, "**-"); - if ((unsigned int)alrm.time.tm_mday <= 31) - p += sprintf(p, "%02d\n", alrm.time.tm_mday); - else - p += sprintf(p, "**\n"); - p += sprintf(p, "alrm_wakeup\t: %s\n", - alrm.enabled ? "yes" : "no"); - p += sprintf(p, "alrm_pending\t: %s\n", - alrm.pending ? "yes" : "no"); - } - - if (ops->proc) - p += ops->proc(p); - - return p - page; -} - -int register_rtc(struct rtc_ops *ops) -{ - int ret = -EBUSY; - - mutex_lock(&rtc_mutex); - if (rtc_ops == NULL) { - rtc_ops = ops; - - ret = misc_register(&rtc_miscdev); - if (ret == 0) - create_proc_read_entry("driver/rtc", 0, NULL, - rtc_read_proc, ops); - } - mutex_unlock(&rtc_mutex); - - return ret; -} -EXPORT_SYMBOL(register_rtc); - -void unregister_rtc(struct rtc_ops *rtc) -{ - mutex_lock(&rtc_mutex); - if (rtc == rtc_ops) { - remove_proc_entry("driver/rtc", NULL); - misc_deregister(&rtc_miscdev); - rtc_ops = NULL; - } - mutex_unlock(&rtc_mutex); -} -EXPORT_SYMBOL(unregister_rtc); diff --git a/include/asm-arm/rtc.h b/include/asm-arm/rtc.h deleted file mode 100644 index 1a5c923..0000000 --- a/include/asm-arm/rtc.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * linux/include/asm-arm/rtc.h - * - * Copyright (C) 2003 Deep Blue Solutions Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef ASMARM_RTC_H -#define ASMARM_RTC_H - -struct module; - -struct rtc_ops { - struct module *owner; - int (*open)(void); - void (*release)(void); - int (*ioctl)(unsigned int, unsigned long); - - int (*read_time)(struct rtc_time *); - int (*set_time)(struct rtc_time *); - int (*read_alarm)(struct rtc_wkalrm *); - int (*set_alarm)(struct rtc_wkalrm *); - int (*proc)(char *buf); -}; - -void rtc_next_alarm_time(struct rtc_time *, struct rtc_time *, struct rtc_time *); -void rtc_update(unsigned long, unsigned long); -int register_rtc(struct rtc_ops *); -void unregister_rtc(struct rtc_ops *); - -static inline int rtc_periodic_alarm(struct rtc_time *tm) -{ - return (tm->tm_year == -1) || - ((unsigned)tm->tm_mon >= 12) || - ((unsigned)(tm->tm_mday - 1) >= 31) || - ((unsigned)tm->tm_hour > 23) || - ((unsigned)tm->tm_min > 59) || - ((unsigned)tm->tm_sec > 59); -} - -#endif -- cgit v0.10.2 From 0748aca6f0af917591dcfd70dccac770a25d4f71 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 May 2008 10:34:20 +0200 Subject: x86: remove useless static current_tsc_khz variable current_tsc_khz is just written by the init code and never used again. Remove it. Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index e479072..d53b104 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -283,7 +283,6 @@ core_initcall(cpufreq_tsc); /* clock source code */ -static unsigned long current_tsc_khz; static struct clocksource clocksource_tsc; /* @@ -434,9 +433,8 @@ void __init tsc_init(void) unsynchronized_tsc(); check_geode_tsc_reliable(); - current_tsc_khz = tsc_khz; - clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, - clocksource_tsc.shift); + clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, + clocksource_tsc.shift); /* lower the rating if we already know its unstable: */ if (check_tsc_unstable()) { clocksource_tsc.rating = 0; -- cgit v0.10.2 From 3843fc2575e3389f4f0ad0420a720240a5746a5d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 9 May 2008 12:05:57 +0100 Subject: xen: remove support for non-PAE 32-bit Non-PAE operation has been deprecated in Xen for a while, and is rarely tested or used. xen-unstable has now officially dropped non-PAE support. Since Xen/pvops' non-PAE support has also been broken for a while, we may as well completely drop it altogether. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 2e641be..525b108 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -6,7 +6,7 @@ config XEN bool "Xen guest support" select PARAVIRT depends on X86_32 - depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER) + depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER) help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c8a56e4..a05b772 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -785,38 +785,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) static __init void xen_pagetable_setup_start(pgd_t *base) { pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; + int i; /* special set_pte for pagetable initialization */ pv_mmu_ops.set_pte = xen_set_pte_init; init_mm.pgd = base; /* - * copy top-level of Xen-supplied pagetable into place. For - * !PAE we can use this as-is, but for PAE it is a stand-in - * while we copy the pmd pages. + * copy top-level of Xen-supplied pagetable into place. This + * is a stand-in while we copy the pmd pages. */ memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); - if (PTRS_PER_PMD > 1) { - int i; - /* - * For PAE, need to allocate new pmds, rather than - * share Xen's, since Xen doesn't like pmd's being - * shared between address spaces. - */ - for (i = 0; i < PTRS_PER_PGD; i++) { - if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { - pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); + /* + * For PAE, need to allocate new pmds, rather than + * share Xen's, since Xen doesn't like pmd's being + * shared between address spaces. + */ + for (i = 0; i < PTRS_PER_PGD; i++) { + if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { + pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); - memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), - PAGE_SIZE); + memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), + PAGE_SIZE); - make_lowmem_page_readonly(pmd); + make_lowmem_page_readonly(pmd); - set_pgd(&base[i], __pgd(1 + __pa(pmd))); - } else - pgd_clear(&base[i]); - } + set_pgd(&base[i], __pgd(1 + __pa(pmd))); + } else + pgd_clear(&base[i]); } /* make sure zero_page is mapped RO so we can use it in pagetables */ @@ -873,17 +870,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ - { - unsigned level; - -#ifdef CONFIG_X86_PAE - level = MMUEXT_PIN_L3_TABLE; -#else - level = MMUEXT_PIN_L2_TABLE; -#endif - - pin_pagetable_pfn(level, PFN_DOWN(__pa(base))); - } + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); } /* This is called once we have the cpu_possible_map */ @@ -1093,7 +1080,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pte = xen_make_pte, .make_pgd = xen_make_pgd, -#ifdef CONFIG_X86_PAE .set_pte_atomic = xen_set_pte_atomic, .set_pte_present = xen_set_pte_at, .set_pud = xen_set_pud, @@ -1102,7 +1088,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pmd = xen_make_pmd, .pmd_val = xen_pmd_val, -#endif /* PAE */ .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 126766d..07c2653 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -222,7 +222,7 @@ pmdval_t xen_pmd_val(pmd_t pmd) ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; return ret; } -#ifdef CONFIG_X86_PAE + void xen_set_pud(pud_t *ptr, pud_t val) { struct multicall_space mcs; @@ -272,12 +272,6 @@ pmd_t xen_make_pmd(pmdval_t pmd) return native_make_pmd(pmd); } -#else /* !PAE */ -void xen_set_pte(pte_t *ptep, pte_t pte) -{ - *ptep = pte; -} -#endif /* CONFIG_X86_PAE */ /* (Yet another) pagetable walker. This one is intended for pinning a @@ -430,8 +424,6 @@ static int pin_page(struct page *page, enum pt_level level) read-only, and can be pinned. */ void xen_pgd_pin(pgd_t *pgd) { - unsigned level; - xen_mc_batch(); if (pgd_walk(pgd, pin_page, TASK_SIZE)) { @@ -441,14 +433,7 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_batch(); } -#ifdef CONFIG_X86_PAE - level = MMUEXT_PIN_L3_TABLE; -#else - level = MMUEXT_PIN_L2_TABLE; -#endif - - xen_do_pin(level, PFN_DOWN(__pa(pgd))); - + xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); xen_mc_issue(0); } diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index b5e189b..5fe961c 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -37,14 +37,13 @@ void xen_exit_mmap(struct mm_struct *mm); void xen_pgd_pin(pgd_t *pgd); //void xen_pgd_unpin(pgd_t *pgd); -#ifdef CONFIG_X86_PAE -unsigned long long xen_pte_val(pte_t); -unsigned long long xen_pmd_val(pmd_t); -unsigned long long xen_pgd_val(pgd_t); +pteval_t xen_pte_val(pte_t); +pmdval_t xen_pmd_val(pmd_t); +pgdval_t xen_pgd_val(pgd_t); -pte_t xen_make_pte(unsigned long long); -pmd_t xen_make_pmd(unsigned long long); -pgd_t xen_make_pgd(unsigned long long); +pte_t xen_make_pte(pteval_t); +pmd_t xen_make_pmd(pmdval_t); +pgd_t xen_make_pgd(pgdval_t); void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); @@ -53,15 +52,4 @@ void xen_set_pud(pud_t *ptr, pud_t val); void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_pmd_clear(pmd_t *pmdp); - -#else -unsigned long xen_pte_val(pte_t); -unsigned long xen_pmd_val(pmd_t); -unsigned long xen_pgd_val(pgd_t); - -pte_t xen_make_pte(unsigned long); -pmd_t xen_make_pmd(unsigned long); -pgd_t xen_make_pgd(unsigned long); -#endif - #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 288d587..2ab5f42 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -30,11 +30,7 @@ ENTRY(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") -#ifdef CONFIG_X86_PAE ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") -#else - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no") -#endif ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") #endif /*CONFIG_XEN */ diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h index baf3a4d..e11f240 100644 --- a/include/asm-x86/xen/page.h +++ b/include/asm-x86/xen/page.h @@ -150,13 +150,9 @@ static inline pte_t __pte_ma(pteval_t x) return (pte_t) { .pte = x }; } -#ifdef CONFIG_X86_PAE #define pmd_val_ma(v) ((v).pmd) #define pud_val_ma(v) ((v).pgd.pgd) #define __pmd_ma(x) ((pmd_t) { (x) } ) -#else /* !X86_PAE */ -#define pmd_val_ma(v) ((v).pud.pgd.pgd) -#endif /* CONFIG_X86_PAE */ #define pgd_val_ma(x) ((x).pgd) -- cgit v0.10.2 From b478458aeebfc55fe409abec43794ac72a623c79 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:39 +0200 Subject: x86: avoid re-loading LDT in unrelated address spaces Performance optimization. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 0224c36..21f2bae 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -20,9 +20,9 @@ #include #ifdef CONFIG_SMP -static void flush_ldt(void *null) +static void flush_ldt(void *current_mm) { - if (current->active_mm) + if (current->active_mm == current_mm) load_LDT(¤t->active_mm->context); } #endif @@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) load_LDT(pc); mask = cpumask_of_cpu(smp_processor_id()); if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); + smp_call_function(flush_ldt, current->mm, 1, 1); preempt_enable(); #else load_LDT(pc); -- cgit v0.10.2 From 873b274a41c0cfe58b2eb0a7722424eb367b905b Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 May 2008 13:02:23 -0400 Subject: x86: Add Centaur and Transmeta CPUs to PAT whitelist Unconditionally enable PAT support on Centaur and Transmeta CPUs. All known models that advertise PAT have no known errata. Signed-off-by: Dave Jones Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index c2e1ce3..d8b3e4a 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -62,6 +62,9 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) return; break; + case X86_VENDOR_CENTAUR: + case X86_VENDOR_TRANSMETA: + return; } pat_disable(cpu_has_pat ? -- cgit v0.10.2 From c3ed64295f0fed9131b42122234b1ce4a4a266cf Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 May 2008 10:44:39 -0700 Subject: x86: change maximum NR_CPUS to 4096 and MAX_NUMNODES to 512 * Change the range of NR_CPUS from 2-255 to 2-4096 and change the range of MAX_NUMNODES (NODES_SHIFT) from 1-32768 to 1-512. * Alter comment about how much each increment of NR_CPUS consumes. (This was found by configuring for 256 cpus and then 512 cpus and dividing the difference by 256.) Signed-off-by: Mike Travis Cc: Andrew Morton Cc: Dave Jones Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae..bbafeac 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -565,18 +565,18 @@ config IOMMU_HELPER def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB) config NR_CPUS - int "Maximum number of CPUs (2-255)" - range 2 255 + int "Maximum number of CPUs (2-4096)" + range 2 4096 depends on SMP default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 default "8" help This allows you to specify the maximum number of CPUs which this - kernel will support. The maximum supported value is 255 and the + kernel will support. The maximum supported value is 4096 and the minimum value which makes sense is 2. This is purely to save memory - each supported CPU adds - approximately eight kilobytes to the kernel image. + approximately one kilobyte to the kernel image. config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" @@ -968,8 +968,8 @@ config NUMA_EMU number of nodes. This is only useful for debugging. config NODES_SHIFT - int "Max num nodes shift(1-15)" - range 1 15 if X86_64 + int "Max num nodes shift(1-9)" + range 1 9 if X86_64 default "6" if X86_64 default "4" if X86_NUMAQ default "3" -- cgit v0.10.2 From bd3bff9e20f454b242d979ec2f9a4dca0d5fa06f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: sched: add latency tracer callbacks to the scheduler add 3 lightweight callbacks to the tracer backend. zero impact if tracing is turned off. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a61..717cab8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2117,6 +2117,32 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +#else +static inline void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +{ +} +#endif + +#ifdef CONFIG_SCHED_TRACER +extern void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); +extern void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); +#else +static inline void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +static inline void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +#endif + extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); diff --git a/kernel/sched.c b/kernel/sched.c index cfa222a..463dcdb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2467,6 +2467,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) out_activate: #endif /* CONFIG_SMP */ + ftrace_wake_up_task(p, rq->curr); schedstat_inc(p, se.nr_wakeups); if (sync) schedstat_inc(p, se.nr_wakeups_sync); @@ -2611,6 +2612,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } + ftrace_wake_up_new_task(p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) @@ -2783,6 +2785,7 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); + ftrace_ctx_switch(prev, next); mm = next->mm; oldmm = prev->active_mm; /* -- cgit v0.10.2 From 7c731e0a495e25e79dc1e9e68772a67a55721a65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: make the task state char-string visible to all The tracer wants to be able to convert the state number into a user visible character. This patch pulls that conversion string out the scheduler into the header. This way if it were to ever change, other parts of the kernel will know. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/sched.h b/include/linux/sched.h index 717cab8..6e26f1f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2237,6 +2237,8 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +#define TASK_STATE_TO_CHAR_STR "RSDTtZX" + #endif /* __KERNEL__ */ #endif diff --git a/kernel/sched.c b/kernel/sched.c index 463dcdb..73e6008 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5729,7 +5729,7 @@ out_unlock: return retval; } -static const char stat_nam[] = "RSDTtZX"; +static const char stat_nam[] = TASK_STATE_TO_CHAR_STR; void sched_show_task(struct task_struct *p) { -- cgit v0.10.2 From 502825282e6f79c975a644afc124432ec1744de4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: add preempt_enable/disable notrace macros The tracer may need to call preempt_enable and disable functions for time keeping and such. The trace gets ugly when we see these functions show up for all traces. To make the output cleaner this patch adds preempt_enable_notrace and preempt_disable_notrace to be used by tracer (and debugging) functions. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 23f0c54..36b03d5 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -52,6 +52,34 @@ do { \ preempt_check_resched(); \ } while (0) +/* For debugging and tracer internals only! */ +#define add_preempt_count_notrace(val) \ + do { preempt_count() += (val); } while (0) +#define sub_preempt_count_notrace(val) \ + do { preempt_count() -= (val); } while (0) +#define inc_preempt_count_notrace() add_preempt_count_notrace(1) +#define dec_preempt_count_notrace() sub_preempt_count_notrace(1) + +#define preempt_disable_notrace() \ +do { \ + inc_preempt_count_notrace(); \ + barrier(); \ +} while (0) + +#define preempt_enable_no_resched_notrace() \ +do { \ + barrier(); \ + dec_preempt_count_notrace(); \ +} while (0) + +/* preempt_check_resched is OK to trace */ +#define preempt_enable_notrace() \ +do { \ + preempt_enable_no_resched_notrace(); \ + barrier(); \ + preempt_check_resched(); \ +} while (0) + #else #define preempt_disable() do { } while (0) @@ -59,6 +87,10 @@ do { \ #define preempt_enable() do { } while (0) #define preempt_check_resched() do { } while (0) +#define preempt_disable_notrace() do { } while (0) +#define preempt_enable_no_resched_notrace() do { } while (0) +#define preempt_enable_notrace() do { } while (0) + #endif #ifdef CONFIG_PREEMPT_NOTIFIERS -- cgit v0.10.2 From ffdc1a09ae7e2cbd714a446ee38a27f625b5f1c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: tracing: add notrace to linkage.h notrace signals that a function should not be traced. Most of the time this is used by tracers to annotate code that cannot be traced - it's in a volatile state (such as in user vdso context or NMI context) or it's in the tracer internals. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 2119610..14f329c 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -3,6 +3,8 @@ #include +#define notrace __attribute__((no_instrument_function)) + #ifdef __cplusplus #define CPP_ASMLINKAGE extern "C" #else -- cgit v0.10.2 From 23adec554a7648f99c8acc0caf49c66320cd2b84 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: x86: add notrace annotations to vsyscall. Add the notrace annotations to the vsyscall functions - there we are not in kernel context yet, so the tracer function cannot (and must not) be called. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 61efa2f..4063dfa 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -42,7 +42,8 @@ #include #include -#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) +#define __vsyscall(nr) \ + __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace #define __syscall_clobber "r11","cx","memory" /* diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 23476c2..5cb8f75 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -23,7 +23,7 @@ #define gtod vdso_vsyscall_gtod_data -static long vdso_fallback_gettime(long clock, struct timespec *ts) +notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; asm("syscall" : "=a" (ret) : @@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts) return ret; } -static inline long vgetns(void) +notrace static inline long vgetns(void) { long v; cycles_t (*vread)(void); @@ -40,7 +40,7 @@ static inline long vgetns(void) return (v * gtod->clock.mult) >> gtod->clock.shift; } -static noinline int do_realtime(struct timespec *ts) +notrace static noinline int do_realtime(struct timespec *ts) { unsigned long seq, ns; do { @@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts) } /* Copy of the version in kernel/time.c which we cannot directly access */ -static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) +notrace static void +vset_normalized_timespec(struct timespec *ts, long sec, long nsec) { while (nsec >= NSEC_PER_SEC) { nsec -= NSEC_PER_SEC; @@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) ts->tv_nsec = nsec; } -static noinline int do_monotonic(struct timespec *ts) +notrace static noinline int do_monotonic(struct timespec *ts) { unsigned long seq, ns, secs; do { @@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts) return 0; } -int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { if (likely(gtod->sysctl_enabled && gtod->clock.vread)) switch (clock) { @@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) int clock_gettime(clockid_t, struct timespec *) __attribute__((weak, alias("__vdso_clock_gettime"))); -int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) { long ret; if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c index c8097f1..9fbc6b2 100644 --- a/arch/x86/vdso/vgetcpu.c +++ b/arch/x86/vdso/vgetcpu.c @@ -13,7 +13,8 @@ #include #include "vextern.h" -long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +notrace long +__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) { unsigned int p; diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h index 17b3700..6b66ff9 100644 --- a/include/asm-x86/vsyscall.h +++ b/include/asm-x86/vsyscall.h @@ -24,7 +24,8 @@ enum vsyscall_num { ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) #define __section_vsyscall_clock __attribute__ \ ((unused, __section__ (".vsyscall_clock"),aligned(16))) -#define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn"))) +#define __vsyscall_fn \ + __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace #define VGETCPU_RDTSCP 1 #define VGETCPU_LSL 2 -- cgit v0.10.2 From 6e766410c4babd37bc7cd5e25009c179781742c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: annotate core code that should not be traced Mark with "notrace" functions in core code that should not be traced. The "notrace" attribute will prevent gcc from adding a call to ftrace on the annotated funtions. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 6c90fb9..e555ab6 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -7,7 +7,7 @@ #include #include -unsigned int debug_smp_processor_id(void) +notrace unsigned int debug_smp_processor_id(void) { unsigned long preempt_count = preempt_count(); int this_cpu = raw_smp_processor_id(); -- cgit v0.10.2 From 16444a8a40d4c7b4f6de34af0cae1f76a4f6c901 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: add basic support for gcc profiler instrumentation If CONFIG_FTRACE is selected and /proc/sys/kernel/ftrace_enabled is set to a non-zero value the ftrace routine will be called everytime we enter a kernel function that is not marked with the "notrace" attribute. The ftrace routine will then call a registered function if a function happens to be registered. [ This code has been highly hacked by Steven Rostedt and Ingo Molnar, so don't blame Arnaldo for all of this ;-) ] Update: It is now possible to register more than one ftrace function. If only one ftrace function is registered, that will be the function that ftrace calls directly. If more than one function is registered, then ftrace will call a function that will loop through the functions to call. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/Makefile b/Makefile index 20b3235..b4a273f 100644 --- a/Makefile +++ b/Makefile @@ -528,6 +528,10 @@ KBUILD_CFLAGS += -g KBUILD_AFLAGS += -gdwarf-2 endif +ifdef CONFIG_FTRACE +KBUILD_CFLAGS += -pg +endif + # We trigger additional mismatches with less inlining ifdef CONFIG_DEBUG_SECTION_MISMATCH KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae..c742dfe 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,6 +23,7 @@ config X86 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2a609dc..f47b9b5 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1109,6 +1109,33 @@ ENDPROC(xen_failsafe_callback) #endif /* CONFIG_XEN */ +#ifdef CONFIG_FTRACE +ENTRY(mcount) + cmpl $ftrace_stub, ftrace_trace_function + jnz trace + +.globl ftrace_stub +ftrace_stub: + ret + + /* taken from glibc */ +trace: + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + movl 0x4(%ebp), %edx + + call *ftrace_trace_function + + popl %edx + popl %ecx + popl %eax + + jmp ftrace_stub +END(mcount) +#endif + .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 556a8df..f046e0c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -54,6 +54,43 @@ .code64 +#ifdef CONFIG_FTRACE +ENTRY(mcount) + cmpq $ftrace_stub, ftrace_trace_function + jnz trace +.globl ftrace_stub +ftrace_stub: + retq + +trace: + /* taken from glibc */ + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + + movq 0x38(%rsp), %rdi + movq 8(%rbp), %rsi + + call *ftrace_trace_function + + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + + jmp ftrace_stub +END(mcount) +#endif + #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h new file mode 100644 index 0000000..b96ef14 --- /dev/null +++ b/include/linux/ftrace.h @@ -0,0 +1,38 @@ +#ifndef _LINUX_FTRACE_H +#define _LINUX_FTRACE_H + +#ifdef CONFIG_FTRACE + +#include + +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) + +typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); + +struct ftrace_ops { + ftrace_func_t func; + struct ftrace_ops *next; +}; + +/* + * The ftrace_ops must be a static and should also + * be read_mostly. These functions do modify read_mostly variables + * so use them sparely. Never free an ftrace_op or modify the + * next pointer after it has been registered. Even after unregistering + * it, the next pointer may still be used internally. + */ +int register_ftrace_function(struct ftrace_ops *ops); +int unregister_ftrace_function(struct ftrace_ops *ops); +void clear_ftrace_function(void); + +extern void ftrace_stub(unsigned long a0, unsigned long a1); +extern void mcount(void); + +#else /* !CONFIG_FTRACE */ +# define register_ftrace_function(ops) do { } while (0) +# define unregister_ftrace_function(ops) do { } while (0) +# define clear_ftrace_function(ops) do { } while (0) +#endif /* CONFIG_FTRACE */ +#endif /* _LINUX_FTRACE_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 1c9938a..fa05f6d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_LATENCYTOP) += latencytop.o +obj-$(CONFIG_FTRACE) += trace/ ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig new file mode 100644 index 0000000..8185c91 --- /dev/null +++ b/kernel/trace/Kconfig @@ -0,0 +1,5 @@ +# +# Architectures that offer an FTRACE implementation should select HAVE_FTRACE: +# +config HAVE_FTRACE + bool diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile new file mode 100644 index 0000000..bf4fd21 --- /dev/null +++ b/kernel/trace/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_FTRACE) += libftrace.o + +libftrace-y := ftrace.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c new file mode 100644 index 0000000..b6a80b9 --- /dev/null +++ b/kernel/trace/ftrace.c @@ -0,0 +1,138 @@ +/* + * Infrastructure for profiling code inserted by 'gcc -pg'. + * + * Copyright (C) 2007-2008 Steven Rostedt + * Copyright (C) 2004-2008 Ingo Molnar + * + * Originally ported from the -rt patch by: + * Copyright (C) 2007 Arnaldo Carvalho de Melo + * + * Based on code in the latency_tracer, that is: + * + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ + +#include +#include + +static DEFINE_SPINLOCK(ftrace_func_lock); +static struct ftrace_ops ftrace_list_end __read_mostly = +{ + .func = ftrace_stub, +}; + +static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; +ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; + +/* mcount is defined per arch in assembly */ +EXPORT_SYMBOL(mcount); + +notrace void ftrace_list_func(unsigned long ip, unsigned long parent_ip) +{ + struct ftrace_ops *op = ftrace_list; + + /* in case someone actually ports this to alpha! */ + read_barrier_depends(); + + while (op != &ftrace_list_end) { + /* silly alpha */ + read_barrier_depends(); + op->func(ip, parent_ip); + op = op->next; + }; +} + +/** + * register_ftrace_function - register a function for profiling + * @ops - ops structure that holds the function for profiling. + * + * Register a function to be called by all functions in the + * kernel. + * + * Note: @ops->func and all the functions it calls must be labeled + * with "notrace", otherwise it will go into a + * recursive loop. + */ +int register_ftrace_function(struct ftrace_ops *ops) +{ + unsigned long flags; + + spin_lock_irqsave(&ftrace_func_lock, flags); + ops->next = ftrace_list; + /* + * We are entering ops into the ftrace_list but another + * CPU might be walking that list. We need to make sure + * the ops->next pointer is valid before another CPU sees + * the ops pointer included into the ftrace_list. + */ + smp_wmb(); + ftrace_list = ops; + /* + * For one func, simply call it directly. + * For more than one func, call the chain. + */ + if (ops->next == &ftrace_list_end) + ftrace_trace_function = ops->func; + else + ftrace_trace_function = ftrace_list_func; + spin_unlock_irqrestore(&ftrace_func_lock, flags); + + return 0; +} + +/** + * unregister_ftrace_function - unresgister a function for profiling. + * @ops - ops structure that holds the function to unregister + * + * Unregister a function that was added to be called by ftrace profiling. + */ +int unregister_ftrace_function(struct ftrace_ops *ops) +{ + unsigned long flags; + struct ftrace_ops **p; + int ret = 0; + + spin_lock_irqsave(&ftrace_func_lock, flags); + + /* + * If we are the only function, then the ftrace pointer is + * pointing directly to that function. + */ + if (ftrace_list == ops && ops->next == &ftrace_list_end) { + ftrace_trace_function = ftrace_stub; + ftrace_list = &ftrace_list_end; + goto out; + } + + for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) + if (*p == ops) + break; + + if (*p != ops) { + ret = -1; + goto out; + } + + *p = (*p)->next; + + /* If we only have one func left, then call that directly */ + if (ftrace_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_list->func; + + out: + spin_unlock_irqrestore(&ftrace_func_lock, flags); + + return 0; +} + +/** + * clear_ftrace_function - reset the ftrace function + * + * This NULLs the ftrace function and in essence stops + * tracing. There may be lag + */ +void clear_ftrace_function(void) +{ + ftrace_trace_function = ftrace_stub; +} diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d2099f4..d8b6279 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -634,6 +634,8 @@ config LATENCYTOP Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. +source kernel/trace/Kconfig + config PROVIDE_OHCI1394_DMA_INIT bool "Remote debugging over FireWire early on boot" depends on PCI && X86 -- cgit v0.10.2 From bc0c38d139ec7fcd5c030aea16b008f3732e42ac Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: latency tracer infrastructure This patch adds the latency tracer infrastructure. This patch does not add anything that will select and turn it on, but will be used by later patches. If it were to be compiled, it would add the following files to the debugfs: The root tracing directory: /debugfs/tracing/ This patch also adds the following files: available_tracers list of available tracers. Currently no tracers are available. Looking into this file only shows "none" which is used to unregister all tracers. current_tracer The trace that is currently active. Empty on start up. To switch to a tracer simply echo one of the tracers that are listed in available_tracers: example: (used with later patches) echo function > /debugfs/tracing/current_tracer To disable the tracer: echo disable > /debugfs/tracing/current_tracer tracing_enabled echoing "1" into this file starts the ftrace function tracing (if sysctl kernel.ftrace_enabled=1) echoing "0" turns it off. latency_trace This file is readonly and holds the result of the trace. trace This file outputs a easier to read version of the trace. iter_ctrl Controls the way the output of traces look. So far there's two controls: echoing in "symonly" will only show the kallsyms variables without the addresses (if kallsyms was configured) echoing in "verbose" will change the output to show a lot more data, but not very easy to understand by humans. echoing in "nosymonly" turns off symonly. echoing in "noverbose" turns off verbose. Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/Makefile b/kernel/Makefile index fa05f6d..7e344e7 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_LATENCYTOP) += latencytop.o obj-$(CONFIG_FTRACE) += trace/ +obj-$(CONFIG_TRACING) += trace/ ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8185c91..ce70677 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -3,3 +3,8 @@ # config HAVE_FTRACE bool + +config TRACING + bool + select DEBUG_FS + diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index bf4fd21..7af4031 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -1,3 +1,5 @@ obj-$(CONFIG_FTRACE) += libftrace.o +obj-$(CONFIG_TRACING) += trace.o + libftrace-y := ftrace.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c new file mode 100644 index 0000000..1b8eca7 --- /dev/null +++ b/kernel/trace/trace.c @@ -0,0 +1,1547 @@ +/* + * ring buffer based function tracer + * + * Copyright (C) 2007-2008 Steven Rostedt + * Copyright (C) 2008 Ingo Molnar + * + * Originally taken from the RT patch by: + * Arnaldo Carvalho de Melo + * + * Based on code from the latency_tracer, that is: + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; +unsigned long __read_mostly tracing_thresh; + +static long notrace +ns2usecs(cycle_t nsec) +{ + nsec += 500; + do_div(nsec, 1000); + return nsec; +} + +static atomic_t tracer_counter; +static struct trace_array global_trace; + +static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); + +static struct trace_array max_tr; + +static DEFINE_PER_CPU(struct trace_array_cpu, max_data); + +static int tracer_enabled; +static unsigned long trace_nr_entries = 4096UL; + +static struct tracer *trace_types __read_mostly; +static struct tracer *current_trace __read_mostly; +static int max_tracer_type_len; + +static DEFINE_MUTEX(trace_types_lock); + +static int __init set_nr_entries(char *str) +{ + if (!str) + return 0; + trace_nr_entries = simple_strtoul(str, &str, 0); + return 1; +} +__setup("trace_entries=", set_nr_entries); + +enum trace_type { + __TRACE_FIRST_TYPE = 0, + + TRACE_FN, + TRACE_CTX, + + __TRACE_LAST_TYPE +}; + +enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_NEED_RESCHED = 0x02, + TRACE_FLAG_HARDIRQ = 0x04, + TRACE_FLAG_SOFTIRQ = 0x08, +}; + +enum trace_iterator_flags { + TRACE_ITER_PRINT_PARENT = 0x01, + TRACE_ITER_SYM_OFFSET = 0x02, + TRACE_ITER_SYM_ADDR = 0x04, + TRACE_ITER_VERBOSE = 0x08, +}; + +#define TRACE_ITER_SYM_MASK \ + (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) + +/* These must match the bit postions above */ +static const char *trace_options[] = { + "print-parent", + "sym-offset", + "sym-addr", + "verbose", + NULL +}; + +static unsigned trace_flags; + + +/* + * Copy the new maximum trace into the separate maximum-trace + * structure. (this way the maximum trace is permanently saved, + * for later retrieval via /debugfs/tracing/latency_trace) + */ +static void notrace +__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) +{ + struct trace_array_cpu *data = tr->data[cpu]; + + max_tr.cpu = cpu; + max_tr.time_start = data->preempt_timestamp; + + data = max_tr.data[cpu]; + data->saved_latency = tracing_max_latency; + + memcpy(data->comm, tsk->comm, TASK_COMM_LEN); + data->pid = tsk->pid; + data->uid = tsk->uid; + data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; + data->policy = tsk->policy; + data->rt_priority = tsk->rt_priority; + + /* record this tasks comm */ + tracing_record_cmdline(current); +} + +notrace void +update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) +{ + struct trace_array_cpu *data; + void *save_trace; + int i; + + /* clear out all the previous traces */ + for_each_possible_cpu(i) { + data = tr->data[i]; + save_trace = max_tr.data[i]->trace; + memcpy(max_tr.data[i], data, sizeof(*data)); + data->trace = save_trace; + } + + __update_max_tr(tr, tsk, cpu); +} + +/** + * update_max_tr_single - only copy one trace over, and reset the rest + * @tr - tracer + * @tsk - task with the latency + * @cpu - the cpu of the buffer to copy. + */ +notrace void +update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) +{ + struct trace_array_cpu *data = tr->data[cpu]; + void *save_trace; + int i; + + for_each_possible_cpu(i) + tracing_reset(max_tr.data[i]); + + save_trace = max_tr.data[cpu]->trace; + memcpy(max_tr.data[cpu], data, sizeof(*data)); + data->trace = save_trace; + + __update_max_tr(tr, tsk, cpu); +} + +int register_tracer(struct tracer *type) +{ + struct tracer *t; + int len; + int ret = 0; + + if (!type->name) { + pr_info("Tracer must have a name\n"); + return -1; + } + + mutex_lock(&trace_types_lock); + for (t = trace_types; t; t = t->next) { + if (strcmp(type->name, t->name) == 0) { + /* already found */ + pr_info("Trace %s already registered\n", + type->name); + ret = -1; + goto out; + } + } + + type->next = trace_types; + trace_types = type; + len = strlen(type->name); + if (len > max_tracer_type_len) + max_tracer_type_len = len; + out: + mutex_unlock(&trace_types_lock); + + return ret; +} + +void unregister_tracer(struct tracer *type) +{ + struct tracer **t; + int len; + + mutex_lock(&trace_types_lock); + for (t = &trace_types; *t; t = &(*t)->next) { + if (*t == type) + goto found; + } + pr_info("Trace %s not registered\n", type->name); + goto out; + + found: + *t = (*t)->next; + if (strlen(type->name) != max_tracer_type_len) + goto out; + + max_tracer_type_len = 0; + for (t = &trace_types; *t; t = &(*t)->next) { + len = strlen((*t)->name); + if (len > max_tracer_type_len) + max_tracer_type_len = len; + } + out: + mutex_unlock(&trace_types_lock); +} + +void notrace tracing_reset(struct trace_array_cpu *data) +{ + data->trace_idx = 0; + atomic_set(&data->underrun, 0); +} + +#ifdef CONFIG_FTRACE +static void notrace +function_trace_call(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (unlikely(!tracer_enabled)) + return; + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + ftrace(tr, data, ip, parent_ip, flags); + + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + +static struct ftrace_ops trace_ops __read_mostly = +{ + .func = function_trace_call, +}; +#endif + +notrace void tracing_start_function_trace(void) +{ + register_ftrace_function(&trace_ops); +} + +notrace void tracing_stop_function_trace(void) +{ + unregister_ftrace_function(&trace_ops); +} + +#define SAVED_CMDLINES 128 +static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; +static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; +static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; +static int cmdline_idx; +static DEFINE_SPINLOCK(trace_cmdline_lock); +atomic_t trace_record_cmdline_disabled; + +static void trace_init_cmdlines(void) +{ + memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline)); + memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid)); + cmdline_idx = 0; +} + +notrace void trace_stop_cmdline_recording(void); + +static void notrace trace_save_cmdline(struct task_struct *tsk) +{ + unsigned map; + unsigned idx; + + if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) + return; + + /* + * It's not the end of the world if we don't get + * the lock, but we also don't want to spin + * nor do we want to disable interrupts, + * so if we miss here, then better luck next time. + */ + if (!spin_trylock(&trace_cmdline_lock)) + return; + + idx = map_pid_to_cmdline[tsk->pid]; + if (idx >= SAVED_CMDLINES) { + idx = (cmdline_idx + 1) % SAVED_CMDLINES; + + map = map_cmdline_to_pid[idx]; + if (map <= PID_MAX_DEFAULT) + map_pid_to_cmdline[map] = (unsigned)-1; + + map_pid_to_cmdline[tsk->pid] = idx; + + cmdline_idx = idx; + } + + memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); + + spin_unlock(&trace_cmdline_lock); +} + +static notrace char *trace_find_cmdline(int pid) +{ + char *cmdline = "<...>"; + unsigned map; + + if (!pid) + return ""; + + if (pid > PID_MAX_DEFAULT) + goto out; + + map = map_pid_to_cmdline[pid]; + if (map >= SAVED_CMDLINES) + goto out; + + cmdline = saved_cmdlines[map]; + + out: + return cmdline; +} + +notrace void tracing_record_cmdline(struct task_struct *tsk) +{ + if (atomic_read(&trace_record_cmdline_disabled)) + return; + + trace_save_cmdline(tsk); +} + +static inline notrace struct trace_entry * +tracing_get_trace_entry(struct trace_array *tr, + struct trace_array_cpu *data) +{ + unsigned long idx, idx_next; + struct trace_entry *entry; + + idx = data->trace_idx; + idx_next = idx + 1; + + if (unlikely(idx_next >= tr->entries)) { + atomic_inc(&data->underrun); + idx_next = 0; + } + + data->trace_idx = idx_next; + + if (unlikely(idx_next != 0 && atomic_read(&data->underrun))) + atomic_inc(&data->underrun); + + entry = data->trace + idx * TRACE_ENTRY_SIZE; + + return entry; +} + +static inline notrace void +tracing_generic_entry_update(struct trace_entry *entry, + unsigned long flags) +{ + struct task_struct *tsk = current; + unsigned long pc; + + pc = preempt_count(); + + entry->idx = atomic_inc_return(&tracer_counter); + entry->preempt_count = pc & 0xff; + entry->pid = tsk->pid; + entry->t = now(raw_smp_processor_id()); + entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | + ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | + ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | + (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); +} + +notrace void +ftrace(struct trace_array *tr, struct trace_array_cpu *data, + unsigned long ip, unsigned long parent_ip, + unsigned long flags) +{ + struct trace_entry *entry; + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_FN; + entry->fn.ip = ip; + entry->fn.parent_ip = parent_ip; +} + +notrace void +tracing_sched_switch_trace(struct trace_array *tr, + struct trace_array_cpu *data, + struct task_struct *prev, struct task_struct *next, + unsigned long flags) +{ + struct trace_entry *entry; + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_CTX; + entry->ctx.prev_pid = prev->pid; + entry->ctx.prev_prio = prev->prio; + entry->ctx.prev_state = prev->state; + entry->ctx.next_pid = next->pid; + entry->ctx.next_prio = next->prio; +} + +enum trace_file_type { + TRACE_FILE_LAT_FMT = 1, +}; + +static struct trace_entry * +trace_entry_idx(struct trace_array *tr, unsigned long idx, int cpu) +{ + struct trace_entry *array = tr->data[cpu]->trace; + unsigned long underrun; + + if (idx >= tr->entries) + return NULL; + + underrun = atomic_read(&tr->data[cpu]->underrun); + if (underrun) + idx = ((underrun - 1) + idx) % tr->entries; + else if (idx >= tr->data[cpu]->trace_idx) + return NULL; + + return &array[idx]; +} + +static struct notrace trace_entry * +find_next_entry(struct trace_iterator *iter, int *ent_cpu) +{ + struct trace_array *tr = iter->tr; + struct trace_entry *ent, *next = NULL; + int next_cpu = -1; + int cpu; + + for_each_possible_cpu(cpu) { + if (!tr->data[cpu]->trace) + continue; + ent = trace_entry_idx(tr, iter->next_idx[cpu], cpu); + if (ent && + (!next || (long)(next->idx - ent->idx) > 0)) { + next = ent; + next_cpu = cpu; + } + } + + if (ent_cpu) + *ent_cpu = next_cpu; + + return next; +} + +static void *find_next_entry_inc(struct trace_iterator *iter) +{ + struct trace_entry *next; + int next_cpu = -1; + + next = find_next_entry(iter, &next_cpu); + + if (next) { + iter->next_idx[next_cpu]++; + iter->idx++; + } + iter->ent = next; + iter->cpu = next_cpu; + + return next ? iter : NULL; +} + +static void notrace * +s_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct trace_iterator *iter = m->private; + void *ent; + void *last_ent = iter->ent; + int i = (int)*pos; + + (*pos)++; + + /* can't go backwards */ + if (iter->idx > i) + return NULL; + + if (iter->idx < 0) + ent = find_next_entry_inc(iter); + else + ent = iter; + + while (ent && iter->idx < i) + ent = find_next_entry_inc(iter); + + iter->pos = *pos; + + if (last_ent && !ent) + seq_puts(m, "\n\nvim:ft=help\n"); + + return ent; +} + +static void *s_start(struct seq_file *m, loff_t *pos) +{ + struct trace_iterator *iter = m->private; + void *p = NULL; + loff_t l = 0; + int i; + + mutex_lock(&trace_types_lock); + + if (!current_trace || current_trace != iter->trace) + return NULL; + + atomic_inc(&trace_record_cmdline_disabled); + + /* let the tracer grab locks here if needed */ + if (current_trace->start) + current_trace->start(iter); + + if (*pos != iter->pos) { + iter->ent = NULL; + iter->cpu = 0; + iter->idx = -1; + + for (i = 0; i < NR_CPUS; i++) + iter->next_idx[i] = 0; + + for (p = iter; p && l < *pos; p = s_next(m, p, &l)) + ; + + } else { + l = *pos; + p = s_next(m, p, &l); + } + + return p; +} + +static void s_stop(struct seq_file *m, void *p) +{ + struct trace_iterator *iter = m->private; + + atomic_dec(&trace_record_cmdline_disabled); + + /* let the tracer release locks here if needed */ + if (current_trace && current_trace == iter->trace && iter->trace->stop) + iter->trace->stop(iter); + + mutex_unlock(&trace_types_lock); +} + +static void +seq_print_sym_short(struct seq_file *m, const char *fmt, unsigned long address) +{ +#ifdef CONFIG_KALLSYMS + char str[KSYM_SYMBOL_LEN]; + + kallsyms_lookup(address, NULL, NULL, NULL, str); + + seq_printf(m, fmt, str); +#endif +} + +static void +seq_print_sym_offset(struct seq_file *m, const char *fmt, unsigned long address) +{ +#ifdef CONFIG_KALLSYMS + char str[KSYM_SYMBOL_LEN]; + + sprint_symbol(str, address); + seq_printf(m, fmt, str); +#endif +} + +#ifndef CONFIG_64BIT +# define IP_FMT "%08lx" +#else +# define IP_FMT "%016lx" +#endif + +static void notrace +seq_print_ip_sym(struct seq_file *m, unsigned long ip, unsigned long sym_flags) +{ + if (!ip) { + seq_printf(m, "0"); + return; + } + + if (sym_flags & TRACE_ITER_SYM_OFFSET) + seq_print_sym_offset(m, "%s", ip); + else + seq_print_sym_short(m, "%s", ip); + + if (sym_flags & TRACE_ITER_SYM_ADDR) + seq_printf(m, " <" IP_FMT ">", ip); +} + +static void notrace print_lat_help_header(struct seq_file *m) +{ + seq_puts(m, "# _------=> CPU# \n"); + seq_puts(m, "# / _-----=> irqs-off \n"); + seq_puts(m, "# | / _----=> need-resched \n"); + seq_puts(m, "# || / _---=> hardirq/softirq \n"); + seq_puts(m, "# ||| / _--=> preempt-depth \n"); + seq_puts(m, "# |||| / \n"); + seq_puts(m, "# ||||| delay \n"); + seq_puts(m, "# cmd pid ||||| time | caller \n"); + seq_puts(m, "# \\ / ||||| \\ | / \n"); +} + +static void notrace print_func_help_header(struct seq_file *m) +{ + seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); + seq_puts(m, "# | | | | |\n"); +} + + +static void notrace +print_trace_header(struct seq_file *m, struct trace_iterator *iter) +{ + unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); + struct trace_array *tr = iter->tr; + struct trace_array_cpu *data = tr->data[tr->cpu]; + struct tracer *type = current_trace; + unsigned long underruns = 0; + unsigned long underrun; + unsigned long entries = 0; + int cpu; + const char *name = "preemption"; + + if (type) + name = type->name; + + for_each_possible_cpu(cpu) { + if (tr->data[cpu]->trace) { + underrun = atomic_read(&tr->data[cpu]->underrun); + if (underrun) { + underruns += underrun; + entries += tr->entries; + } else + entries += tr->data[cpu]->trace_idx; + } + } + + seq_printf(m, "%s latency trace v1.1.5 on %s\n", + name, UTS_RELEASE); + seq_puts(m, "-----------------------------------" + "---------------------------------\n"); + seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |" + " (M:%s VP:%d, KP:%d, SP:%d HP:%d", + data->saved_latency, + entries, + (entries + underruns), + tr->cpu, +#if defined(CONFIG_PREEMPT_NONE) + "server", +#elif defined(CONFIG_PREEMPT_VOLUNTARY) + "desktop", +#elif defined(CONFIG_PREEMPT_DESKTOP) + "preempt", +#else + "unknown", +#endif + /* These are reserved for later use */ + 0, 0, 0, 0); +#ifdef CONFIG_SMP + seq_printf(m, " #P:%d)\n", num_online_cpus()); +#else + seq_puts(m, ")\n"); +#endif + seq_puts(m, " -----------------\n"); + seq_printf(m, " | task: %.16s-%d " + "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", + data->comm, data->pid, data->uid, data->nice, + data->policy, data->rt_priority); + seq_puts(m, " -----------------\n"); + + if (data->critical_start) { + seq_puts(m, " => started at: "); + seq_print_ip_sym(m, data->critical_start, sym_flags); + seq_puts(m, "\n => ended at: "); + seq_print_ip_sym(m, data->critical_end, sym_flags); + seq_puts(m, "\n"); + } + + seq_puts(m, "\n"); +} + +unsigned long nsecs_to_usecs(unsigned long nsecs) +{ + return nsecs / 1000; +} + +static void notrace +lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu) +{ + int hardirq, softirq; + char *comm; + + comm = trace_find_cmdline(entry->pid); + + seq_printf(m, "%8.8s-%-5d ", comm, entry->pid); + seq_printf(m, "%d", cpu); + seq_printf(m, "%c%c", + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', + ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); + + hardirq = entry->flags & TRACE_FLAG_HARDIRQ; + softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + if (hardirq && softirq) + seq_putc(m, 'H'); + else { + if (hardirq) + seq_putc(m, 'h'); + else { + if (softirq) + seq_putc(m, 's'); + else + seq_putc(m, '.'); + } + } + + if (entry->preempt_count) + seq_printf(m, "%x", entry->preempt_count); + else + seq_puts(m, "."); +} + +unsigned long preempt_mark_thresh = 100; + +static void notrace +lat_print_timestamp(struct seq_file *m, unsigned long long abs_usecs, + unsigned long rel_usecs) +{ + seq_printf(m, " %4lldus", abs_usecs); + if (rel_usecs > preempt_mark_thresh) + seq_puts(m, "!: "); + else if (rel_usecs > 1) + seq_puts(m, "+: "); + else + seq_puts(m, " : "); +} + +static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; + +static void notrace +print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, + unsigned int trace_idx, int cpu) +{ + unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); + struct trace_entry *next_entry = find_next_entry(iter, NULL); + unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); + struct trace_entry *entry = iter->ent; + unsigned long abs_usecs; + unsigned long rel_usecs; + char *comm; + int S; + + if (!next_entry) + next_entry = entry; + rel_usecs = ns2usecs(next_entry->t - entry->t); + abs_usecs = ns2usecs(entry->t - iter->tr->time_start); + + if (verbose) { + comm = trace_find_cmdline(entry->pid); + seq_printf(m, "%16s %5d %d %d %08x %08x [%08lx]" + " %ld.%03ldms (+%ld.%03ldms): ", + comm, + entry->pid, cpu, entry->flags, + entry->preempt_count, trace_idx, + ns2usecs(entry->t), + abs_usecs/1000, + abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); + } else { + lat_print_generic(m, entry, cpu); + lat_print_timestamp(m, abs_usecs, rel_usecs); + } + switch (entry->type) { + case TRACE_FN: + seq_print_ip_sym(m, entry->fn.ip, sym_flags); + seq_puts(m, " ("); + seq_print_ip_sym(m, entry->fn.parent_ip, sym_flags); + seq_puts(m, ")\n"); + break; + case TRACE_CTX: + S = entry->ctx.prev_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.prev_state] : 'X'; + comm = trace_find_cmdline(entry->ctx.next_pid); + seq_printf(m, " %d:%d:%c --> %d:%d %s\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio, + comm); + break; + } +} + +static void notrace +print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) +{ + unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); + struct trace_entry *entry = iter->ent; + unsigned long usec_rem; + unsigned long long t; + unsigned long secs; + char *comm; + int S; + + comm = trace_find_cmdline(iter->ent->pid); + + t = ns2usecs(entry->t); + usec_rem = do_div(t, 1000000ULL); + secs = (unsigned long)t; + + seq_printf(m, "%16s-%-5d ", comm, entry->pid); + seq_printf(m, "[%02d] ", iter->cpu); + seq_printf(m, "%5lu.%06lu: ", secs, usec_rem); + + switch (entry->type) { + case TRACE_FN: + seq_print_ip_sym(m, entry->fn.ip, sym_flags); + if ((sym_flags & TRACE_ITER_PRINT_PARENT) && + entry->fn.parent_ip) { + seq_printf(m, " <-"); + seq_print_ip_sym(m, entry->fn.parent_ip, sym_flags); + } + break; + case TRACE_CTX: + S = entry->ctx.prev_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.prev_state] : 'X'; + seq_printf(m, " %d:%d:%c ==> %d:%d\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio); + break; + } + seq_printf(m, "\n"); +} + +static int trace_empty(struct trace_iterator *iter) +{ + struct trace_array_cpu *data; + int cpu; + + for_each_possible_cpu(cpu) { + data = iter->tr->data[cpu]; + + if (data->trace && + (data->trace_idx || + atomic_read(&data->underrun))) + return 0; + } + return 1; +} + +static int s_show(struct seq_file *m, void *v) +{ + struct trace_iterator *iter = v; + + if (iter->ent == NULL) { + if (iter->tr) { + seq_printf(m, "# tracer: %s\n", iter->trace->name); + seq_puts(m, "#\n"); + } + if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + /* print nothing if the buffers are empty */ + if (trace_empty(iter)) + return 0; + print_trace_header(m, iter); + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_lat_help_header(m); + } else { + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_func_help_header(m); + } + } else { + if (iter->iter_flags & TRACE_FILE_LAT_FMT) + print_lat_fmt(m, iter, iter->idx, iter->cpu); + else + print_trace_fmt(m, iter); + } + + return 0; +} + +static struct seq_operations tracer_seq_ops = { + .start = s_start, + .next = s_next, + .stop = s_stop, + .show = s_show, +}; + +static struct trace_iterator notrace * +__tracing_open(struct inode *inode, struct file *file, int *ret) +{ + struct trace_iterator *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) { + *ret = -ENOMEM; + goto out; + } + + mutex_lock(&trace_types_lock); + if (current_trace && current_trace->print_max) + iter->tr = &max_tr; + else + iter->tr = inode->i_private; + iter->trace = current_trace; + iter->pos = -1; + + /* TODO stop tracer */ + *ret = seq_open(file, &tracer_seq_ops); + if (!*ret) { + struct seq_file *m = file->private_data; + m->private = iter; + + /* stop the trace while dumping */ + if (iter->tr->ctrl) + tracer_enabled = 0; + + if (iter->trace && iter->trace->open) + iter->trace->open(iter); + } else { + kfree(iter); + iter = NULL; + } + mutex_unlock(&trace_types_lock); + + out: + return iter; +} + +int tracing_open_generic(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +int tracing_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + struct trace_iterator *iter = m->private; + + mutex_lock(&trace_types_lock); + if (iter->trace && iter->trace->close) + iter->trace->close(iter); + + /* reenable tracing if it was previously enabled */ + if (iter->tr->ctrl) + tracer_enabled = 1; + mutex_unlock(&trace_types_lock); + + seq_release(inode, file); + kfree(iter); + return 0; +} + +static int tracing_open(struct inode *inode, struct file *file) +{ + int ret; + + __tracing_open(inode, file, &ret); + + return ret; +} + +static int tracing_lt_open(struct inode *inode, struct file *file) +{ + struct trace_iterator *iter; + int ret; + + iter = __tracing_open(inode, file, &ret); + + if (!ret) + iter->iter_flags |= TRACE_FILE_LAT_FMT; + + return ret; +} + + +static void notrace * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct tracer *t = m->private; + + (*pos)++; + + if (t) + t = t->next; + + m->private = t; + + return t; +} + +static void *t_start(struct seq_file *m, loff_t *pos) +{ + struct tracer *t = m->private; + loff_t l = 0; + + mutex_lock(&trace_types_lock); + for (; t && l < *pos; t = t_next(m, t, &l)) + ; + + return t; +} + +static void t_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&trace_types_lock); +} + +static int t_show(struct seq_file *m, void *v) +{ + struct tracer *t = v; + + if (!t) + return 0; + + seq_printf(m, "%s", t->name); + if (t->next) + seq_putc(m, ' '); + else + seq_putc(m, '\n'); + + return 0; +} + +static struct seq_operations show_traces_seq_ops = { + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, +}; + +static int show_traces_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &show_traces_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = trace_types; + } + + return ret; +} + +static struct file_operations tracing_fops = { + .open = tracing_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_release, +}; + +static struct file_operations tracing_lt_fops = { + .open = tracing_lt_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_release, +}; + +static struct file_operations show_traces_fops = { + .open = show_traces_open, + .read = seq_read, + .release = seq_release, +}; + +static ssize_t +tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *buf; + int r = 0; + int len = 0; + int i; + + /* calulate max size */ + for (i = 0; trace_options[i]; i++) { + len += strlen(trace_options[i]); + len += 3; /* "no" and space */ + } + + /* +2 for \n and \0 */ + buf = kmalloc(len + 2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (i = 0; trace_options[i]; i++) { + if (trace_flags & (1 << i)) + r += sprintf(buf + r, "%s ", trace_options[i]); + else + r += sprintf(buf + r, "no%s ", trace_options[i]); + } + + r += sprintf(buf + r, "\n"); + WARN_ON(r >= len + 2); + + r = simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); + + kfree(buf); + + return r; +} + +static ssize_t +tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + char *cmp = buf; + int neg = 0; + int i; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + if (strncmp(buf, "no", 2) == 0) { + neg = 1; + cmp += 2; + } + + for (i = 0; trace_options[i]; i++) { + int len = strlen(trace_options[i]); + + if (strncmp(cmp, trace_options[i], len) == 0) { + if (neg) + trace_flags &= ~(1 << i); + else + trace_flags |= (1 << i); + break; + } + } + + filp->f_pos += cnt; + + return cnt; +} + +static struct file_operations tracing_iter_fops = { + .open = tracing_open_generic, + .read = tracing_iter_ctrl_read, + .write = tracing_iter_ctrl_write, +}; + +static ssize_t +tracing_ctrl_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + char buf[64]; + int r; + + r = sprintf(buf, "%ld\n", tr->ctrl); + return simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); +} + +static ssize_t +tracing_ctrl_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + long val; + char buf[64]; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 10); + + val = !!val; + + mutex_lock(&trace_types_lock); + if (tr->ctrl ^ val) { + if (val) + tracer_enabled = 1; + else + tracer_enabled = 0; + + tr->ctrl = val; + + if (current_trace && current_trace->ctrl_update) + current_trace->ctrl_update(tr); + } + mutex_unlock(&trace_types_lock); + + filp->f_pos += cnt; + + return cnt; +} + +static ssize_t +tracing_set_trace_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[max_tracer_type_len+2]; + int r; + + mutex_lock(&trace_types_lock); + if (current_trace) + r = sprintf(buf, "%s\n", current_trace->name); + else + r = sprintf(buf, "\n"); + mutex_unlock(&trace_types_lock); + + return simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); +} + +static ssize_t +tracing_set_trace_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = &global_trace; + struct tracer *t; + char buf[max_tracer_type_len+1]; + int i; + + if (cnt > max_tracer_type_len) + cnt = max_tracer_type_len; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + /* strip ending whitespace. */ + for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) + buf[i] = 0; + + mutex_lock(&trace_types_lock); + for (t = trace_types; t; t = t->next) { + if (strcmp(t->name, buf) == 0) + break; + } + if (!t || t == current_trace) + goto out; + + if (current_trace && current_trace->reset) + current_trace->reset(tr); + + current_trace = t; + if (t->init) + t->init(tr); + + out: + mutex_unlock(&trace_types_lock); + + filp->f_pos += cnt; + + return cnt; +} + +static ssize_t +tracing_max_lat_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long *ptr = filp->private_data; + char buf[64]; + int r; + + r = snprintf(buf, 64, "%ld\n", + *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr)); + if (r > 64) + r = 64; + return simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); +} + +static ssize_t +tracing_max_lat_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + long *ptr = filp->private_data; + long val; + char buf[64]; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 10); + + *ptr = val * 1000; + + return cnt; +} + +static struct file_operations tracing_max_lat_fops = { + .open = tracing_open_generic, + .read = tracing_max_lat_read, + .write = tracing_max_lat_write, +}; + +static struct file_operations tracing_ctrl_fops = { + .open = tracing_open_generic, + .read = tracing_ctrl_read, + .write = tracing_ctrl_write, +}; + +static struct file_operations set_tracer_fops = { + .open = tracing_open_generic, + .read = tracing_set_trace_read, + .write = tracing_set_trace_write, +}; + +#ifdef CONFIG_DYNAMIC_FTRACE + +static ssize_t +tracing_read_long(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long *p = filp->private_data; + char buf[64]; + int r; + + r = sprintf(buf, "%ld\n", *p); + return simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); +} + +static struct file_operations tracing_read_long_fops = { + .open = tracing_open_generic, + .read = tracing_read_long, +}; +#endif + +static struct dentry *d_tracer; + +struct dentry *tracing_init_dentry(void) +{ + static int once; + + if (d_tracer) + return d_tracer; + + d_tracer = debugfs_create_dir("tracing", NULL); + + if (!d_tracer && !once) { + once = 1; + pr_warning("Could not create debugfs directory 'tracing'\n"); + return NULL; + } + + return d_tracer; +} + +static __init void tracer_init_debugfs(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("tracing_enabled", 0644, d_tracer, + &global_trace, &tracing_ctrl_fops); + if (!entry) + pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); + + entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, + NULL, &tracing_iter_fops); + if (!entry) + pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); + + entry = debugfs_create_file("latency_trace", 0444, d_tracer, + &global_trace, &tracing_lt_fops); + if (!entry) + pr_warning("Could not create debugfs 'latency_trace' entry\n"); + + entry = debugfs_create_file("trace", 0444, d_tracer, + &global_trace, &tracing_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace' entry\n"); + + entry = debugfs_create_file("available_tracers", 0444, d_tracer, + &global_trace, &show_traces_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace' entry\n"); + + entry = debugfs_create_file("current_tracer", 0444, d_tracer, + &global_trace, &set_tracer_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace' entry\n"); + + entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer, + &tracing_max_latency, + &tracing_max_lat_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'tracing_max_latency' entry\n"); + + entry = debugfs_create_file("tracing_thresh", 0644, d_tracer, + &tracing_thresh, &tracing_max_lat_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'tracing_threash' entry\n"); + +#ifdef CONFIG_DYNAMIC_FTRACE + entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, + &ftrace_update_tot_cnt, + &tracing_read_long_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'dyn_ftrace_total_info' entry\n"); +#endif +} + +/* dummy trace to disable tracing */ +static struct tracer no_tracer __read_mostly = +{ + .name = "none", +}; + +static inline notrace int page_order(const unsigned long size) +{ + const unsigned long nr_pages = DIV_ROUND_UP(size, PAGE_SIZE); + return ilog2(roundup_pow_of_two(nr_pages)); +} + +__init static int tracer_alloc_buffers(void) +{ + const int order = page_order(trace_nr_entries * TRACE_ENTRY_SIZE); + const unsigned long size = (1UL << order) << PAGE_SHIFT; + struct trace_entry *array; + int i; + + for_each_possible_cpu(i) { + global_trace.data[i] = &per_cpu(global_trace_cpu, i); + max_tr.data[i] = &per_cpu(max_data, i); + + array = (struct trace_entry *) + __get_free_pages(GFP_KERNEL, order); + if (array == NULL) { + printk(KERN_ERR "tracer: failed to allocate" + " %ld bytes for trace buffer!\n", size); + goto free_buffers; + } + global_trace.data[i]->trace = array; + +/* Only allocate if we are actually using the max trace */ +#ifdef CONFIG_TRACER_MAX_TRACE + array = (struct trace_entry *) + __get_free_pages(GFP_KERNEL, order); + if (array == NULL) { + printk(KERN_ERR "wakeup tracer: failed to allocate" + " %ld bytes for trace buffer!\n", size); + goto free_buffers; + } + max_tr.data[i]->trace = array; +#endif + } + + /* + * Since we allocate by orders of pages, we may be able to + * round up a bit. + */ + global_trace.entries = size / TRACE_ENTRY_SIZE; + max_tr.entries = global_trace.entries; + + pr_info("tracer: %ld bytes allocated for %ld", + size, trace_nr_entries); + pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE); + pr_info(" actual entries %ld\n", global_trace.entries); + + tracer_init_debugfs(); + + trace_init_cmdlines(); + + register_tracer(&no_tracer); + current_trace = &no_tracer; + + return 0; + + free_buffers: + for (i-- ; i >= 0; i--) { + struct trace_array_cpu *data = global_trace.data[i]; + + if (data && data->trace) { + free_pages((unsigned long)data->trace, order); + data->trace = NULL; + } + +#ifdef CONFIG_TRACER_MAX_TRACE + data = max_tr.data[i]; + if (data && data->trace) { + free_pages((unsigned long)data->trace, order); + data->trace = NULL; + } +#endif + } + return -ENOMEM; +} + +device_initcall(tracer_alloc_buffers); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h new file mode 100644 index 0000000..3173a93 --- /dev/null +++ b/kernel/trace/trace.h @@ -0,0 +1,184 @@ +#ifndef _LINUX_KERNEL_TRACE_H +#define _LINUX_KERNEL_TRACE_H + +#include +#include +#include +#include + +/* + * Function trace entry - function address and parent function addres: + */ +struct ftrace_entry { + unsigned long ip; + unsigned long parent_ip; +}; + +/* + * Context switch trace entry - which task (and prio) we switched from/to: + */ +struct ctx_switch_entry { + unsigned int prev_pid; + unsigned char prev_prio; + unsigned char prev_state; + unsigned int next_pid; + unsigned char next_prio; +}; + +/* + * The trace entry - the most basic unit of tracing. This is what + * is printed in the end as a single line in the trace output, such as: + * + * bash-15816 [01] 235.197585: idle_cpu <- irq_enter + */ +struct trace_entry { + char type; + char cpu; + char flags; + char preempt_count; + int pid; + cycle_t t; + unsigned long idx; + union { + struct ftrace_entry fn; + struct ctx_switch_entry ctx; + }; +}; + +#define TRACE_ENTRY_SIZE sizeof(struct trace_entry) + +/* + * The CPU trace array - it consists of thousands of trace entries + * plus some other descriptor data: (for example which task started + * the trace, etc.) + */ +struct trace_array_cpu { + void *trace; + unsigned long trace_idx; + atomic_t disabled; + atomic_t underrun; + unsigned long saved_latency; + unsigned long critical_start; + unsigned long critical_end; + unsigned long critical_sequence; + unsigned long nice; + unsigned long policy; + unsigned long rt_priority; + cycle_t preempt_timestamp; + pid_t pid; + uid_t uid; + char comm[TASK_COMM_LEN]; +}; + +struct trace_iterator; + +/* + * The trace array - an array of per-CPU trace arrays. This is the + * highest level data structure that individual tracers deal with. + * They have on/off state as well: + */ +struct trace_array { + unsigned long entries; + long ctrl; + int cpu; + cycle_t time_start; + struct trace_array_cpu *data[NR_CPUS]; +}; + +/* + * A specific tracer, represented by methods that operate on a trace array: + */ +struct tracer { + const char *name; + void (*init)(struct trace_array *tr); + void (*reset)(struct trace_array *tr); + void (*open)(struct trace_iterator *iter); + void (*close)(struct trace_iterator *iter); + void (*start)(struct trace_iterator *iter); + void (*stop)(struct trace_iterator *iter); + void (*ctrl_update)(struct trace_array *tr); + struct tracer *next; + int print_max; +}; + +/* + * Trace iterator - used by printout routines who present trace + * results to users and which routines might sleep, etc: + */ +struct trace_iterator { + struct trace_array *tr; + struct tracer *trace; + struct trace_entry *ent; + unsigned long iter_flags; + loff_t pos; + unsigned long next_idx[NR_CPUS]; + int cpu; + int idx; +}; + +void notrace tracing_reset(struct trace_array_cpu *data); +int tracing_open_generic(struct inode *inode, struct file *filp); +struct dentry *tracing_init_dentry(void); +void ftrace(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long ip, + unsigned long parent_ip, + unsigned long flags); +void tracing_sched_switch_trace(struct trace_array *tr, + struct trace_array_cpu *data, + struct task_struct *prev, + struct task_struct *next, + unsigned long flags); +void tracing_record_cmdline(struct task_struct *tsk); + +void tracing_start_function_trace(void); +void tracing_stop_function_trace(void); +int register_tracer(struct tracer *type); +void unregister_tracer(struct tracer *type); + +extern unsigned long nsecs_to_usecs(unsigned long nsecs); + +extern unsigned long tracing_max_latency; +extern unsigned long tracing_thresh; + +void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); +void update_max_tr_single(struct trace_array *tr, + struct task_struct *tsk, int cpu); + +static inline notrace cycle_t now(int cpu) +{ + return cpu_clock(cpu); +} + +#ifdef CONFIG_SCHED_TRACER +extern void notrace +wakeup_sched_switch(struct task_struct *prev, struct task_struct *next); +#else +static inline void +wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) +{ +} +#endif + +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +typedef void +(*tracer_switch_func_t)(void *private, + struct task_struct *prev, + struct task_struct *next); + +struct tracer_switch_ops { + tracer_switch_func_t func; + void *private; + struct tracer_switch_ops *next; +}; + +extern int register_tracer_switch(struct tracer_switch_ops *ops); +extern int unregister_tracer_switch(struct tracer_switch_ops *ops); + +#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_update_tot_cnt; +#endif + +#endif /* _LINUX_KERNEL_TRACE_H */ -- cgit v0.10.2 From 1b29b01887e6032dcaf818c14999c7a39593b4e7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: function tracer This is a simple trace that uses the ftrace infrastructure. It is designed to be fast and small, and easy to use. It is useful to record things that happen over a very short period of time, and not to analyze the system in general. Updates: available_tracers "function" is added to this file. current_tracer To enable the function tracer: echo function > /debugfs/tracing/current_tracer To disable the tracer: echo disable > /debugfs/tracing/current_tracer The output of the function_trace file is as follows "echo noverbose > /debugfs/tracing/iter_ctrl" preemption latency trace v1.1.5 on 2.6.24-rc7-tst Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 0 us, #419428/4361791, CPU#1 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:4) ----------------- | task: -0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 0d.h. 1595128us+: set_normalized_timespec+0x8/0x2d (ktime_get_ts+0x4a/0x4e ) swapper-0 0d.h. 1595131us+: _spin_lock+0x8/0x18 (hrtimer_interrupt+0x6e/0x1b0 ) Or with verbose turned on: "echo verbose > /debugfs/tracing/iter_ctrl" preemption latency trace v1.1.5 on 2.6.24-rc7-tst -------------------------------------------------------------------- latency: 0 us, #419428/4361791, CPU#1 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:4) ----------------- | task: -0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- swapper 0 0 9 00000000 00000000 [f3675f41] 1595.128ms (+0.003ms): set_normalized_timespec+0x8/0x2d (ktime_get_ts+0x4a/0x4e ) swapper 0 0 9 00000000 00000001 [f3675f45] 1595.131ms (+0.003ms): _spin_lock+0x8/0x18 (hrtimer_interrupt+0x6e/0x1b0 ) swapper 0 0 9 00000000 00000002 [f3675f48] 1595.135ms (+0.003ms): _spin_lock+0x8/0x18 (hrtimer_interrupt+0x6e/0x1b0 ) The "trace" file is not affected by the verbose mode, but is by the symonly. echo "nosymonly" > /debugfs/tracing/iter_ctrl tracer: [ 81.479967] CPU 0: bash:3154 register_ftrace_function+0x5f/0x66 <-- _spin_unlock_irqrestore+0xe/0x5a [ 81.479967] CPU 0: bash:3154 _spin_unlock_irqrestore+0x3e/0x5a <-- sub_preempt_count+0xc/0x7a [ 81.479968] CPU 0: bash:3154 sub_preempt_count+0x30/0x7a <-- in_lock_functions+0x9/0x24 [ 81.479968] CPU 0: bash:3154 vfs_write+0x11d/0x155 <-- dnotify_parent+0x12/0x78 [ 81.479968] CPU 0: bash:3154 dnotify_parent+0x2d/0x78 <-- _spin_lock+0xe/0x70 [ 81.479969] CPU 0: bash:3154 _spin_lock+0x1b/0x70 <-- add_preempt_count+0xe/0x77 [ 81.479969] CPU 0: bash:3154 add_preempt_count+0x3e/0x77 <-- in_lock_functions+0x9/0x24 echo "symonly" > /debugfs/tracing/iter_ctrl tracer: [ 81.479913] CPU 0: bash:3154 register_ftrace_function+0x5f/0x66 <-- _spin_unlock_irqrestore+0xe/0x5a [ 81.479913] CPU 0: bash:3154 _spin_unlock_irqrestore+0x3e/0x5a <-- sub_preempt_count+0xc/0x7a [ 81.479913] CPU 0: bash:3154 sub_preempt_count+0x30/0x7a <-- in_lock_functions+0x9/0x24 [ 81.479914] CPU 0: bash:3154 vfs_write+0x11d/0x155 <-- dnotify_parent+0x12/0x78 [ 81.479914] CPU 0: bash:3154 dnotify_parent+0x2d/0x78 <-- _spin_lock+0xe/0x70 [ 81.479914] CPU 0: bash:3154 _spin_lock+0x1b/0x70 <-- add_preempt_count+0xe/0x77 [ 81.479914] CPU 0: bash:3154 add_preempt_count+0x3e/0x77 <-- in_lock_functions+0x9/0x24 Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ce70677..1399f37 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -8,3 +8,16 @@ config TRACING bool select DEBUG_FS +config FTRACE + bool "Kernel Function Tracer" + depends on DEBUG_KERNEL && HAVE_FTRACE + select FRAME_POINTER + select TRACING + help + Enable the kernel to trace every kernel function. This is done + by using a compiler feature to insert a small, 5-byte No-Operation + instruction to the beginning of every kernel function, which NOP + sequence is then dynamically patched into a tracer call when + tracing is enabled by the administrator. If it's runtime disabled + (the bootup default), then the overhead of the instructions is very + small and not measurable even in micro-benchmarks. diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 7af4031..6bb5e50 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_FTRACE) += libftrace.o obj-$(CONFIG_TRACING) += trace.o +obj-$(CONFIG_FTRACE) += trace_functions.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c new file mode 100644 index 0000000..82988c5 --- /dev/null +++ b/kernel/trace/trace_functions.c @@ -0,0 +1,73 @@ +/* + * ring buffer based function tracer + * + * Copyright (C) 2007-2008 Steven Rostedt + * Copyright (C) 2008 Ingo Molnar + * + * Based on code from the latency_tracer, that is: + * + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ +#include +#include +#include +#include + +#include "trace.h" + +static notrace void function_reset(struct trace_array *tr) +{ + int cpu; + + tr->time_start = now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr->data[cpu]); +} + +static notrace void start_function_trace(struct trace_array *tr) +{ + function_reset(tr); + tracing_start_function_trace(); +} + +static notrace void stop_function_trace(struct trace_array *tr) +{ + tracing_stop_function_trace(); +} + +static notrace void function_trace_init(struct trace_array *tr) +{ + if (tr->ctrl) + start_function_trace(tr); +} + +static notrace void function_trace_reset(struct trace_array *tr) +{ + if (tr->ctrl) + stop_function_trace(tr); +} + +static notrace void function_trace_ctrl_update(struct trace_array *tr) +{ + if (tr->ctrl) + start_function_trace(tr); + else + stop_function_trace(tr); +} + +static struct tracer function_trace __read_mostly = +{ + .name = "ftrace", + .init = function_trace_init, + .reset = function_trace_reset, + .ctrl_update = function_trace_ctrl_update, +}; + +static __init int init_function_trace(void) +{ + return register_tracer(&function_trace); +} + +device_initcall(init_function_trace); -- cgit v0.10.2 From 35e8e302e5d6e32675df2fc1dd3a53dfa6630dc1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: add tracing of context switches This patch adds context switch tracing, of the format of: _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | pid:prio:state \ / ||||| \ | / swapper-0 1d..3 137us+: 0:140:R --> 2912:120 sshd-2912 1d..3 216us+: 2912:120:S --> 0:140 swapper-0 1d..3 261us+: 0:140:R --> 2912:120 bash-2920 0d..3 267us+: 2920:120:S --> 0:140 sshd-2912 1d..3 330us!: 2912:120:S --> 0:140 swapper-0 1d..3 2389us+: 0:140:R --> 2847:120 yum-upda-2847 1d..3 2411us!: 2847:120:S --> 0:140 swapper-0 0d..3 11089us+: 0:140:R --> 3139:120 gdm-bina-3139 0d..3 11113us!: 3139:120:S --> 0:140 swapper-0 1d..3 102328us+: 0:140:R --> 2847:120 yum-upda-2847 1d..3 102348us!: 2847:120:S --> 0:140 "sched_switch" is added to /debugfs/tracing/available_tracers [ Eugene Teo Cc: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 1399f37..5d6aa92 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -13,6 +13,7 @@ config FTRACE depends on DEBUG_KERNEL && HAVE_FTRACE select FRAME_POINTER select TRACING + select CONTEXT_SWITCH_TRACER help Enable the kernel to trace every kernel function. This is done by using a compiler feature to insert a small, 5-byte No-Operation @@ -21,3 +22,13 @@ config FTRACE tracing is enabled by the administrator. If it's runtime disabled (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. + +config CONTEXT_SWITCH_TRACER + bool "Trace process context switches" + depends on DEBUG_KERNEL + select TRACING + select MARKERS + help + This tracer gets called from the context switch and records + all switching of tasks. + diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 6bb5e50..6b54ceb 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_FTRACE) += libftrace.o obj-$(CONFIG_TRACING) += trace.o +obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FTRACE) += trace_functions.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c new file mode 100644 index 0000000..3e4771d --- /dev/null +++ b/kernel/trace/trace_sched_switch.c @@ -0,0 +1,116 @@ +/* + * trace context switch + * + * Copyright (C) 2007 Steven Rostedt + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +static struct trace_array *ctx_trace; +static int __read_mostly tracer_enabled; + +static void notrace +ctx_switch_func(struct task_struct *prev, struct task_struct *next) +{ + struct trace_array *tr = ctx_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (!tracer_enabled) + return; + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + tracing_sched_switch_trace(tr, data, prev, next, flags); + + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + +void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +{ + tracing_record_cmdline(prev); + + /* + * If tracer_switch_func only points to the local + * switch func, it still needs the ptr passed to it. + */ + ctx_switch_func(prev, next); + + /* + * Chain to the wakeup tracer (this is a NOP if disabled): + */ + wakeup_sched_switch(prev, next); +} + +static notrace void sched_switch_reset(struct trace_array *tr) +{ + int cpu; + + tr->time_start = now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr->data[cpu]); +} + +static notrace void start_sched_trace(struct trace_array *tr) +{ + sched_switch_reset(tr); + tracer_enabled = 1; +} + +static notrace void stop_sched_trace(struct trace_array *tr) +{ + tracer_enabled = 0; +} + +static notrace void sched_switch_trace_init(struct trace_array *tr) +{ + ctx_trace = tr; + + if (tr->ctrl) + start_sched_trace(tr); +} + +static notrace void sched_switch_trace_reset(struct trace_array *tr) +{ + if (tr->ctrl) + stop_sched_trace(tr); +} + +static void sched_switch_trace_ctrl_update(struct trace_array *tr) +{ + /* When starting a new trace, reset the buffers */ + if (tr->ctrl) + start_sched_trace(tr); + else + stop_sched_trace(tr); +} + +static struct tracer sched_switch_trace __read_mostly = +{ + .name = "sched_switch", + .init = sched_switch_trace_init, + .reset = sched_switch_trace_reset, + .ctrl_update = sched_switch_trace_ctrl_update, +}; + +__init static int init_sched_switch_trace(void) +{ + return register_tracer(&sched_switch_trace); +} +device_initcall(init_sched_switch_trace); -- cgit v0.10.2 From 352ad25aa4a189c667cb2af333948d34692a2d27 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: tracer for scheduler wakeup latency This patch adds the tracer that tracks the wakeup latency of the highest priority waking task. "wakeup" is added to /debugfs/tracing/available_tracers Also added to /debugfs/tracing tracing_max_latency holds the current max latency for the wakeup wakeup_thresh if set to other than zero, a log will be recorded for every wakeup that takes longer than the number entered in here (usecs for all counters) (deletes previous trace) Examples: (with ftrace_enabled = 0) ============ preemption latency trace v1.1.5 on 2.6.24-rc8 Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 26 us, #2/2, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: migration/0-3 (uid:0 nice:-5 policy:1 rt_prio:99) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / quilt-8551 0d..3 0us+: wake_up_process+0x15/0x17 (sched_exec+0xc9/0x100 ) quilt-8551 0d..4 26us : sched_switch_callback+0x73/0x81 (schedule+0x483/0x6d5 ) vim:ft=help ============ (with ftrace_enabled = 1) ============ preemption latency trace v1.1.5 on 2.6.24-rc8 -------------------------------------------------------------------- latency: 36 us, #45/45, CPU#0 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: migration/1-5 (uid:0 nice:-5 policy:1 rt_prio:99) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / bash-10653 1d..3 0us : wake_up_process+0x15/0x17 (sched_exec+0xc9/0x100 ) bash-10653 1d..3 1us : try_to_wake_up+0x271/0x2e7 (sub_preempt_count+0xc/0x7a ) bash-10653 1d..2 2us : try_to_wake_up+0x296/0x2e7 (update_rq_clock+0x9/0x20 ) bash-10653 1d..2 2us : update_rq_clock+0x1e/0x20 (__update_rq_clock+0xc/0x90 ) bash-10653 1d..2 3us : __update_rq_clock+0x1b/0x90 (sched_clock+0x9/0x29 ) bash-10653 1d..2 4us : try_to_wake_up+0x2a6/0x2e7 (activate_task+0xc/0x3f ) bash-10653 1d..2 4us : activate_task+0x2d/0x3f (enqueue_task+0xe/0x66 ) bash-10653 1d..2 5us : enqueue_task+0x5b/0x66 (enqueue_task_rt+0x9/0x3c ) bash-10653 1d..2 6us : try_to_wake_up+0x2ba/0x2e7 (check_preempt_wakeup+0x12/0x99 ) [...] bash-10653 1d..5 33us : tracing_record_cmdline+0xcf/0xd4 (_spin_unlock+0x9/0x33 ) bash-10653 1d..5 34us : _spin_unlock+0x19/0x33 (sub_preempt_count+0xc/0x7a ) bash-10653 1d..4 35us : wakeup_sched_switch+0x65/0x2ff (_spin_lock_irqsave+0xc/0xa9 ) bash-10653 1d..4 35us : _spin_lock_irqsave+0x19/0xa9 (add_preempt_count+0xe/0x77 ) bash-10653 1d..4 36us : sched_switch_callback+0x73/0x81 (schedule+0x483/0x6d5 ) vim:ft=help ============ The [...] was added here to not waste your email box space. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b96ef14..db8a5e7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,10 +5,6 @@ #include -#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) -#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) - typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_ops { @@ -35,4 +31,23 @@ extern void mcount(void); # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) #endif /* CONFIG_FTRACE */ + + +#ifdef CONFIG_FRAME_POINTER +/* TODO: need to fix this for ARM */ +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) +# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) +# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) +# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +#else +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 0UL +# define CALLER_ADDR2 0UL +# define CALLER_ADDR3 0UL +# define CALLER_ADDR4 0UL +# define CALLER_ADDR5 0UL +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5d6aa92..892ecc9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -4,6 +4,9 @@ config HAVE_FTRACE bool +config TRACER_MAX_TRACE + bool + config TRACING bool select DEBUG_FS @@ -23,6 +26,16 @@ config FTRACE (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. +config SCHED_TRACER + bool "Scheduling Latency Tracer" + depends on DEBUG_KERNEL + select TRACING + select CONTEXT_SWITCH_TRACER + select TRACER_MAX_TRACE + help + This tracer tracks the latency of the highest priority task + to be scheduled in, starting from the point it has woken up. + config CONTEXT_SWITCH_TRACER bool "Trace process context switches" depends on DEBUG_KERNEL diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 6b54ceb..5508cdb 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -3,5 +3,6 @@ obj-$(CONFIG_FTRACE) += libftrace.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FTRACE) += trace_functions.o +obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c new file mode 100644 index 0000000..7c3ccef --- /dev/null +++ b/kernel/trace/trace_sched_wakeup.c @@ -0,0 +1,310 @@ +/* + * trace task wakeup timings + * + * Copyright (C) 2007-2008 Steven Rostedt + * Copyright (C) 2008 Ingo Molnar + * + * Based on code from the latency_tracer, that is: + * + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +static struct trace_array *wakeup_trace; +static int __read_mostly tracer_enabled; + +static struct task_struct *wakeup_task; +static int wakeup_cpu; +static unsigned wakeup_prio = -1; + +static DEFINE_SPINLOCK(wakeup_lock); + +static void notrace __wakeup_reset(struct trace_array *tr); + +/* + * Should this new latency be reported/recorded? + */ +static int notrace report_latency(cycle_t delta) +{ + if (tracing_thresh) { + if (delta < tracing_thresh) + return 0; + } else { + if (delta <= tracing_max_latency) + return 0; + } + return 1; +} + +void notrace +wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) +{ + unsigned long latency = 0, t0 = 0, t1 = 0; + struct trace_array *tr = wakeup_trace; + struct trace_array_cpu *data; + cycle_t T0, T1, delta; + unsigned long flags; + long disabled; + int cpu; + + if (unlikely(!tracer_enabled)) + return; + + /* + * When we start a new trace, we set wakeup_task to NULL + * and then set tracer_enabled = 1. We want to make sure + * that another CPU does not see the tracer_enabled = 1 + * and the wakeup_task with an older task, that might + * actually be the same as next. + */ + smp_rmb(); + + if (next != wakeup_task) + return; + + /* The task we are waitng for is waking up */ + data = tr->data[wakeup_cpu]; + + /* disable local data, not wakeup_cpu data */ + cpu = raw_smp_processor_id(); + disabled = atomic_inc_return(&tr->data[cpu]->disabled); + if (likely(disabled != 1)) + goto out; + + spin_lock_irqsave(&wakeup_lock, flags); + + /* We could race with grabbing wakeup_lock */ + if (unlikely(!tracer_enabled || next != wakeup_task)) + goto out_unlock; + + ftrace(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); + + /* + * usecs conversion is slow so we try to delay the conversion + * as long as possible: + */ + T0 = data->preempt_timestamp; + T1 = now(cpu); + delta = T1-T0; + + if (!report_latency(delta)) + goto out_unlock; + + latency = nsecs_to_usecs(delta); + + tracing_max_latency = delta; + t0 = nsecs_to_usecs(T0); + t1 = nsecs_to_usecs(T1); + + update_max_tr(tr, wakeup_task, wakeup_cpu); + + if (tracing_thresh) { + printk(KERN_INFO "(%16s-%-5d|#%d): %lu us wakeup latency " + "violates %lu us threshold.\n" + " => started at timestamp %lu: ", + wakeup_task->comm, wakeup_task->pid, + raw_smp_processor_id(), + latency, nsecs_to_usecs(tracing_thresh), t0); + } else { + printk(KERN_INFO "(%16s-%-5d|#%d): new %lu us maximum " + "wakeup latency.\n => started at timestamp %lu: ", + wakeup_task->comm, wakeup_task->pid, + cpu, latency, t0); + } + + printk(KERN_CONT " ended at timestamp %lu: ", t1); + dump_stack(); + t1 = nsecs_to_usecs(now(cpu)); + printk(KERN_CONT " dump-end timestamp %lu\n\n", t1); + +out_unlock: + __wakeup_reset(tr); + spin_unlock_irqrestore(&wakeup_lock, flags); +out: + atomic_dec(&tr->data[cpu]->disabled); +} + +static void notrace __wakeup_reset(struct trace_array *tr) +{ + struct trace_array_cpu *data; + int cpu; + + assert_spin_locked(&wakeup_lock); + + for_each_possible_cpu(cpu) { + data = tr->data[cpu]; + tracing_reset(data); + } + + wakeup_cpu = -1; + wakeup_prio = -1; + + if (wakeup_task) + put_task_struct(wakeup_task); + + wakeup_task = NULL; +} + +static void notrace wakeup_reset(struct trace_array *tr) +{ + unsigned long flags; + + spin_lock_irqsave(&wakeup_lock, flags); + __wakeup_reset(tr); + spin_unlock_irqrestore(&wakeup_lock, flags); +} + +static notrace void +wakeup_check_start(struct trace_array *tr, struct task_struct *p, + struct task_struct *curr) +{ + int cpu = smp_processor_id(); + unsigned long flags; + long disabled; + + if (likely(!rt_task(p)) || + p->prio >= wakeup_prio || + p->prio >= curr->prio) + return; + + disabled = atomic_inc_return(&tr->data[cpu]->disabled); + if (unlikely(disabled != 1)) + goto out; + + /* interrupts should be off from try_to_wake_up */ + spin_lock(&wakeup_lock); + + /* check for races. */ + if (!tracer_enabled || p->prio >= wakeup_prio) + goto out_locked; + + /* reset the trace */ + __wakeup_reset(tr); + + wakeup_cpu = task_cpu(p); + wakeup_prio = p->prio; + + wakeup_task = p; + get_task_struct(wakeup_task); + + local_save_flags(flags); + + tr->data[wakeup_cpu]->preempt_timestamp = now(cpu); + ftrace(tr, tr->data[wakeup_cpu], CALLER_ADDR1, CALLER_ADDR2, flags); + +out_locked: + spin_unlock(&wakeup_lock); +out: + atomic_dec(&tr->data[cpu]->disabled); +} + +notrace void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +{ + if (likely(!tracer_enabled)) + return; + + wakeup_check_start(wakeup_trace, wakee, curr); +} + +notrace void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) +{ + if (likely(!tracer_enabled)) + return; + + wakeup_check_start(wakeup_trace, wakee, curr); +} + +static notrace void start_wakeup_tracer(struct trace_array *tr) +{ + wakeup_reset(tr); + + /* + * Don't let the tracer_enabled = 1 show up before + * the wakeup_task is reset. This may be overkill since + * wakeup_reset does a spin_unlock after setting the + * wakeup_task to NULL, but I want to be safe. + * This is a slow path anyway. + */ + smp_wmb(); + + tracer_enabled = 1; + + return; +} + +static notrace void stop_wakeup_tracer(struct trace_array *tr) +{ + tracer_enabled = 0; +} + +static notrace void wakeup_tracer_init(struct trace_array *tr) +{ + wakeup_trace = tr; + + if (tr->ctrl) + start_wakeup_tracer(tr); +} + +static notrace void wakeup_tracer_reset(struct trace_array *tr) +{ + if (tr->ctrl) { + stop_wakeup_tracer(tr); + /* make sure we put back any tasks we are tracing */ + wakeup_reset(tr); + } +} + +static void wakeup_tracer_ctrl_update(struct trace_array *tr) +{ + if (tr->ctrl) + start_wakeup_tracer(tr); + else + stop_wakeup_tracer(tr); +} + +static void notrace wakeup_tracer_open(struct trace_iterator *iter) +{ + /* stop the trace while dumping */ + if (iter->tr->ctrl) + stop_wakeup_tracer(iter->tr); +} + +static void notrace wakeup_tracer_close(struct trace_iterator *iter) +{ + /* forget about any processes we were recording */ + if (iter->tr->ctrl) + start_wakeup_tracer(iter->tr); +} + +static struct tracer wakeup_tracer __read_mostly = +{ + .name = "wakeup", + .init = wakeup_tracer_init, + .reset = wakeup_tracer_reset, + .open = wakeup_tracer_open, + .close = wakeup_tracer_close, + .ctrl_update = wakeup_tracer_ctrl_update, + .print_max = 1, +}; + +__init static int init_wakeup_tracer(void) +{ + int ret; + + ret = register_tracer(&wakeup_tracer); + if (ret) + return ret; + + return 0; +} +device_initcall(init_wakeup_tracer); -- cgit v0.10.2 From 81d68a96a39844853b37f20cc8282d9b65b78ef3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace irq disabled critical timings This patch adds latency tracing for critical timings (how long interrupts are disabled for). "irqsoff" is added to /debugfs/tracing/available_tracers Note: tracing_max_latency also holds the max latency for irqsoff (in usecs). (default to large number so one must start latency tracing) tracing_thresh threshold (in usecs) to always print out if irqs off is detected to be longer than stated here. If irq_thresh is non-zero, then max_irq_latency is ignored. Here's an example of a trace with ftrace_enabled = 0 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 100 us, #3/3, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1d.s3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1d.s3 100us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1d.s3 100us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= And this is a trace with ftrace_enabled == 1 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 -------------------------------------------------------------------- latency: 102 us, #12/12, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1dNs3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1dNs3 46us : e1000_read_phy_reg+0x16/0x225 [e1000] (e1000_update_stats+0x5e2/0x64c [e1000]) swapper-0 1dNs3 46us : e1000_swfw_sync_acquire+0x10/0x99 [e1000] (e1000_read_phy_reg+0x49/0x225 [e1000]) swapper-0 1dNs3 46us : e1000_get_hw_eeprom_semaphore+0x12/0xa6 [e1000] (e1000_swfw_sync_acquire+0x36/0x99 [e1000]) swapper-0 1dNs3 47us : __const_udelay+0x9/0x47 (e1000_read_phy_reg+0x116/0x225 [e1000]) swapper-0 1dNs3 47us+: __delay+0x9/0x50 (__const_udelay+0x45/0x47) swapper-0 1dNs3 97us : preempt_schedule+0xc/0x84 (__delay+0x4e/0x50) swapper-0 1dNs3 98us : e1000_swfw_sync_release+0xc/0x55 [e1000] (e1000_read_phy_reg+0x211/0x225 [e1000]) swapper-0 1dNs3 99us+: e1000_put_hw_eeprom_semaphore+0x9/0x35 [e1000] (e1000_swfw_sync_release+0x50/0x55 [e1000]) swapper-0 1dNs3 101us : _spin_unlock_irqrestore+0xe/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f3..dd349c9 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -165,7 +165,10 @@ void cpu_idle(void) */ local_irq_disable(); enter_idle(); + /* Don't trace irqs off for idle */ + stop_critical_timings(); idle(); + start_critical_timings(); /* In many cases the interrupt that ended idle has already called exit_idle. But some idle loops can be woken up without interrupt. */ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 76f60f5..84aa288 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_SMP) := msr-on-cpu.o lib-y := delay_$(BITS).o +lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o lib-y += memcpy_$(BITS).o diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S new file mode 100644 index 0000000..650b11e --- /dev/null +++ b/arch/x86/lib/thunk_32.S @@ -0,0 +1,47 @@ +/* + * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash) + * Copyright 2008 by Steven Rostedt, Red Hat, Inc + * (inspired by Andi Kleen's thunk_64.S) + * Subject to the GNU public license, v.2. No warranty of any kind. + */ + + #include + +#define ARCH_TRACE_IRQS_ON \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_on; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#define ARCH_TRACE_IRQS_OFF \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_off; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#ifdef CONFIG_TRACE_IRQFLAGS + /* put return address in eax (arg1) */ + .macro thunk_ra name,func + .globl \name +\name: + pushl %eax + pushl %ecx + pushl %edx + /* Place EIP in the arg1 */ + movl 3*4(%esp), %eax + call \func + popl %edx + popl %ecx + popl %eax + ret + .endm + + thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller + thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller +#endif diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index e009251..bf9a7d5 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -2,6 +2,7 @@ * Save registers before calling assembly functions. This avoids * disturbance of register allocation in some inline assembly constructs. * Copyright 2001,2002 by Andi Kleen, SuSE Labs. + * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc. * Subject to the GNU public license, v.2. No warranty of any kind. */ @@ -42,8 +43,22 @@ #endif #ifdef CONFIG_TRACE_IRQFLAGS - thunk trace_hardirqs_on_thunk,trace_hardirqs_on - thunk trace_hardirqs_off_thunk,trace_hardirqs_off + /* put return address in rdi (arg1) */ + .macro thunk_ra name,func + .globl \name +\name: + CFI_STARTPROC + SAVE_ARGS + /* SAVE_ARGS pushs 9 elements */ + /* the next element would be the rip */ + movq 9*8(%rsp), %rdi + call \func + jmp restore + CFI_ENDPROC + .endm + + thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller + thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h index c242527..24d71b1 100644 --- a/include/asm-x86/irqflags.h +++ b/include/asm-x86/irqflags.h @@ -179,8 +179,6 @@ static inline void trace_hardirqs_fixup(void) * have a reliable stack. x86_64 only. */ #define SWAPGS_UNSAFE_STACK swapgs -#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk -#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk #define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk #define ARCH_LOCKDEP_SYS_EXIT_IRQ \ TRACE_IRQS_ON; \ @@ -192,24 +190,6 @@ static inline void trace_hardirqs_fixup(void) TRACE_IRQS_OFF; #else -#define ARCH_TRACE_IRQS_ON \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_on; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - -#define ARCH_TRACE_IRQS_OFF \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_off; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - #define ARCH_LOCKDEP_SYS_EXIT \ pushl %eax; \ pushl %ecx; \ @@ -223,8 +203,8 @@ static inline void trace_hardirqs_fixup(void) #endif #ifdef CONFIG_TRACE_IRQFLAGS -# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON -# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF +# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; +# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; #else # define TRACE_IRQS_ON # define TRACE_IRQS_OFF diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db8a5e7..0a20445 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -50,4 +50,12 @@ extern void mcount(void); # define CALLER_ADDR5 0UL #endif +#ifdef CONFIG_IRQSOFF_TRACER + extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1); + extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1); +#else +# define time_hardirqs_on(a0, a1) do { } while (0) +# define time_hardirqs_off(a0, a1) do { } while (0) +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index e600c4e..5b711d4 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -12,10 +12,10 @@ #define _LINUX_TRACE_IRQFLAGS_H #ifdef CONFIG_TRACE_IRQFLAGS - extern void trace_hardirqs_on(void); - extern void trace_hardirqs_off(void); extern void trace_softirqs_on(unsigned long ip); extern void trace_softirqs_off(unsigned long ip); + extern void trace_hardirqs_on(void); + extern void trace_hardirqs_off(void); # define trace_hardirq_context(p) ((p)->hardirq_context) # define trace_softirq_context(p) ((p)->softirq_context) # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) @@ -41,6 +41,14 @@ # define INIT_TRACE_IRQFLAGS #endif +#ifdef CONFIG_IRQSOFF_TRACER + extern void stop_critical_timings(void); + extern void start_critical_timings(void); +#else +# define stop_critical_timings() do { } while (0) +# define start_critical_timings() do { } while (0) +#endif + #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #include diff --git a/kernel/fork.c b/kernel/fork.c index 19908b2..d66d676 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -909,7 +909,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, rt_mutex_init_task(p); -#ifdef CONFIG_TRACE_IRQFLAGS +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP) DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 81a4e4a..e219243 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -982,7 +983,7 @@ check_noncircular(struct lock_class *source, unsigned int depth) return 1; } -#ifdef CONFIG_TRACE_IRQFLAGS +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) /* * Forwards and backwards subgraph searching, for the purposes of * proving that two subgraphs can be connected by a new dependency @@ -1680,7 +1681,7 @@ valid_state(struct task_struct *curr, struct held_lock *this, static int mark_lock(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit new_bit); -#ifdef CONFIG_TRACE_IRQFLAGS +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) /* * print irq inversion bug: @@ -2013,11 +2014,13 @@ void early_boot_irqs_on(void) /* * Hardirqs will be enabled: */ -void trace_hardirqs_on(void) +void notrace trace_hardirqs_on_caller(unsigned long a0) { struct task_struct *curr = current; unsigned long ip; + time_hardirqs_on(CALLER_ADDR0, a0); + if (unlikely(!debug_locks || current->lockdep_recursion)) return; @@ -2055,16 +2058,23 @@ void trace_hardirqs_on(void) curr->hardirq_enable_event = ++curr->irq_events; debug_atomic_inc(&hardirqs_on_events); } +EXPORT_SYMBOL(trace_hardirqs_on_caller); +void notrace trace_hardirqs_on(void) +{ + trace_hardirqs_on_caller(CALLER_ADDR0); +} EXPORT_SYMBOL(trace_hardirqs_on); /* * Hardirqs were disabled: */ -void trace_hardirqs_off(void) +void notrace trace_hardirqs_off_caller(unsigned long a0) { struct task_struct *curr = current; + time_hardirqs_off(CALLER_ADDR0, a0); + if (unlikely(!debug_locks || current->lockdep_recursion)) return; @@ -2082,7 +2092,12 @@ void trace_hardirqs_off(void) } else debug_atomic_inc(&redundant_hardirqs_off); } +EXPORT_SYMBOL(trace_hardirqs_off_caller); +void notrace trace_hardirqs_off(void) +{ + trace_hardirqs_off_caller(CALLER_ADDR0); +} EXPORT_SYMBOL(trace_hardirqs_off); /* diff --git a/kernel/printk.c b/kernel/printk.c index 8fb01c3..ae7d5b9 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1041,7 +1041,9 @@ void release_console_sem(void) _log_end = log_end; con_start = log_end; /* Flush */ spin_unlock(&logbuf_lock); + stop_critical_timings(); /* don't trace print latency */ call_console_drivers(_con_start, _log_end); + start_critical_timings(); local_irq_restore(flags); } console_locked = 0; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 892ecc9..896df1c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -26,6 +26,24 @@ config FTRACE (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. +config IRQSOFF_TRACER + bool "Interrupts-off Latency Tracer" + default n + depends on TRACE_IRQFLAGS_SUPPORT + depends on GENERIC_TIME + select TRACE_IRQFLAGS + select TRACING + select TRACER_MAX_TRACE + help + This option measures the time spent in irqs-off critical + sections, with microsecond accuracy. + + The default measurement method is a maximum search, which is + disabled by default and can be runtime (re-)started + via: + + echo 0 > /debugfs/tracing/tracing_max_latency + config SCHED_TRACER bool "Scheduling Latency Tracer" depends on DEBUG_KERNEL diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 5508cdb..46be864 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_FTRACE) += libftrace.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FTRACE) += trace_functions.o +obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c new file mode 100644 index 0000000..a9131b0 --- /dev/null +++ b/kernel/trace/trace_irqsoff.c @@ -0,0 +1,402 @@ +/* + * trace irqs off criticall timings + * + * Copyright (C) 2007-2008 Steven Rostedt + * Copyright (C) 2008 Ingo Molnar + * + * From code in the latency_tracer, that is: + * + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +static struct trace_array *irqsoff_trace __read_mostly; +static int tracer_enabled __read_mostly; + +/* + * Sequence count - we record it when starting a measurement and + * skip the latency if the sequence has changed - some other section + * did a maximum and could disturb our measurement with serial console + * printouts, etc. Truly coinciding maximum latencies should be rare + * and what happens together happens separately as well, so this doesnt + * decrease the validity of the maximum found: + */ +static __cacheline_aligned_in_smp unsigned long max_sequence; + +#ifdef CONFIG_FTRACE +/* + * irqsoff uses its own tracer function to keep the overhead down: + */ +static void notrace +irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (likely(!tracer_enabled)) + return; + + local_save_flags(flags); + + if (!irqs_disabled_flags(flags)) + return; + + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + ftrace(tr, data, ip, parent_ip, flags); + + atomic_dec(&data->disabled); +} + +static struct ftrace_ops trace_ops __read_mostly = +{ + .func = irqsoff_tracer_call, +}; +#endif /* CONFIG_FTRACE */ + +/* + * Should this new latency be reported/recorded? + */ +static int notrace report_latency(cycle_t delta) +{ + if (tracing_thresh) { + if (delta < tracing_thresh) + return 0; + } else { + if (delta <= tracing_max_latency) + return 0; + } + return 1; +} + +static void notrace +check_critical_timing(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long parent_ip, + int cpu) +{ + unsigned long latency, t0, t1; + cycle_t T0, T1, T2, delta; + unsigned long flags; + + /* + * usecs conversion is slow so we try to delay the conversion + * as long as possible: + */ + T0 = data->preempt_timestamp; + T1 = now(cpu); + delta = T1-T0; + + local_save_flags(flags); + + if (!report_latency(delta)) + goto out; + + ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); + /* + * Update the timestamp, because the trace entry above + * might change it (it can only get larger so the latency + * is fair to be reported): + */ + T2 = now(cpu); + + delta = T2-T0; + + latency = nsecs_to_usecs(delta); + + if (data->critical_sequence != max_sequence) + goto out; + + tracing_max_latency = delta; + t0 = nsecs_to_usecs(T0); + t1 = nsecs_to_usecs(T1); + + data->critical_end = parent_ip; + + update_max_tr_single(tr, current, cpu); + + if (tracing_thresh) + printk(KERN_INFO "(%16s-%-5d|#%d): %lu us critical section " + "violates %lu us threshold.\n" + " => started at timestamp %lu: ", + current->comm, current->pid, + raw_smp_processor_id(), + latency, nsecs_to_usecs(tracing_thresh), t0); + else + printk(KERN_INFO "(%16s-%-5d|#%d):" + " new %lu us maximum-latency " + "critical section.\n => started at timestamp %lu: ", + current->comm, current->pid, + raw_smp_processor_id(), + latency, t0); + + print_symbol(KERN_CONT "<%s>\n", data->critical_start); + printk(KERN_CONT " => ended at timestamp %lu: ", t1); + print_symbol(KERN_CONT "<%s>\n", data->critical_end); + dump_stack(); + t1 = nsecs_to_usecs(now(cpu)); + printk(KERN_CONT " => dump-end timestamp %lu\n\n", t1); + + max_sequence++; + +out: + data->critical_sequence = max_sequence; + data->preempt_timestamp = now(cpu); + tracing_reset(data); + ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); +} + +static inline void notrace +start_critical_timing(unsigned long ip, unsigned long parent_ip) +{ + int cpu; + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + + if (likely(!tracer_enabled)) + return; + + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(!data) || unlikely(!data->trace) || + data->critical_start || atomic_read(&data->disabled)) + return; + + atomic_inc(&data->disabled); + + data->critical_sequence = max_sequence; + data->preempt_timestamp = now(cpu); + data->critical_start = parent_ip; + tracing_reset(data); + + local_save_flags(flags); + ftrace(tr, data, ip, parent_ip, flags); + + atomic_dec(&data->disabled); +} + +static inline void notrace +stop_critical_timing(unsigned long ip, unsigned long parent_ip) +{ + int cpu; + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; + + if (likely(!tracer_enabled)) + return; + + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(!data) || unlikely(!data->trace) || + !data->critical_start || atomic_read(&data->disabled)) + return; + + atomic_inc(&data->disabled); + local_save_flags(flags); + ftrace(tr, data, ip, parent_ip, flags); + check_critical_timing(tr, data, parent_ip, cpu); + data->critical_start = 0; + atomic_dec(&data->disabled); +} + +void notrace start_critical_timings(void) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); +} + +void notrace stop_critical_timings(void) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); +} + +#ifdef CONFIG_PROVE_LOCKING +void notrace time_hardirqs_on(unsigned long a0, unsigned long a1) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + stop_critical_timing(a0, a1); +} + +void notrace time_hardirqs_off(unsigned long a0, unsigned long a1) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + start_critical_timing(a0, a1); +} + +#else /* !CONFIG_PROVE_LOCKING */ + +/* + * Stubs: + */ + +void early_boot_irqs_off(void) +{ +} + +void early_boot_irqs_on(void) +{ +} + +void trace_softirqs_on(unsigned long ip) +{ +} + +void trace_softirqs_off(unsigned long ip) +{ +} + +inline void print_irqtrace_events(struct task_struct *curr) +{ +} + +/* + * We are only interested in hardirq on/off events: + */ +void notrace trace_hardirqs_on(void) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); +} +EXPORT_SYMBOL(trace_hardirqs_on); + +void notrace trace_hardirqs_off(void) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); +} +EXPORT_SYMBOL(trace_hardirqs_off); + +void notrace trace_hardirqs_on_caller(unsigned long caller_addr) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + stop_critical_timing(CALLER_ADDR0, caller_addr); +} +EXPORT_SYMBOL(trace_hardirqs_on_caller); + +void notrace trace_hardirqs_off_caller(unsigned long caller_addr) +{ + unsigned long flags; + + local_save_flags(flags); + + if (irqs_disabled_flags(flags)) + start_critical_timing(CALLER_ADDR0, caller_addr); +} +EXPORT_SYMBOL(trace_hardirqs_off_caller); + +#endif /* CONFIG_PROVE_LOCKING */ + +static void start_irqsoff_tracer(struct trace_array *tr) +{ + tracer_enabled = 1; + register_ftrace_function(&trace_ops); +} + +static void stop_irqsoff_tracer(struct trace_array *tr) +{ + unregister_ftrace_function(&trace_ops); + tracer_enabled = 0; +} + +static void irqsoff_tracer_init(struct trace_array *tr) +{ + irqsoff_trace = tr; + /* make sure that the tracer is visibel */ + smp_wmb(); + + if (tr->ctrl) + start_irqsoff_tracer(tr); +} + +static void irqsoff_tracer_reset(struct trace_array *tr) +{ + if (tr->ctrl) + stop_irqsoff_tracer(tr); +} + +static void irqsoff_tracer_ctrl_update(struct trace_array *tr) +{ + if (tr->ctrl) + start_irqsoff_tracer(tr); + else + stop_irqsoff_tracer(tr); +} + +static void notrace irqsoff_tracer_open(struct trace_iterator *iter) +{ + /* stop the trace while dumping */ + if (iter->tr->ctrl) + stop_irqsoff_tracer(iter->tr); +} + +static void notrace irqsoff_tracer_close(struct trace_iterator *iter) +{ + if (iter->tr->ctrl) + start_irqsoff_tracer(iter->tr); +} + +static struct tracer irqsoff_tracer __read_mostly = +{ + .name = "irqsoff", + .init = irqsoff_tracer_init, + .reset = irqsoff_tracer_reset, + .open = irqsoff_tracer_open, + .close = irqsoff_tracer_close, + .ctrl_update = irqsoff_tracer_ctrl_update, + .print_max = 1, +}; + +__init static int init_irqsoff_tracer(void) +{ + register_tracer(&irqsoff_tracer); + + return 0; +} +device_initcall(init_irqsoff_tracer); -- cgit v0.10.2 From 6cd8a4bb2f97527a9ceb30bc77ea4e959c6a95e3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace preempt off critical timings Add preempt off timings. A lot of kernel core code is taken from the RT patch latency trace that was written by Ingo Molnar. This adds "preemptoff" and "preemptirqsoff" to /debugfs/tracing/available_tracers Now instead of just tracing irqs off, preemption off can be selected to be recorded. When this is selected, it shares the same files as irqs off timings. One can either trace preemption off, irqs off, or one or the other off. By echoing "preemptoff" into /debugfs/tracing/current_tracer, recording of preempt off only is performed. "irqsoff" will only record the time irqs are disabled, but "preemptirqsoff" will take the total time irqs or preemption are disabled. Runtime switching of these options is now supported by simpling echoing in the appropriate trace name into /debugfs/tracing/current_tracer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476df..a30aa1f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -185,7 +185,10 @@ void cpu_idle(void) local_irq_disable(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; + /* Don't trace irqs off for idle */ + stop_critical_timings(); idle(); + start_critical_timings(); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0a20445..740c97d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,4 +58,12 @@ extern void mcount(void); # define time_hardirqs_off(a0, a1) do { } while (0) #endif +#ifdef CONFIG_PREEMPT_TRACER + extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1); + extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1); +#else +# define trace_preempt_on(a0, a1) do { } while (0) +# define trace_preempt_off(a0, a1) do { } while (0) +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5b711d4..2b1c2e5 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -41,7 +41,8 @@ # define INIT_TRACE_IRQFLAGS #endif -#ifdef CONFIG_IRQSOFF_TRACER +#if defined(CONFIG_IRQSOFF_TRACER) || \ + defined(CONFIG_PREEMPT_TRACER) extern void stop_critical_timings(void); extern void start_critical_timings(void); #else diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 36b03d5..72b1a10 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -10,7 +10,7 @@ #include #include -#ifdef CONFIG_DEBUG_PREEMPT +#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void add_preempt_count(int val); extern void sub_preempt_count(int val); #else diff --git a/kernel/sched.c b/kernel/sched.c index 73e6008..328494e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -4365,26 +4366,44 @@ void scheduler_tick(void) #endif } -#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) +#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ + defined(CONFIG_PREEMPT_TRACER)) + +static inline unsigned long get_parent_ip(unsigned long addr) +{ + if (in_lock_functions(addr)) { + addr = CALLER_ADDR2; + if (in_lock_functions(addr)) + addr = CALLER_ADDR3; + } + return addr; +} void __kprobes add_preempt_count(int val) { +#ifdef CONFIG_DEBUG_PREEMPT /* * Underflow? */ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) return; +#endif preempt_count() += val; +#ifdef CONFIG_DEBUG_PREEMPT /* * Spinlock count overflowing soon? */ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK - 10); +#endif + if (preempt_count() == val) + trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); } EXPORT_SYMBOL(add_preempt_count); void __kprobes sub_preempt_count(int val) { +#ifdef CONFIG_DEBUG_PREEMPT /* * Underflow? */ @@ -4396,7 +4415,10 @@ void __kprobes sub_preempt_count(int val) if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK))) return; +#endif + if (preempt_count() == val) + trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); preempt_count() -= val; } EXPORT_SYMBOL(sub_preempt_count); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 896df1c..6430016 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -44,6 +44,31 @@ config IRQSOFF_TRACER echo 0 > /debugfs/tracing/tracing_max_latency + (Note that kernel size and overhead increases with this option + enabled. This option and the preempt-off timing option can be + used together or separately.) + +config PREEMPT_TRACER + bool "Preemption-off Latency Tracer" + default n + depends on GENERIC_TIME + depends on PREEMPT + select TRACING + select TRACER_MAX_TRACE + help + This option measures the time spent in preemption off critical + sections, with microsecond accuracy. + + The default measurement method is a maximum search, which is + disabled by default and can be runtime (re-)started + via: + + echo 0 > /debugfs/tracing/tracing_max_latency + + (Note that kernel size and overhead increases with this option + enabled. This option and the irqs-off timing option can be + used together or separately.) + config SCHED_TRACER bool "Scheduling Latency Tracer" depends on DEBUG_KERNEL diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 46be864..3fec653 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FTRACE) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o +obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index a9131b0..8b12316 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -21,6 +21,36 @@ static struct trace_array *irqsoff_trace __read_mostly; static int tracer_enabled __read_mostly; +static DEFINE_PER_CPU(int, tracing_cpu); + +enum { + TRACER_IRQS_OFF = (1 << 1), + TRACER_PREEMPT_OFF = (1 << 2), +}; + +static int trace_type __read_mostly; + +#ifdef CONFIG_PREEMPT_TRACER +static inline int notrace +preempt_trace(void) +{ + return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count()); +} +#else +# define preempt_trace() (0) +#endif + +#ifdef CONFIG_IRQSOFF_TRACER +static inline int notrace +irq_trace(void) +{ + return ((trace_type & TRACER_IRQS_OFF) && + irqs_disabled()); +} +#else +# define irq_trace() (0) +#endif + /* * Sequence count - we record it when starting a measurement and * skip the latency if the sequence has changed - some other section @@ -44,14 +74,11 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) long disabled; int cpu; - if (likely(!tracer_enabled)) + if (likely(!__get_cpu_var(tracing_cpu))) return; local_save_flags(flags); - if (!irqs_disabled_flags(flags)) - return; - cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -171,23 +198,29 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) if (likely(!tracer_enabled)) return; + if (__get_cpu_var(tracing_cpu)) + return; + cpu = raw_smp_processor_id(); data = tr->data[cpu]; if (unlikely(!data) || unlikely(!data->trace) || - data->critical_start || atomic_read(&data->disabled)) + atomic_read(&data->disabled)) return; atomic_inc(&data->disabled); data->critical_sequence = max_sequence; data->preempt_timestamp = now(cpu); - data->critical_start = parent_ip; + data->critical_start = parent_ip ? : ip; tracing_reset(data); local_save_flags(flags); + ftrace(tr, data, ip, parent_ip, flags); + __get_cpu_var(tracing_cpu) = 1; + atomic_dec(&data->disabled); } @@ -199,7 +232,13 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; - if (likely(!tracer_enabled)) + /* Always clear the tracing cpu on stopping the trace */ + if (unlikely(__get_cpu_var(tracing_cpu))) + __get_cpu_var(tracing_cpu) = 0; + else + return; + + if (!tracer_enabled) return; cpu = raw_smp_processor_id(); @@ -212,49 +251,35 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); local_save_flags(flags); ftrace(tr, data, ip, parent_ip, flags); - check_critical_timing(tr, data, parent_ip, cpu); + check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); } +/* start and stop critical timings used to for stoppage (in idle) */ void notrace start_critical_timings(void) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (preempt_trace() || irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } void notrace stop_critical_timings(void) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (preempt_trace() || irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } +#ifdef CONFIG_IRQSOFF_TRACER #ifdef CONFIG_PROVE_LOCKING void notrace time_hardirqs_on(unsigned long a0, unsigned long a1) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) stop_critical_timing(a0, a1); } void notrace time_hardirqs_off(unsigned long a0, unsigned long a1) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) start_critical_timing(a0, a1); } @@ -289,49 +314,46 @@ inline void print_irqtrace_events(struct task_struct *curr) */ void notrace trace_hardirqs_on(void) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL(trace_hardirqs_on); void notrace trace_hardirqs_off(void) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL(trace_hardirqs_off); void notrace trace_hardirqs_on_caller(unsigned long caller_addr) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, caller_addr); } EXPORT_SYMBOL(trace_hardirqs_on_caller); void notrace trace_hardirqs_off_caller(unsigned long caller_addr) { - unsigned long flags; - - local_save_flags(flags); - - if (irqs_disabled_flags(flags)) + if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, caller_addr); } EXPORT_SYMBOL(trace_hardirqs_off_caller); #endif /* CONFIG_PROVE_LOCKING */ +#endif /* CONFIG_IRQSOFF_TRACER */ + +#ifdef CONFIG_PREEMPT_TRACER +void notrace trace_preempt_on(unsigned long a0, unsigned long a1) +{ + stop_critical_timing(a0, a1); +} + +void notrace trace_preempt_off(unsigned long a0, unsigned long a1) +{ + start_critical_timing(a0, a1); +} +#endif /* CONFIG_PREEMPT_TRACER */ static void start_irqsoff_tracer(struct trace_array *tr) { @@ -345,7 +367,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr) tracer_enabled = 0; } -static void irqsoff_tracer_init(struct trace_array *tr) +static void __irqsoff_tracer_init(struct trace_array *tr) { irqsoff_trace = tr; /* make sure that the tracer is visibel */ @@ -382,6 +404,13 @@ static void notrace irqsoff_tracer_close(struct trace_iterator *iter) start_irqsoff_tracer(iter->tr); } +#ifdef CONFIG_IRQSOFF_TRACER +static void irqsoff_tracer_init(struct trace_array *tr) +{ + trace_type = TRACER_IRQS_OFF; + + __irqsoff_tracer_init(tr); +} static struct tracer irqsoff_tracer __read_mostly = { .name = "irqsoff", @@ -392,10 +421,65 @@ static struct tracer irqsoff_tracer __read_mostly = .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, }; +# define register_irqsoff(trace) register_tracer(&trace) +#else +# define register_irqsoff(trace) do { } while (0) +#endif + +#ifdef CONFIG_PREEMPT_TRACER +static void preemptoff_tracer_init(struct trace_array *tr) +{ + trace_type = TRACER_PREEMPT_OFF; + + __irqsoff_tracer_init(tr); +} + +static struct tracer preemptoff_tracer __read_mostly = +{ + .name = "preemptoff", + .init = preemptoff_tracer_init, + .reset = irqsoff_tracer_reset, + .open = irqsoff_tracer_open, + .close = irqsoff_tracer_close, + .ctrl_update = irqsoff_tracer_ctrl_update, + .print_max = 1, +}; +# define register_preemptoff(trace) register_tracer(&trace) +#else +# define register_preemptoff(trace) do { } while (0) +#endif + +#if defined(CONFIG_IRQSOFF_TRACER) && \ + defined(CONFIG_PREEMPT_TRACER) + +static void preemptirqsoff_tracer_init(struct trace_array *tr) +{ + trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; + + __irqsoff_tracer_init(tr); +} + +static struct tracer preemptirqsoff_tracer __read_mostly = +{ + .name = "preemptirqsoff", + .init = preemptirqsoff_tracer_init, + .reset = irqsoff_tracer_reset, + .open = irqsoff_tracer_open, + .close = irqsoff_tracer_close, + .ctrl_update = irqsoff_tracer_ctrl_update, + .print_max = 1, +}; + +# define register_preemptirqsoff(trace) register_tracer(&trace) +#else +# define register_preemptirqsoff(trace) do { } while (0) +#endif __init static int init_irqsoff_tracer(void) { - register_tracer(&irqsoff_tracer); + register_irqsoff(irqsoff_tracer); + register_preemptoff(preemptoff_tracer); + register_preemptirqsoff(preemptirqsoff_tracer); return 0; } -- cgit v0.10.2 From 3d0833953e1b98b79ddf491dd49229eef9baeac1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: dynamic enabling/disabling of function calls This patch adds a feature to dynamically replace the ftrace code with the jmps to allow a kernel with ftrace configured to run as fast as it can without it configured. The way this works, is on bootup (if ftrace is enabled), a ftrace function is registered to record the instruction pointer of all places that call the function. Later, if there's still any code to patch, a kthread is awoken (rate limited to at most once a second) that performs a stop_machine, and replaces all the code that was called with a jmp over the call to ftrace. It only replaces what was found the previous time. Typically the system reaches equilibrium quickly after bootup and there's no code patching needed at all. e.g. call ftrace /* 5 bytes */ is replaced with jmp 3f /* jmp is 2 bytes and we jump 3 forward */ 3: When we want to enable ftrace for function tracing, the IP recording is removed, and stop_machine is called again to replace all the locations of that were recorded back to the call of ftrace. When it is disabled, we replace the code back to the jmp. Allocation is done by the kthread. If the ftrace recording function is called, and we don't have any record slots available, then we simply skip that call. Once a second a new page (if needed) is allocated for recording new ftrace function calls. A large batch is allocated at boot up to get most of the calls there. Because we do this via stop_machine, we don't have to worry about another CPU executing a ftrace call as we modify it. But we do need to worry about NMI's so all functions that might be called via nmi must be annotated with notrace_nmi. When this code is configured in, the NMI code will not call notrace. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3..e142091 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c new file mode 100644 index 0000000..5dd5813 --- /dev/null +++ b/arch/x86/kernel/ftrace.c @@ -0,0 +1,237 @@ +/* + * Code for replacing ftrace calls with jumps. + * + * Copyright (C) 2007-2008 Steven Rostedt + * + * Thanks goes to Ingo Molnar, for suggesting the idea. + * Mathieu Desnoyers, for suggesting postponing the modifications. + * Arjan van de Ven, for keeping me straight, and explaining to me + * the dangers of modifying code on the run. + */ + +#include +#include +#include +#include +#include +#include + +#define CALL_BACK 5 + +#define JMPFWD 0x03eb + +static unsigned short ftrace_jmp = JMPFWD; + +struct ftrace_record { + struct dyn_ftrace rec; + int failed; +} __attribute__((packed)); + +struct ftrace_page { + struct ftrace_page *next; + int index; + struct ftrace_record records[]; +} __attribute__((packed)); + +#define ENTRIES_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct ftrace_record)) + +/* estimate from running different kernels */ +#define NR_TO_INIT 10000 + +#define MCOUNT_ADDR ((long)(&mcount)) + +union ftrace_code_union { + char code[5]; + struct { + char e8; + int offset; + } __attribute__((packed)); +}; + +static struct ftrace_page *ftrace_pages_start; +static struct ftrace_page *ftrace_pages; + +notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) +{ + struct ftrace_record *rec; + unsigned short save; + + ip -= CALL_BACK; + save = *(short *)ip; + + /* If this was already converted, skip it */ + if (save == JMPFWD) + return NULL; + + if (ftrace_pages->index == ENTRIES_PER_PAGE) { + if (!ftrace_pages->next) + return NULL; + ftrace_pages = ftrace_pages->next; + } + + rec = &ftrace_pages->records[ftrace_pages->index++]; + + return &rec->rec; +} + +static int notrace +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code) +{ + unsigned short old = *(unsigned short *)old_code; + unsigned short new = *(unsigned short *)new_code; + unsigned short replaced; + int faulted = 0; + + /* + * Note: Due to modules and __init, code can + * disappear and change, we need to protect against faulting + * as well as code changing. + * + * No real locking needed, this code is run through + * kstop_machine. + */ + asm volatile ( + "1: lock\n" + " cmpxchg %w3, (%2)\n" + "2:\n" + ".section .fixup, \"ax\"\n" + " movl $1, %0\n" + "3: jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : "=r"(faulted), "=a"(replaced) + : "r"(ip), "r"(new), "0"(faulted), "a"(old) + : "memory"); + sync_core(); + + if (replaced != old) + faulted = 2; + + return faulted; +} + +static int notrace ftrace_calc_offset(long ip) +{ + return (int)(MCOUNT_ADDR - ip); +} + +notrace void ftrace_code_disable(struct dyn_ftrace *rec) +{ + unsigned long ip; + union ftrace_code_union save; + struct ftrace_record *r = + container_of(rec, struct ftrace_record, rec); + + ip = rec->ip; + + save.e8 = 0xe8; + save.offset = ftrace_calc_offset(ip); + + /* move the IP back to the start of the call */ + ip -= CALL_BACK; + + r->failed = ftrace_modify_code(ip, save.code, (char *)&ftrace_jmp); +} + +static void notrace ftrace_replace_code(int saved) +{ + unsigned char *new = NULL, *old = NULL; + struct ftrace_record *rec; + struct ftrace_page *pg; + unsigned long ip; + int i; + + if (saved) + old = (char *)&ftrace_jmp; + else + new = (char *)&ftrace_jmp; + + for (pg = ftrace_pages_start; pg; pg = pg->next) { + for (i = 0; i < pg->index; i++) { + union ftrace_code_union calc; + rec = &pg->records[i]; + + /* don't modify code that has already faulted */ + if (rec->failed) + continue; + + ip = rec->rec.ip; + + calc.e8 = 0xe8; + calc.offset = ftrace_calc_offset(ip); + + if (saved) + new = calc.code; + else + old = calc.code; + + ip -= CALL_BACK; + + rec->failed = ftrace_modify_code(ip, old, new); + } + } + +} + +notrace void ftrace_startup_code(void) +{ + ftrace_replace_code(1); +} + +notrace void ftrace_shutdown_code(void) +{ + ftrace_replace_code(0); +} + +notrace void ftrace_shutdown_replenish(void) +{ + if (ftrace_pages->next) + return; + + /* allocate another page */ + ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); +} + +notrace int ftrace_shutdown_arch_init(void) +{ + struct ftrace_page *pg; + int cnt; + int i; + + /* allocate a few pages */ + ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); + if (!ftrace_pages_start) + return -1; + + /* + * Allocate a few more pages. + * + * TODO: have some parser search vmlinux before + * final linking to find all calls to ftrace. + * Then we can: + * a) know how many pages to allocate. + * and/or + * b) set up the table then. + * + * The dynamic code is still necessary for + * modules. + */ + + pg = ftrace_pages = ftrace_pages_start; + + cnt = NR_TO_INIT / ENTRIES_PER_PAGE; + + for (i = 0; i < cnt; i++) { + pg->next = (void *)get_zeroed_page(GFP_KERNEL); + + /* If we fail, we'll try later anyway */ + if (!pg->next) + break; + + pg = pg->next; + } + + return 0; +} diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 740c97d..90dbc0e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -32,6 +32,24 @@ extern void mcount(void); # define clear_ftrace_function(ops) do { } while (0) #endif /* CONFIG_FTRACE */ +#ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_HASHBITS 10 +# define FTRACE_HASHSIZE (1< +#include +#include +#include +#include +#include #include +#include +#include +#include + +#include "trace.h" -static DEFINE_SPINLOCK(ftrace_func_lock); +static DEFINE_SPINLOCK(ftrace_lock); static struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, @@ -44,21 +53,21 @@ notrace void ftrace_list_func(unsigned long ip, unsigned long parent_ip) } /** - * register_ftrace_function - register a function for profiling - * @ops - ops structure that holds the function for profiling. - * - * Register a function to be called by all functions in the - * kernel. + * clear_ftrace_function - reset the ftrace function * - * Note: @ops->func and all the functions it calls must be labeled - * with "notrace", otherwise it will go into a - * recursive loop. + * This NULLs the ftrace function and in essence stops + * tracing. There may be lag */ -int register_ftrace_function(struct ftrace_ops *ops) +void clear_ftrace_function(void) { - unsigned long flags; + ftrace_trace_function = ftrace_stub; +} + +static int notrace __register_ftrace_function(struct ftrace_ops *ops) +{ + /* Should never be called by interrupts */ + spin_lock(&ftrace_lock); - spin_lock_irqsave(&ftrace_func_lock, flags); ops->next = ftrace_list; /* * We are entering ops into the ftrace_list but another @@ -68,6 +77,7 @@ int register_ftrace_function(struct ftrace_ops *ops) */ smp_wmb(); ftrace_list = ops; + /* * For one func, simply call it directly. * For more than one func, call the chain. @@ -76,28 +86,22 @@ int register_ftrace_function(struct ftrace_ops *ops) ftrace_trace_function = ops->func; else ftrace_trace_function = ftrace_list_func; - spin_unlock_irqrestore(&ftrace_func_lock, flags); + + spin_unlock(&ftrace_lock); return 0; } -/** - * unregister_ftrace_function - unresgister a function for profiling. - * @ops - ops structure that holds the function to unregister - * - * Unregister a function that was added to be called by ftrace profiling. - */ -int unregister_ftrace_function(struct ftrace_ops *ops) +static int notrace __unregister_ftrace_function(struct ftrace_ops *ops) { - unsigned long flags; struct ftrace_ops **p; int ret = 0; - spin_lock_irqsave(&ftrace_func_lock, flags); + spin_lock(&ftrace_lock); /* - * If we are the only function, then the ftrace pointer is - * pointing directly to that function. + * If we are removing the last function, then simply point + * to the ftrace_stub. */ if (ftrace_list == ops && ops->next == &ftrace_list_end) { ftrace_trace_function = ftrace_stub; @@ -117,22 +121,310 @@ int unregister_ftrace_function(struct ftrace_ops *ops) *p = (*p)->next; /* If we only have one func left, then call that directly */ - if (ftrace_list->next == &ftrace_list_end) + if (ftrace_list == &ftrace_list_end || + ftrace_list->next == &ftrace_list_end) ftrace_trace_function = ftrace_list->func; out: - spin_unlock_irqrestore(&ftrace_func_lock, flags); + spin_unlock(&ftrace_lock); + + return ret; +} + +#ifdef CONFIG_DYNAMIC_FTRACE + +static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; + +static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); + +static DEFINE_SPINLOCK(ftrace_shutdown_lock); +static DEFINE_MUTEX(ftraced_lock); + +static int ftraced_trigger; +static int ftraced_suspend; + +static int ftrace_record_suspend; + +static inline int +notrace ftrace_ip_in_hash(unsigned long ip, unsigned long key) +{ + struct dyn_ftrace *p; + struct hlist_node *t; + int found = 0; + + hlist_for_each_entry(p, t, &ftrace_hash[key], node) { + if (p->ip == ip) { + found = 1; + break; + } + } + + return found; +} + +static inline void notrace +ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) +{ + hlist_add_head(&node->node, &ftrace_hash[key]); +} + +static void notrace +ftrace_record_ip(unsigned long ip, unsigned long parent_ip) +{ + struct dyn_ftrace *node; + unsigned long flags; + unsigned long key; + int resched; + int atomic; + + resched = need_resched(); + preempt_disable_notrace(); + + /* We simply need to protect against recursion */ + __get_cpu_var(ftrace_shutdown_disable_cpu)++; + if (__get_cpu_var(ftrace_shutdown_disable_cpu) != 1) + goto out; + + if (unlikely(ftrace_record_suspend)) + goto out; + + key = hash_long(ip, FTRACE_HASHBITS); + + WARN_ON_ONCE(key >= FTRACE_HASHSIZE); + + if (ftrace_ip_in_hash(ip, key)) + goto out; + + atomic = irqs_disabled(); + + spin_lock_irqsave(&ftrace_shutdown_lock, flags); + + /* This ip may have hit the hash before the lock */ + if (ftrace_ip_in_hash(ip, key)) + goto out_unlock; + + /* + * There's a slight race that the ftraced will update the + * hash and reset here. The arch alloc is responsible + * for seeing if the IP has already changed, and if + * it has, the alloc will fail. + */ + node = ftrace_alloc_shutdown_node(ip); + if (!node) + goto out_unlock; + + node->ip = ip; + + ftrace_add_hash(node, key); + + ftraced_trigger = 1; + + out_unlock: + spin_unlock_irqrestore(&ftrace_shutdown_lock, flags); + out: + __get_cpu_var(ftrace_shutdown_disable_cpu)--; + + /* prevent recursion with scheduler */ + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); +} + +static struct ftrace_ops ftrace_shutdown_ops __read_mostly = +{ + .func = ftrace_record_ip, +}; + + +static int notrace __ftrace_modify_code(void *data) +{ + void (*func)(void) = data; + + func(); + return 0; +} + +static void notrace ftrace_run_startup_code(void) +{ + stop_machine_run(__ftrace_modify_code, ftrace_startup_code, NR_CPUS); +} + +static void notrace ftrace_run_shutdown_code(void) +{ + stop_machine_run(__ftrace_modify_code, ftrace_shutdown_code, NR_CPUS); +} + +static void notrace ftrace_startup(void) +{ + mutex_lock(&ftraced_lock); + ftraced_suspend++; + if (ftraced_suspend != 1) + goto out; + __unregister_ftrace_function(&ftrace_shutdown_ops); + + ftrace_run_startup_code(); + out: + mutex_unlock(&ftraced_lock); +} + +static void notrace ftrace_shutdown(void) +{ + mutex_lock(&ftraced_lock); + ftraced_suspend--; + if (ftraced_suspend) + goto out; + + ftrace_run_shutdown_code(); + + __register_ftrace_function(&ftrace_shutdown_ops); + out: + mutex_unlock(&ftraced_lock); +} + +static cycle_t ftrace_update_time; +static unsigned long ftrace_update_cnt; +unsigned long ftrace_update_tot_cnt; + +static int notrace __ftrace_update_code(void *ignore) +{ + struct dyn_ftrace *p; + struct hlist_head head; + struct hlist_node *t; + cycle_t start, stop; + int i; + + /* Don't be calling ftrace ops now */ + __unregister_ftrace_function(&ftrace_shutdown_ops); + + start = now(raw_smp_processor_id()); + ftrace_update_cnt = 0; + + /* No locks needed, the machine is stopped! */ + for (i = 0; i < FTRACE_HASHSIZE; i++) { + if (hlist_empty(&ftrace_hash[i])) + continue; + + head = ftrace_hash[i]; + INIT_HLIST_HEAD(&ftrace_hash[i]); + + /* all CPUS are stopped, we are safe to modify code */ + hlist_for_each_entry(p, t, &head, node) { + ftrace_code_disable(p); + ftrace_update_cnt++; + } + + } + + stop = now(raw_smp_processor_id()); + ftrace_update_time = stop - start; + ftrace_update_tot_cnt += ftrace_update_cnt; + + __register_ftrace_function(&ftrace_shutdown_ops); return 0; } +static void notrace ftrace_update_code(void) +{ + stop_machine_run(__ftrace_update_code, NULL, NR_CPUS); +} + +static int notrace ftraced(void *ignore) +{ + unsigned long usecs; + + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop()) { + + /* check once a second */ + schedule_timeout(HZ); + + mutex_lock(&ftraced_lock); + if (ftraced_trigger && !ftraced_suspend) { + ftrace_record_suspend++; + ftrace_update_code(); + usecs = nsecs_to_usecs(ftrace_update_time); + if (ftrace_update_tot_cnt > 100000) { + ftrace_update_tot_cnt = 0; + pr_info("hm, dftrace overflow: %lu change%s" + " (%lu total) in %lu usec%s\n", + ftrace_update_cnt, + ftrace_update_cnt != 1 ? "s" : "", + ftrace_update_tot_cnt, + usecs, usecs != 1 ? "s" : ""); + WARN_ON_ONCE(1); + } + ftraced_trigger = 0; + ftrace_record_suspend--; + } + mutex_unlock(&ftraced_lock); + + ftrace_shutdown_replenish(); + + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +static int __init notrace ftrace_shutdown_init(void) +{ + struct task_struct *p; + int ret; + + ret = ftrace_shutdown_arch_init(); + if (ret) + return ret; + + p = kthread_run(ftraced, NULL, "ftraced"); + if (IS_ERR(p)) + return -1; + + __register_ftrace_function(&ftrace_shutdown_ops); + + return 0; +} + +core_initcall(ftrace_shutdown_init); +#else +# define ftrace_startup() do { } while (0) +# define ftrace_shutdown() do { } while (0) +#endif /* CONFIG_DYNAMIC_FTRACE */ + /** - * clear_ftrace_function - reset the ftrace function + * register_ftrace_function - register a function for profiling + * @ops - ops structure that holds the function for profiling. * - * This NULLs the ftrace function and in essence stops - * tracing. There may be lag + * Register a function to be called by all functions in the + * kernel. + * + * Note: @ops->func and all the functions it calls must be labeled + * with "notrace", otherwise it will go into a + * recursive loop. */ -void clear_ftrace_function(void) +int register_ftrace_function(struct ftrace_ops *ops) { - ftrace_trace_function = ftrace_stub; + ftrace_startup(); + + return __register_ftrace_function(ops); +} + +/** + * unregister_ftrace_function - unresgister a function for profiling. + * @ops - ops structure that holds the function to unregister + * + * Unregister a function that was added to be called by ftrace profiling. + */ +int unregister_ftrace_function(struct ftrace_ops *ops) +{ + int ret; + + ret = __unregister_ftrace_function(ops); + + if (ftrace_list == &ftrace_list_end) + ftrace_shutdown(); + + return ret; } -- cgit v0.10.2 From b0fc494fae96a7089f3651cb451f461c7291244c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: add ftrace_enabled sysctl to disable mcount function This patch adds back the sysctl ftrace_enabled. This time it is defaulted to on, if DYNAMIC_FTRACE is configured. When ftrace_enabled is disabled, the ftrace function is set to the stub return. If DYNAMIC_FTRACE is also configured, on ftrace_enabled = 0, the registered ftrace functions will all be set to jmps, but no more new calls to ftrace recording (used to find the ftrace calling sites) will be called. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 90dbc0e..ccd8537 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,6 +5,12 @@ #include +extern int ftrace_enabled; +extern int +ftrace_enable_sysctl(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_ops { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2911665..efaf7c5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -455,6 +456,16 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#ifdef CONFIG_FTRACE + { + .ctl_name = CTL_UNNUMBERED, + .procname = "ftrace_enabled", + .data = &ftrace_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ftrace_enable_sysctl, + }, +#endif #ifdef CONFIG_KMOD { .ctl_name = KERN_MODPROBE, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d1ae2ba..d3de372 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -20,12 +20,24 @@ #include #include #include +#include #include #include #include "trace.h" +#ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_ENABLED_INIT 1 +#else +# define FTRACE_ENABLED_INIT 0 +#endif + +int ftrace_enabled = FTRACE_ENABLED_INIT; +static int last_ftrace_enabled = FTRACE_ENABLED_INIT; + static DEFINE_SPINLOCK(ftrace_lock); +static DEFINE_MUTEX(ftrace_sysctl_lock); + static struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, @@ -78,14 +90,16 @@ static int notrace __register_ftrace_function(struct ftrace_ops *ops) smp_wmb(); ftrace_list = ops; - /* - * For one func, simply call it directly. - * For more than one func, call the chain. - */ - if (ops->next == &ftrace_list_end) - ftrace_trace_function = ops->func; - else - ftrace_trace_function = ftrace_list_func; + if (ftrace_enabled) { + /* + * For one func, simply call it directly. + * For more than one func, call the chain. + */ + if (ops->next == &ftrace_list_end) + ftrace_trace_function = ops->func; + else + ftrace_trace_function = ftrace_list_func; + } spin_unlock(&ftrace_lock); @@ -120,10 +134,12 @@ static int notrace __unregister_ftrace_function(struct ftrace_ops *ops) *p = (*p)->next; - /* If we only have one func left, then call that directly */ - if (ftrace_list == &ftrace_list_end || - ftrace_list->next == &ftrace_list_end) - ftrace_trace_function = ftrace_list->func; + if (ftrace_enabled) { + /* If we only have one func left, then call that directly */ + if (ftrace_list == &ftrace_list_end || + ftrace_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_list->func; + } out: spin_unlock(&ftrace_lock); @@ -263,7 +279,8 @@ static void notrace ftrace_startup(void) goto out; __unregister_ftrace_function(&ftrace_shutdown_ops); - ftrace_run_startup_code(); + if (ftrace_enabled) + ftrace_run_startup_code(); out: mutex_unlock(&ftraced_lock); } @@ -275,13 +292,32 @@ static void notrace ftrace_shutdown(void) if (ftraced_suspend) goto out; - ftrace_run_shutdown_code(); + if (ftrace_enabled) + ftrace_run_shutdown_code(); __register_ftrace_function(&ftrace_shutdown_ops); out: mutex_unlock(&ftraced_lock); } +static void notrace ftrace_startup_sysctl(void) +{ + mutex_lock(&ftraced_lock); + /* ftraced_suspend is true if we want ftrace running */ + if (ftraced_suspend) + ftrace_run_startup_code(); + mutex_unlock(&ftraced_lock); +} + +static void notrace ftrace_shutdown_sysctl(void) +{ + mutex_lock(&ftraced_lock); + /* ftraced_suspend is true if ftrace is running */ + if (ftraced_suspend) + ftrace_run_shutdown_code(); + mutex_unlock(&ftraced_lock); +} + static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; @@ -341,8 +377,9 @@ static int notrace ftraced(void *ignore) /* check once a second */ schedule_timeout(HZ); + mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftraced_lock); - if (ftraced_trigger && !ftraced_suspend) { + if (ftrace_enabled && ftraced_trigger && !ftraced_suspend) { ftrace_record_suspend++; ftrace_update_code(); usecs = nsecs_to_usecs(ftrace_update_time); @@ -360,6 +397,7 @@ static int notrace ftraced(void *ignore) ftrace_record_suspend--; } mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_sysctl_lock); ftrace_shutdown_replenish(); @@ -389,8 +427,10 @@ static int __init notrace ftrace_shutdown_init(void) core_initcall(ftrace_shutdown_init); #else -# define ftrace_startup() do { } while (0) -# define ftrace_shutdown() do { } while (0) +# define ftrace_startup() do { } while (0) +# define ftrace_shutdown() do { } while (0) +# define ftrace_startup_sysctl() do { } while (0) +# define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ /** @@ -406,9 +446,15 @@ core_initcall(ftrace_shutdown_init); */ int register_ftrace_function(struct ftrace_ops *ops) { + int ret; + + mutex_lock(&ftrace_sysctl_lock); ftrace_startup(); - return __register_ftrace_function(ops); + ret = __register_ftrace_function(ops); + mutex_unlock(&ftrace_sysctl_lock); + + return ret; } /** @@ -421,10 +467,53 @@ int unregister_ftrace_function(struct ftrace_ops *ops) { int ret; + mutex_lock(&ftrace_sysctl_lock); ret = __unregister_ftrace_function(ops); if (ftrace_list == &ftrace_list_end) ftrace_shutdown(); + mutex_unlock(&ftrace_sysctl_lock); + + return ret; +} + +notrace int +ftrace_enable_sysctl(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + + mutex_lock(&ftrace_sysctl_lock); + + ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); + + if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) + goto out; + + last_ftrace_enabled = ftrace_enabled; + + if (ftrace_enabled) { + + ftrace_startup_sysctl(); + + /* we are starting ftrace again */ + if (ftrace_list != &ftrace_list_end) { + if (ftrace_list->next == &ftrace_list_end) + ftrace_trace_function = ftrace_list->func; + else + ftrace_trace_function = ftrace_list_func; + } + + } else { + /* stopping ftrace calls (just send to ftrace_stub) */ + ftrace_trace_function = ftrace_stub; + + ftrace_shutdown_sysctl(); + } + + out: + mutex_unlock(&ftrace_sysctl_lock); return ret; } -- cgit v0.10.2 From dfa60aba04dae7833d75b2e2be124bb7cfb8239f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: use nops instead of jmp This patch patches the call to mcount with nops instead of a jmp over the mcount call. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 65c7857..de240ba 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { #ifdef CONFIG_X86_64 extern char __vsyscall_0; -static inline const unsigned char*const * find_nop_table(void) +const unsigned char *const *find_nop_table(void) { return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; @@ -162,7 +162,7 @@ static const struct nop { { -1, NULL } }; -static const unsigned char*const * find_nop_table(void) +const unsigned char *const *find_nop_table(void) { const unsigned char *const *noptable = intel_nops; int i; diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5dd5813..2e060c5 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -16,11 +16,12 @@ #include #include -#define CALL_BACK 5 +#include -#define JMPFWD 0x03eb +#define CALL_BACK 5 -static unsigned short ftrace_jmp = JMPFWD; +/* Long is fine, even if it is only 4 bytes ;-) */ +static long *ftrace_nop; struct ftrace_record { struct dyn_ftrace rec; @@ -55,13 +56,13 @@ static struct ftrace_page *ftrace_pages; notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) { struct ftrace_record *rec; - unsigned short save; + unsigned long save; ip -= CALL_BACK; - save = *(short *)ip; + save = *(long *)ip; /* If this was already converted, skip it */ - if (save == JMPFWD) + if (save == *ftrace_nop) return NULL; if (ftrace_pages->index == ENTRIES_PER_PAGE) { @@ -79,9 +80,10 @@ static int notrace ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { - unsigned short old = *(unsigned short *)old_code; - unsigned short new = *(unsigned short *)new_code; - unsigned short replaced; + unsigned replaced; + unsigned old = *(unsigned *)old_code; /* 4 bytes */ + unsigned new = *(unsigned *)new_code; /* 4 bytes */ + unsigned char newch = new_code[4]; int faulted = 0; /* @@ -94,7 +96,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, */ asm volatile ( "1: lock\n" - " cmpxchg %w3, (%2)\n" + " cmpxchg %3, (%2)\n" + " jnz 2f\n" + " movb %b4, 4(%2)\n" "2:\n" ".section .fixup, \"ax\"\n" " movl $1, %0\n" @@ -102,11 +106,12 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, ".previous\n" _ASM_EXTABLE(1b, 3b) : "=r"(faulted), "=a"(replaced) - : "r"(ip), "r"(new), "0"(faulted), "a"(old) + : "r"(ip), "r"(new), "r"(newch), + "0"(faulted), "a"(old) : "memory"); sync_core(); - if (replaced != old) + if (replaced != old && replaced != new) faulted = 2; return faulted; @@ -132,7 +137,7 @@ notrace void ftrace_code_disable(struct dyn_ftrace *rec) /* move the IP back to the start of the call */ ip -= CALL_BACK; - r->failed = ftrace_modify_code(ip, save.code, (char *)&ftrace_jmp); + r->failed = ftrace_modify_code(ip, save.code, (char *)ftrace_nop); } static void notrace ftrace_replace_code(int saved) @@ -144,9 +149,9 @@ static void notrace ftrace_replace_code(int saved) int i; if (saved) - old = (char *)&ftrace_jmp; + old = (char *)ftrace_nop; else - new = (char *)&ftrace_jmp; + new = (char *)ftrace_nop; for (pg = ftrace_pages_start; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { @@ -194,12 +199,15 @@ notrace void ftrace_shutdown_replenish(void) ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); } -notrace int ftrace_shutdown_arch_init(void) +notrace int __init ftrace_shutdown_arch_init(void) { + const unsigned char *const *noptable = find_nop_table(); struct ftrace_page *pg; int cnt; int i; + ftrace_nop = (unsigned long *)noptable[CALL_BACK]; + /* allocate a few pages */ ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); if (!ftrace_pages_start) diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h index 1f6a9ca..f6aa18e 100644 --- a/include/asm-x86/alternative.h +++ b/include/asm-x86/alternative.h @@ -72,6 +72,8 @@ static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} #endif /* CONFIG_SMP */ +const unsigned char *const *find_nop_table(void); + /* * Alternative instructions for different CPU types or capabilities. * -- cgit v0.10.2 From 3c1720f00bb619302ba19d55986ab565e74d06db Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: move memory management out of arch code This patch moves the memory management of the ftrace records out of the arch code and into the generic code making the arch code simpler. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 2e060c5..b69795e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -23,25 +23,6 @@ /* Long is fine, even if it is only 4 bytes ;-) */ static long *ftrace_nop; -struct ftrace_record { - struct dyn_ftrace rec; - int failed; -} __attribute__((packed)); - -struct ftrace_page { - struct ftrace_page *next; - int index; - struct ftrace_record records[]; -} __attribute__((packed)); - -#define ENTRIES_PER_PAGE \ - ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct ftrace_record)) - -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 - -#define MCOUNT_ADDR ((long)(&mcount)) - union ftrace_code_union { char code[5]; struct { @@ -50,33 +31,41 @@ union ftrace_code_union { } __attribute__((packed)); }; -static struct ftrace_page *ftrace_pages_start; -static struct ftrace_page *ftrace_pages; - -notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) +notrace int ftrace_ip_converted(unsigned long ip) { - struct ftrace_record *rec; unsigned long save; ip -= CALL_BACK; save = *(long *)ip; - /* If this was already converted, skip it */ - if (save == *ftrace_nop) - return NULL; + return save == *ftrace_nop; +} - if (ftrace_pages->index == ENTRIES_PER_PAGE) { - if (!ftrace_pages->next) - return NULL; - ftrace_pages = ftrace_pages->next; - } +static int notrace ftrace_calc_offset(long ip, long addr) +{ + return (int)(addr - ip); +} - rec = &ftrace_pages->records[ftrace_pages->index++]; +notrace unsigned char *ftrace_nop_replace(void) +{ + return (char *)ftrace_nop; +} + +notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + static union ftrace_code_union calc; - return &rec->rec; + calc.e8 = 0xe8; + calc.offset = ftrace_calc_offset(ip, addr); + + /* + * No locking needed, this must be called via kstop_machine + * which in essence is like running on a uniprocessor machine. + */ + return calc.code; } -static int notrace +notrace int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -86,6 +75,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char newch = new_code[4]; int faulted = 0; + /* move the IP back to the start of the call */ + ip -= CALL_BACK; + /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting @@ -117,129 +109,12 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return faulted; } -static int notrace ftrace_calc_offset(long ip) -{ - return (int)(MCOUNT_ADDR - ip); -} - -notrace void ftrace_code_disable(struct dyn_ftrace *rec) -{ - unsigned long ip; - union ftrace_code_union save; - struct ftrace_record *r = - container_of(rec, struct ftrace_record, rec); - - ip = rec->ip; - - save.e8 = 0xe8; - save.offset = ftrace_calc_offset(ip); - - /* move the IP back to the start of the call */ - ip -= CALL_BACK; - - r->failed = ftrace_modify_code(ip, save.code, (char *)ftrace_nop); -} - -static void notrace ftrace_replace_code(int saved) -{ - unsigned char *new = NULL, *old = NULL; - struct ftrace_record *rec; - struct ftrace_page *pg; - unsigned long ip; - int i; - - if (saved) - old = (char *)ftrace_nop; - else - new = (char *)ftrace_nop; - - for (pg = ftrace_pages_start; pg; pg = pg->next) { - for (i = 0; i < pg->index; i++) { - union ftrace_code_union calc; - rec = &pg->records[i]; - - /* don't modify code that has already faulted */ - if (rec->failed) - continue; - - ip = rec->rec.ip; - - calc.e8 = 0xe8; - calc.offset = ftrace_calc_offset(ip); - - if (saved) - new = calc.code; - else - old = calc.code; - - ip -= CALL_BACK; - - rec->failed = ftrace_modify_code(ip, old, new); - } - } - -} - -notrace void ftrace_startup_code(void) -{ - ftrace_replace_code(1); -} - -notrace void ftrace_shutdown_code(void) -{ - ftrace_replace_code(0); -} - -notrace void ftrace_shutdown_replenish(void) -{ - if (ftrace_pages->next) - return; - - /* allocate another page */ - ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); -} - -notrace int __init ftrace_shutdown_arch_init(void) +int __init ftrace_dyn_arch_init(void) { const unsigned char *const *noptable = find_nop_table(); - struct ftrace_page *pg; - int cnt; - int i; ftrace_nop = (unsigned long *)noptable[CALL_BACK]; - /* allocate a few pages */ - ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); - if (!ftrace_pages_start) - return -1; - - /* - * Allocate a few more pages. - * - * TODO: have some parser search vmlinux before - * final linking to find all calls to ftrace. - * Then we can: - * a) know how many pages to allocate. - * and/or - * b) set up the table then. - * - * The dynamic code is still necessary for - * modules. - */ - - pg = ftrace_pages = ftrace_pages_start; - - cnt = NR_TO_INIT / ENTRIES_PER_PAGE; - - for (i = 0; i < cnt; i++) { - pg->next = (void *)get_zeroed_page(GFP_KERNEL); - - /* If we fail, we'll try later anyway */ - if (!pg->next) - break; - - pg = pg->next; - } - return 0; } + diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ccd8537..d509ad6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -42,19 +42,23 @@ extern void mcount(void); # define FTRACE_HASHBITS 10 # define FTRACE_HASHSIZE (1<node, &ftrace_hash[key]); } +static notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) +{ + /* If this was already converted, skip it */ + if (ftrace_ip_converted(ip)) + return NULL; + + if (ftrace_pages->index == ENTRIES_PER_PAGE) { + if (!ftrace_pages->next) + return NULL; + ftrace_pages = ftrace_pages->next; + } + + return &ftrace_pages->records[ftrace_pages->index++]; +} + static void notrace ftrace_record_ip(unsigned long ip, unsigned long parent_ip) { @@ -252,6 +282,62 @@ static struct ftrace_ops ftrace_shutdown_ops __read_mostly = .func = ftrace_record_ip, }; +#define MCOUNT_ADDR ((long)(&mcount)) + +static void notrace ftrace_replace_code(int saved) +{ + unsigned char *new = NULL, *old = NULL; + struct dyn_ftrace *rec; + struct ftrace_page *pg; + unsigned long ip; + int failed; + int i; + + if (saved) + old = ftrace_nop_replace(); + else + new = ftrace_nop_replace(); + + for (pg = ftrace_pages_start; pg; pg = pg->next) { + for (i = 0; i < pg->index; i++) { + rec = &pg->records[i]; + + /* don't modify code that has already faulted */ + if (rec->flags & FTRACE_FL_FAILED) + continue; + + ip = rec->ip; + + if (saved) + new = ftrace_call_replace(ip, MCOUNT_ADDR); + else + old = ftrace_call_replace(ip, MCOUNT_ADDR); + + failed = ftrace_modify_code(ip, old, new); + if (failed) + rec->flags |= FTRACE_FL_FAILED; + } + } +} + +static notrace void ftrace_startup_code(void) +{ + ftrace_replace_code(1); +} + +static notrace void ftrace_shutdown_code(void) +{ + ftrace_replace_code(0); +} + +static notrace void ftrace_shutdown_replenish(void) +{ + if (ftrace_pages->next) + return; + + /* allocate another page */ + ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); +} static int notrace __ftrace_modify_code(void *data) { @@ -261,6 +347,23 @@ static int notrace __ftrace_modify_code(void *data) return 0; } +static notrace void +ftrace_code_disable(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip; + unsigned char *nop, *call; + int failed; + + ip = rec->ip; + + nop = ftrace_nop_replace(); + call = ftrace_call_replace(ip, addr); + + failed = ftrace_modify_code(ip, call, nop); + if (failed) + rec->flags |= FTRACE_FL_FAILED; +} + static void notrace ftrace_run_startup_code(void) { stop_machine_run(__ftrace_modify_code, ftrace_startup_code, NR_CPUS); @@ -346,7 +449,7 @@ static int notrace __ftrace_update_code(void *ignore) /* all CPUS are stopped, we are safe to modify code */ hlist_for_each_entry(p, t, &head, node) { - ftrace_code_disable(p); + ftrace_code_disable(p, MCOUNT_ADDR); ftrace_update_cnt++; } @@ -407,12 +510,59 @@ static int notrace ftraced(void *ignore) return 0; } +static int __init ftrace_dyn_table_alloc(void) +{ + struct ftrace_page *pg; + int cnt; + int i; + int ret; + + ret = ftrace_dyn_arch_init(); + if (ret) + return ret; + + /* allocate a few pages */ + ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); + if (!ftrace_pages_start) + return -1; + + /* + * Allocate a few more pages. + * + * TODO: have some parser search vmlinux before + * final linking to find all calls to ftrace. + * Then we can: + * a) know how many pages to allocate. + * and/or + * b) set up the table then. + * + * The dynamic code is still necessary for + * modules. + */ + + pg = ftrace_pages = ftrace_pages_start; + + cnt = NR_TO_INIT / ENTRIES_PER_PAGE; + + for (i = 0; i < cnt; i++) { + pg->next = (void *)get_zeroed_page(GFP_KERNEL); + + /* If we fail, we'll try later anyway */ + if (!pg->next) + break; + + pg = pg->next; + } + + return 0; +} + static int __init notrace ftrace_shutdown_init(void) { struct task_struct *p; int ret; - ret = ftrace_shutdown_arch_init(); + ret = ftrace_dyn_table_alloc(); if (ret) return ret; -- cgit v0.10.2 From d61f82d06672f57fca410da6f7fffd15867db622 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: use dynamic patching for updating mcount calls This patch replaces the indirect call to the mcount function pointer with a direct call that will be patched by the dynamic ftrace routines. On boot up, the mcount function calls the ftace_stub function. When the dynamic ftrace code is initialized, the ftrace_stub is replaced with a call to the ftrace_record_ip, which records the instruction pointers of the locations that call it. Later, the ftraced daemon will call kstop_machine and patch all the locations to nops. When a ftrace is enabled, the original calls to mcount will now be set top call ftrace_caller, which will do a direct call to the registered ftrace function. This direct call is also patched when the function that should be called is updated. All patching is performed by a kstop_machine routine to prevent any type of race conditions that is associated with modifying code on the fly. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f47b9b5..e6517ce 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1110,10 +1110,50 @@ ENDPROC(xen_failsafe_callback) #endif /* CONFIG_XEN */ #ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE + +ENTRY(mcount) + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + +.globl mcount_call +mcount_call: + call ftrace_stub + + popl %edx + popl %ecx + popl %eax + + ret +END(mcount) + +ENTRY(ftrace_caller) + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + movl 0x4(%ebp), %edx + +.globl ftrace_call +ftrace_call: + call ftrace_stub + + popl %edx + popl %ecx + popl %eax + +.globl ftrace_stub +ftrace_stub: + ret +END(ftrace_caller) + +#else /* ! CONFIG_DYNAMIC_FTRACE */ + ENTRY(mcount) cmpl $ftrace_stub, ftrace_trace_function jnz trace - .globl ftrace_stub ftrace_stub: ret @@ -1126,7 +1166,7 @@ trace: movl 0xc(%esp), %eax movl 0x4(%ebp), %edx - call *ftrace_trace_function + call *ftrace_trace_function popl %edx popl %ecx @@ -1134,7 +1174,8 @@ trace: jmp ftrace_stub END(mcount) -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FTRACE */ .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index f046e0c..fe25e5f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -55,6 +55,70 @@ .code64 #ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(mcount) + + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + + movq 0x38(%rsp), %rdi + +.globl mcount_call +mcount_call: + call ftrace_stub + + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + + retq +END(mcount) + +ENTRY(ftrace_caller) + + /* taken from glibc */ + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + + movq 0x38(%rsp), %rdi + movq 8(%rbp), %rsi + +.globl ftrace_call +ftrace_call: + call ftrace_stub + + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + +.globl ftrace_stub +ftrace_stub: + retq +END(ftrace_caller) + +#else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) cmpq $ftrace_stub, ftrace_trace_function jnz trace @@ -89,7 +153,8 @@ trace: jmp ftrace_stub END(mcount) -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FTRACE */ #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index b69795e..9f44623 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -109,10 +109,49 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return faulted; } -int __init ftrace_dyn_arch_init(void) +notrace int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + unsigned char old[5], *new; + int ret; + + ip += CALL_BACK; + + memcpy(old, &ftrace_call, 5); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + + return ret; +} + +notrace int ftrace_mcount_set(unsigned long *data) +{ + unsigned long ip = (long)(&mcount_call); + unsigned long *addr = data; + unsigned char old[5], *new; + + /* ip is at the location, but modify code will subtact this */ + ip += CALL_BACK; + + /* + * Replace the mcount stub with a pointer to the + * ip recorder function. + */ + memcpy(old, &mcount_call, 5); + new = ftrace_call_replace(ip, *addr); + *addr = ftrace_modify_code(ip, old, new); + + return 0; +} + +int __init ftrace_dyn_arch_init(void *data) { const unsigned char *const *noptable = find_nop_table(); + /* This is running in kstop_machine */ + + ftrace_mcount_set(data); + ftrace_nop = (unsigned long *)noptable[CALL_BACK]; return 0; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d509ad6..b0dd009 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -56,9 +56,14 @@ struct dyn_ftrace { extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); -extern int ftrace_dyn_arch_init(void); +extern int ftrace_dyn_arch_init(void *data); +extern int ftrace_mcount_set(unsigned long *data); extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code); +extern int ftrace_update_ftrace_func(ftrace_func_t func); +extern void ftrace_caller(void); +extern void ftrace_call(void); +extern void mcount_call(void); #endif #ifdef CONFIG_FRAME_POINTER diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f6d9af3..88544f9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -26,14 +26,8 @@ #include "trace.h" -#ifdef CONFIG_DYNAMIC_FTRACE -# define FTRACE_ENABLED_INIT 1 -#else -# define FTRACE_ENABLED_INIT 0 -#endif - -int ftrace_enabled = FTRACE_ENABLED_INIT; -static int last_ftrace_enabled = FTRACE_ENABLED_INIT; +int ftrace_enabled; +static int last_ftrace_enabled; static DEFINE_SPINLOCK(ftrace_lock); static DEFINE_MUTEX(ftrace_sysctl_lock); @@ -149,6 +143,14 @@ static int notrace __unregister_ftrace_function(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE +enum { + FTRACE_ENABLE_CALLS = (1 << 0), + FTRACE_DISABLE_CALLS = (1 << 1), + FTRACE_UPDATE_TRACE_FUNC = (1 << 2), + FTRACE_ENABLE_MCOUNT = (1 << 3), + FTRACE_DISABLE_MCOUNT = (1 << 4), +}; + static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); @@ -199,12 +201,8 @@ ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) hlist_add_head(&node->node, &ftrace_hash[key]); } -static notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) +static notrace struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) { - /* If this was already converted, skip it */ - if (ftrace_ip_converted(ip)) - return NULL; - if (ftrace_pages->index == ENTRIES_PER_PAGE) { if (!ftrace_pages->next) return NULL; @@ -215,7 +213,7 @@ static notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) } static void notrace -ftrace_record_ip(unsigned long ip, unsigned long parent_ip) +ftrace_record_ip(unsigned long ip) { struct dyn_ftrace *node; unsigned long flags; @@ -223,6 +221,9 @@ ftrace_record_ip(unsigned long ip, unsigned long parent_ip) int resched; int atomic; + if (!ftrace_enabled) + return; + resched = need_resched(); preempt_disable_notrace(); @@ -251,11 +252,12 @@ ftrace_record_ip(unsigned long ip, unsigned long parent_ip) /* * There's a slight race that the ftraced will update the - * hash and reset here. The arch alloc is responsible - * for seeing if the IP has already changed, and if - * it has, the alloc will fail. + * hash and reset here. If it is already converted, skip it. */ - node = ftrace_alloc_shutdown_node(ip); + if (ftrace_ip_converted(ip)) + goto out_unlock; + + node = ftrace_alloc_dyn_node(ip); if (!node) goto out_unlock; @@ -277,11 +279,7 @@ ftrace_record_ip(unsigned long ip, unsigned long parent_ip) preempt_enable_notrace(); } -static struct ftrace_ops ftrace_shutdown_ops __read_mostly = -{ - .func = ftrace_record_ip, -}; - +#define FTRACE_ADDR ((long)(&ftrace_caller)) #define MCOUNT_ADDR ((long)(&mcount)) static void notrace ftrace_replace_code(int saved) @@ -309,9 +307,9 @@ static void notrace ftrace_replace_code(int saved) ip = rec->ip; if (saved) - new = ftrace_call_replace(ip, MCOUNT_ADDR); + new = ftrace_call_replace(ip, FTRACE_ADDR); else - old = ftrace_call_replace(ip, MCOUNT_ADDR); + old = ftrace_call_replace(ip, FTRACE_ADDR); failed = ftrace_modify_code(ip, old, new); if (failed) @@ -320,16 +318,6 @@ static void notrace ftrace_replace_code(int saved) } } -static notrace void ftrace_startup_code(void) -{ - ftrace_replace_code(1); -} - -static notrace void ftrace_shutdown_code(void) -{ - ftrace_replace_code(0); -} - static notrace void ftrace_shutdown_replenish(void) { if (ftrace_pages->next) @@ -339,16 +327,8 @@ static notrace void ftrace_shutdown_replenish(void) ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); } -static int notrace __ftrace_modify_code(void *data) -{ - void (*func)(void) = data; - - func(); - return 0; -} - static notrace void -ftrace_code_disable(struct dyn_ftrace *rec, unsigned long addr) +ftrace_code_disable(struct dyn_ftrace *rec) { unsigned long ip; unsigned char *nop, *call; @@ -357,67 +337,113 @@ ftrace_code_disable(struct dyn_ftrace *rec, unsigned long addr) ip = rec->ip; nop = ftrace_nop_replace(); - call = ftrace_call_replace(ip, addr); + call = ftrace_call_replace(ip, MCOUNT_ADDR); failed = ftrace_modify_code(ip, call, nop); if (failed) rec->flags |= FTRACE_FL_FAILED; } -static void notrace ftrace_run_startup_code(void) +static int notrace __ftrace_modify_code(void *data) { - stop_machine_run(__ftrace_modify_code, ftrace_startup_code, NR_CPUS); + unsigned long addr; + int *command = data; + + if (*command & FTRACE_ENABLE_CALLS) + ftrace_replace_code(1); + else if (*command & FTRACE_DISABLE_CALLS) + ftrace_replace_code(0); + + if (*command & FTRACE_UPDATE_TRACE_FUNC) + ftrace_update_ftrace_func(ftrace_trace_function); + + if (*command & FTRACE_ENABLE_MCOUNT) { + addr = (unsigned long)ftrace_record_ip; + ftrace_mcount_set(&addr); + } else if (*command & FTRACE_DISABLE_MCOUNT) { + addr = (unsigned long)ftrace_stub; + ftrace_mcount_set(&addr); + } + + return 0; } -static void notrace ftrace_run_shutdown_code(void) +static void notrace ftrace_run_update_code(int command) { - stop_machine_run(__ftrace_modify_code, ftrace_shutdown_code, NR_CPUS); + stop_machine_run(__ftrace_modify_code, &command, NR_CPUS); } +static ftrace_func_t saved_ftrace_func; + static void notrace ftrace_startup(void) { + int command = 0; + mutex_lock(&ftraced_lock); ftraced_suspend++; - if (ftraced_suspend != 1) + if (ftraced_suspend == 1) + command |= FTRACE_ENABLE_CALLS; + + if (saved_ftrace_func != ftrace_trace_function) { + saved_ftrace_func = ftrace_trace_function; + command |= FTRACE_UPDATE_TRACE_FUNC; + } + + if (!command || !ftrace_enabled) goto out; - __unregister_ftrace_function(&ftrace_shutdown_ops); - if (ftrace_enabled) - ftrace_run_startup_code(); + ftrace_run_update_code(command); out: mutex_unlock(&ftraced_lock); } static void notrace ftrace_shutdown(void) { + int command = 0; + mutex_lock(&ftraced_lock); ftraced_suspend--; - if (ftraced_suspend) - goto out; + if (!ftraced_suspend) + command |= FTRACE_DISABLE_CALLS; - if (ftrace_enabled) - ftrace_run_shutdown_code(); + if (saved_ftrace_func != ftrace_trace_function) { + saved_ftrace_func = ftrace_trace_function; + command |= FTRACE_UPDATE_TRACE_FUNC; + } - __register_ftrace_function(&ftrace_shutdown_ops); + if (!command || !ftrace_enabled) + goto out; + + ftrace_run_update_code(command); out: mutex_unlock(&ftraced_lock); } static void notrace ftrace_startup_sysctl(void) { + int command = FTRACE_ENABLE_MCOUNT; + mutex_lock(&ftraced_lock); + /* Force update next time */ + saved_ftrace_func = NULL; /* ftraced_suspend is true if we want ftrace running */ if (ftraced_suspend) - ftrace_run_startup_code(); + command |= FTRACE_ENABLE_CALLS; + + ftrace_run_update_code(command); mutex_unlock(&ftraced_lock); } static void notrace ftrace_shutdown_sysctl(void) { + int command = FTRACE_DISABLE_MCOUNT; + mutex_lock(&ftraced_lock); /* ftraced_suspend is true if ftrace is running */ if (ftraced_suspend) - ftrace_run_shutdown_code(); + command |= FTRACE_DISABLE_CALLS; + + ftrace_run_update_code(command); mutex_unlock(&ftraced_lock); } @@ -430,11 +456,13 @@ static int notrace __ftrace_update_code(void *ignore) struct dyn_ftrace *p; struct hlist_head head; struct hlist_node *t; + int save_ftrace_enabled; cycle_t start, stop; int i; - /* Don't be calling ftrace ops now */ - __unregister_ftrace_function(&ftrace_shutdown_ops); + /* Don't be recording funcs now */ + save_ftrace_enabled = ftrace_enabled; + ftrace_enabled = 0; start = now(raw_smp_processor_id()); ftrace_update_cnt = 0; @@ -449,7 +477,7 @@ static int notrace __ftrace_update_code(void *ignore) /* all CPUS are stopped, we are safe to modify code */ hlist_for_each_entry(p, t, &head, node) { - ftrace_code_disable(p, MCOUNT_ADDR); + ftrace_code_disable(p); ftrace_update_cnt++; } @@ -459,7 +487,7 @@ static int notrace __ftrace_update_code(void *ignore) ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; - __register_ftrace_function(&ftrace_shutdown_ops); + ftrace_enabled = save_ftrace_enabled; return 0; } @@ -515,11 +543,6 @@ static int __init ftrace_dyn_table_alloc(void) struct ftrace_page *pg; int cnt; int i; - int ret; - - ret = ftrace_dyn_arch_init(); - if (ret) - return ret; /* allocate a few pages */ ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); @@ -557,11 +580,19 @@ static int __init ftrace_dyn_table_alloc(void) return 0; } -static int __init notrace ftrace_shutdown_init(void) +static int __init notrace ftrace_dynamic_init(void) { struct task_struct *p; + unsigned long addr; int ret; + addr = (unsigned long)ftrace_record_ip; + stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS); + + /* ftrace_dyn_arch_init places the return code in addr */ + if (addr) + return addr; + ret = ftrace_dyn_table_alloc(); if (ret) return ret; @@ -570,12 +601,12 @@ static int __init notrace ftrace_shutdown_init(void) if (IS_ERR(p)) return -1; - __register_ftrace_function(&ftrace_shutdown_ops); + last_ftrace_enabled = ftrace_enabled = 1; return 0; } -core_initcall(ftrace_shutdown_init); +core_initcall(ftrace_dynamic_init); #else # define ftrace_startup() do { } while (0) # define ftrace_shutdown() do { } while (0) @@ -599,9 +630,8 @@ int register_ftrace_function(struct ftrace_ops *ops) int ret; mutex_lock(&ftrace_sysctl_lock); - ftrace_startup(); - ret = __register_ftrace_function(ops); + ftrace_startup(); mutex_unlock(&ftrace_sysctl_lock); return ret; @@ -619,10 +649,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_sysctl_lock); ret = __unregister_ftrace_function(ops); - - if (ftrace_list == &ftrace_list_end) - ftrace_shutdown(); - + ftrace_shutdown(); mutex_unlock(&ftrace_sysctl_lock); return ret; -- cgit v0.10.2 From 5072c59fd45e9976d02ee6f18c7336ef97623cbc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: add filter select functions to trace This patch adds two files to the debugfs system: /debugfs/tracing/available_filter_functions and /debugfs/tracing/set_ftrace_filter The available_filter_functions lists all functions that has been recorded by the ftraced that has called the ftrace_record_ip function. This is to allow users to see what functions have been converted to nops and can be enabled for tracing. To enable functions, simply echo the names (whitespace delimited) into set_ftrace_filter. Simple wildcards are also allowed. echo 'scheduler' > /debugfs/tracing/set_ftrace_filter Will have only the scheduler be activated when tracing is enabled. echo 'sched_*' > /debugfs/tracing/set_ftrace_filter Will have only the functions starting with 'sched_' be activated. echo '*lock' > /debugfs/tracing/set_ftrace_filter Will have only functions ending with 'lock' be activated. echo '*lock*' > /debugfs/tracing/set_ftrace_filter Will have only functions with 'lock' in its name be activated. Note: 'sched*lock' will not work. The only wildcards that are allowed is an asterisk and the beginning and or end of the string passed in. Multiple names can be passed in with whitespace delimited: echo 'scheduler *lock *acpi*' > /debugfs/tracing/set_ftrace_filter is also the same as: echo 'scheduler' > /debugfs/tracing/set_ftrace_filter echo '*lock' >> /debugfs/tracing/set_ftrace_filter echo '*acpi*' >> /debugfs/tracing/set_ftrace_filter Appending does just that. It appends to the list. To disable all filters simply echo an empty line in: echo > /debugfs/tracing/set_ftrace_filter Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b0dd009..f5911d2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,7 +43,9 @@ extern void mcount(void); # define FTRACE_HASHSIZE (1< #include #include +#include +#include #include #include #include -#include +#include #include #include +#include #include #include "trace.h" @@ -151,12 +154,15 @@ enum { FTRACE_DISABLE_MCOUNT = (1 << 4), }; +static int ftrace_filtered; + static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); static DEFINE_SPINLOCK(ftrace_shutdown_lock); static DEFINE_MUTEX(ftraced_lock); +static DEFINE_MUTEX(ftrace_filter_lock); struct ftrace_page { struct ftrace_page *next; @@ -282,16 +288,82 @@ ftrace_record_ip(unsigned long ip) #define FTRACE_ADDR ((long)(&ftrace_caller)) #define MCOUNT_ADDR ((long)(&mcount)) -static void notrace ftrace_replace_code(int saved) +static void notrace +__ftrace_replace_code(struct dyn_ftrace *rec, + unsigned char *old, unsigned char *new, int enable) +{ + unsigned long ip; + int failed; + + ip = rec->ip; + + if (ftrace_filtered && enable) { + unsigned long fl; + /* + * If filtering is on: + * + * If this record is set to be filtered and + * is enabled then do nothing. + * + * If this record is set to be filtered and + * it is not enabled, enable it. + * + * If this record is not set to be filtered + * and it is not enabled do nothing. + * + * If this record is not set to be filtered and + * it is enabled, disable it. + */ + fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED); + + if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || + (fl == 0)) + return; + + /* + * If it is enabled disable it, + * otherwise enable it! + */ + if (fl == FTRACE_FL_ENABLED) { + /* swap new and old */ + new = old; + old = ftrace_call_replace(ip, FTRACE_ADDR); + rec->flags &= ~FTRACE_FL_ENABLED; + } else { + new = ftrace_call_replace(ip, FTRACE_ADDR); + rec->flags |= FTRACE_FL_ENABLED; + } + } else { + + if (enable) + new = ftrace_call_replace(ip, FTRACE_ADDR); + else + old = ftrace_call_replace(ip, FTRACE_ADDR); + + if (enable) { + if (rec->flags & FTRACE_FL_ENABLED) + return; + rec->flags |= FTRACE_FL_ENABLED; + } else { + if (!(rec->flags & FTRACE_FL_ENABLED)) + return; + rec->flags &= ~FTRACE_FL_ENABLED; + } + } + + failed = ftrace_modify_code(ip, old, new); + if (failed) + rec->flags |= FTRACE_FL_FAILED; +} + +static void notrace ftrace_replace_code(int enable) { unsigned char *new = NULL, *old = NULL; struct dyn_ftrace *rec; struct ftrace_page *pg; - unsigned long ip; - int failed; int i; - if (saved) + if (enable) old = ftrace_nop_replace(); else new = ftrace_nop_replace(); @@ -304,16 +376,7 @@ static void notrace ftrace_replace_code(int saved) if (rec->flags & FTRACE_FL_FAILED) continue; - ip = rec->ip; - - if (saved) - new = ftrace_call_replace(ip, FTRACE_ADDR); - else - old = ftrace_call_replace(ip, FTRACE_ADDR); - - failed = ftrace_modify_code(ip, old, new); - if (failed) - rec->flags |= FTRACE_FL_FAILED; + __ftrace_replace_code(rec, old, new, enable); } } } @@ -580,6 +643,436 @@ static int __init ftrace_dyn_table_alloc(void) return 0; } +enum { + FTRACE_ITER_FILTER = (1 << 0), + FTRACE_ITER_CONT = (1 << 1), +}; + +#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ + +struct ftrace_iterator { + loff_t pos; + struct ftrace_page *pg; + unsigned idx; + unsigned flags; + unsigned char buffer[FTRACE_BUFF_MAX+1]; + unsigned buffer_idx; + unsigned filtered; +}; + +static void notrace * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ftrace_iterator *iter = m->private; + struct dyn_ftrace *rec = NULL; + + (*pos)++; + + retry: + if (iter->idx >= iter->pg->index) { + if (iter->pg->next) { + iter->pg = iter->pg->next; + iter->idx = 0; + goto retry; + } + } else { + rec = &iter->pg->records[iter->idx++]; + if ((rec->flags & FTRACE_FL_FAILED) || + ((iter->flags & FTRACE_ITER_FILTER) && + !(rec->flags & FTRACE_FL_FILTER))) { + rec = NULL; + goto retry; + } + } + + iter->pos = *pos; + + return rec; +} + +static void *t_start(struct seq_file *m, loff_t *pos) +{ + struct ftrace_iterator *iter = m->private; + void *p = NULL; + loff_t l = -1; + + if (*pos != iter->pos) { + for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) + ; + } else { + l = *pos; + p = t_next(m, p, &l); + } + + return p; +} + +static void t_stop(struct seq_file *m, void *p) +{ +} + +static int t_show(struct seq_file *m, void *v) +{ + struct dyn_ftrace *rec = v; + char str[KSYM_SYMBOL_LEN]; + + if (!rec) + return 0; + + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + + seq_printf(m, "%s\n", str); + + return 0; +} + +static struct seq_operations show_ftrace_seq_ops = { + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, +}; + +static int notrace +ftrace_avail_open(struct inode *inode, struct file *file) +{ + struct ftrace_iterator *iter; + int ret; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + iter->pg = ftrace_pages_start; + iter->pos = -1; + + ret = seq_open(file, &show_ftrace_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = iter; + } else + kfree(iter); + + return ret; +} + +int ftrace_avail_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + struct ftrace_iterator *iter = m->private; + + seq_release(inode, file); + kfree(iter); + return 0; +} + +static void notrace ftrace_filter_reset(void) +{ + struct ftrace_page *pg; + struct dyn_ftrace *rec; + unsigned i; + + /* keep kstop machine from running */ + preempt_disable(); + ftrace_filtered = 0; + pg = ftrace_pages_start; + while (pg) { + for (i = 0; i < pg->index; i++) { + rec = &pg->records[i]; + if (rec->flags & FTRACE_FL_FAILED) + continue; + rec->flags &= ~FTRACE_FL_FILTER; + } + pg = pg->next; + } + preempt_enable(); +} + +static int notrace +ftrace_filter_open(struct inode *inode, struct file *file) +{ + struct ftrace_iterator *iter; + int ret = 0; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + mutex_lock(&ftrace_filter_lock); + if ((file->f_mode & FMODE_WRITE) && + !(file->f_flags & O_APPEND)) + ftrace_filter_reset(); + + if (file->f_mode & FMODE_READ) { + iter->pg = ftrace_pages_start; + iter->pos = -1; + iter->flags = FTRACE_ITER_FILTER; + + ret = seq_open(file, &show_ftrace_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = iter; + } else + kfree(iter); + } else + file->private_data = iter; + mutex_unlock(&ftrace_filter_lock); + + return ret; +} + +static ssize_t notrace +ftrace_filter_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + if (file->f_mode & FMODE_READ) + return seq_read(file, ubuf, cnt, ppos); + else + return -EPERM; +} + +static loff_t notrace +ftrace_filter_lseek(struct file *file, loff_t offset, int origin) +{ + loff_t ret; + + if (file->f_mode & FMODE_READ) + ret = seq_lseek(file, offset, origin); + else + file->f_pos = ret = 1; + + return ret; +} + +enum { + MATCH_FULL, + MATCH_FRONT_ONLY, + MATCH_MIDDLE_ONLY, + MATCH_END_ONLY, +}; + +static void notrace +ftrace_match(unsigned char *buff, int len) +{ + char str[KSYM_SYMBOL_LEN]; + char *search = NULL; + struct ftrace_page *pg; + struct dyn_ftrace *rec; + int type = MATCH_FULL; + unsigned i, match = 0, search_len = 0; + + for (i = 0; i < len; i++) { + if (buff[i] == '*') { + if (!i) { + search = buff + i + 1; + type = MATCH_END_ONLY; + search_len = len - (i + 1); + } else { + if (type == MATCH_END_ONLY) { + type = MATCH_MIDDLE_ONLY; + } else { + match = i; + type = MATCH_FRONT_ONLY; + } + buff[i] = 0; + break; + } + } + } + + /* keep kstop machine from running */ + preempt_disable(); + ftrace_filtered = 1; + pg = ftrace_pages_start; + while (pg) { + for (i = 0; i < pg->index; i++) { + int matched = 0; + char *ptr; + + rec = &pg->records[i]; + if (rec->flags & FTRACE_FL_FAILED) + continue; + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + switch (type) { + case MATCH_FULL: + if (strcmp(str, buff) == 0) + matched = 1; + break; + case MATCH_FRONT_ONLY: + if (memcmp(str, buff, match) == 0) + matched = 1; + break; + case MATCH_MIDDLE_ONLY: + if (strstr(str, search)) + matched = 1; + break; + case MATCH_END_ONLY: + ptr = strstr(str, search); + if (ptr && (ptr[search_len] == 0)) + matched = 1; + break; + } + if (matched) + rec->flags |= FTRACE_FL_FILTER; + } + pg = pg->next; + } + preempt_enable(); +} + +static ssize_t notrace +ftrace_filter_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct ftrace_iterator *iter; + char ch; + size_t read = 0; + ssize_t ret; + + if (!cnt || cnt < 0) + return 0; + + mutex_lock(&ftrace_filter_lock); + + if (file->f_mode & FMODE_READ) { + struct seq_file *m = file->private_data; + iter = m->private; + } else + iter = file->private_data; + + if (!*ppos) { + iter->flags &= ~FTRACE_ITER_CONT; + iter->buffer_idx = 0; + } + + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + + if (!(iter->flags & ~FTRACE_ITER_CONT)) { + /* skip white space */ + while (cnt && isspace(ch)) { + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + + + if (isspace(ch)) { + file->f_pos += read; + ret = read; + goto out; + } + + iter->buffer_idx = 0; + } + + while (cnt && !isspace(ch)) { + if (iter->buffer_idx < FTRACE_BUFF_MAX) + iter->buffer[iter->buffer_idx++] = ch; + else { + ret = -EINVAL; + goto out; + } + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + + if (isspace(ch)) { + iter->filtered++; + iter->buffer[iter->buffer_idx] = 0; + ftrace_match(iter->buffer, iter->buffer_idx); + iter->buffer_idx = 0; + } else + iter->flags |= FTRACE_ITER_CONT; + + + file->f_pos += read; + + ret = read; + out: + mutex_unlock(&ftrace_filter_lock); + + return ret; +} + +static int notrace +ftrace_filter_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + struct ftrace_iterator *iter; + + mutex_lock(&ftrace_filter_lock); + if (file->f_mode & FMODE_READ) { + iter = m->private; + + seq_release(inode, file); + } else + iter = file->private_data; + + if (iter->buffer_idx) { + iter->filtered++; + iter->buffer[iter->buffer_idx] = 0; + ftrace_match(iter->buffer, iter->buffer_idx); + } + + mutex_lock(&ftrace_sysctl_lock); + mutex_lock(&ftraced_lock); + if (iter->filtered && ftraced_suspend && ftrace_enabled) + ftrace_run_update_code(FTRACE_ENABLE_CALLS); + mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_sysctl_lock); + + kfree(iter); + mutex_unlock(&ftrace_filter_lock); + return 0; +} + +static struct file_operations ftrace_avail_fops = { + .open = ftrace_avail_open, + .read = seq_read, + .llseek = seq_lseek, + .release = ftrace_avail_release, +}; + +static struct file_operations ftrace_filter_fops = { + .open = ftrace_filter_open, + .read = ftrace_filter_read, + .write = ftrace_filter_write, + .llseek = ftrace_filter_lseek, + .release = ftrace_filter_release, +}; + +static __init int ftrace_init_debugfs(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("available_filter_functions", 0444, + d_tracer, NULL, &ftrace_avail_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'available_filter_functions' entry\n"); + + entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer, + NULL, &ftrace_filter_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'set_ftrace_filter' entry\n"); + return 0; +} + +fs_initcall(ftrace_init_debugfs); + static int __init notrace ftrace_dynamic_init(void) { struct task_struct *p; @@ -657,14 +1150,14 @@ int unregister_ftrace_function(struct ftrace_ops *ops) notrace int ftrace_enable_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + struct file *file, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; mutex_lock(&ftrace_sysctl_lock); - ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(table, write, file, buffer, lenp, ppos); if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) goto out; -- cgit v0.10.2 From 4c11d7aed389375253b59e2b1865eec96663c65d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: convert single large buffer into single pages. Allocating large buffers for the tracer may fail easily. This patch converts the buffer from a large ordered allocation to single pages. It uses the struct page LRU field to link the pages together. Later patches may also implement dynamic increasing and decreasing of the trace buffers. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1b8eca7..d7ad030 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -49,7 +50,7 @@ static struct trace_array max_tr; static DEFINE_PER_CPU(struct trace_array_cpu, max_data); static int tracer_enabled; -static unsigned long trace_nr_entries = 4096UL; +static unsigned long trace_nr_entries = 16384UL; static struct tracer *trace_types __read_mostly; static struct tracer *current_trace __read_mostly; @@ -57,6 +58,8 @@ static int max_tracer_type_len; static DEFINE_MUTEX(trace_types_lock); +#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) + static int __init set_nr_entries(char *str) { if (!str) @@ -103,6 +106,7 @@ static const char *trace_options[] = { static unsigned trace_flags; +static DEFINE_SPINLOCK(ftrace_max_lock); /* * Copy the new maximum trace into the separate maximum-trace @@ -136,17 +140,23 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data; void *save_trace; + struct list_head save_pages; int i; + WARN_ON_ONCE(!irqs_disabled()); + spin_lock(&ftrace_max_lock); /* clear out all the previous traces */ for_each_possible_cpu(i) { data = tr->data[i]; save_trace = max_tr.data[i]->trace; + save_pages = max_tr.data[i]->trace_pages; memcpy(max_tr.data[i], data, sizeof(*data)); data->trace = save_trace; + data->trace_pages = save_pages; } __update_max_tr(tr, tsk, cpu); + spin_unlock(&ftrace_max_lock); } /** @@ -160,16 +170,22 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data = tr->data[cpu]; void *save_trace; + struct list_head save_pages; int i; + WARN_ON_ONCE(!irqs_disabled()); + spin_lock(&ftrace_max_lock); for_each_possible_cpu(i) tracing_reset(max_tr.data[i]); save_trace = max_tr.data[cpu]->trace; + save_pages = max_tr.data[cpu]->trace_pages; memcpy(max_tr.data[cpu], data, sizeof(*data)); data->trace = save_trace; + data->trace_pages = save_pages; __update_max_tr(tr, tsk, cpu); + spin_unlock(&ftrace_max_lock); } int register_tracer(struct tracer *type) @@ -236,7 +252,8 @@ void unregister_tracer(struct tracer *type) void notrace tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; - atomic_set(&data->underrun, 0); + data->trace_current = data->trace; + data->trace_current_idx = 0; } #ifdef CONFIG_FTRACE @@ -367,21 +384,27 @@ tracing_get_trace_entry(struct trace_array *tr, { unsigned long idx, idx_next; struct trace_entry *entry; + struct page *page; + struct list_head *next; - idx = data->trace_idx; + data->trace_idx++; + idx = data->trace_current_idx; idx_next = idx + 1; - if (unlikely(idx_next >= tr->entries)) { - atomic_inc(&data->underrun); + entry = data->trace_current + idx * TRACE_ENTRY_SIZE; + + if (unlikely(idx_next >= ENTRIES_PER_PAGE)) { + page = virt_to_page(data->trace_current); + if (unlikely(&page->lru == data->trace_pages.prev)) + next = data->trace_pages.next; + else + next = page->lru.next; + page = list_entry(next, struct page, lru); + data->trace_current = page_address(page); idx_next = 0; } - data->trace_idx = idx_next; - - if (unlikely(idx_next != 0 && atomic_read(&data->underrun))) - atomic_inc(&data->underrun); - - entry = data->trace + idx * TRACE_ENTRY_SIZE; + data->trace_current_idx = idx_next; return entry; } @@ -442,21 +465,38 @@ enum trace_file_type { }; static struct trace_entry * -trace_entry_idx(struct trace_array *tr, unsigned long idx, int cpu) +trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, + struct trace_iterator *iter, int cpu) { - struct trace_entry *array = tr->data[cpu]->trace; - unsigned long underrun; + struct page *page; + struct trace_entry *array; - if (idx >= tr->entries) + if (iter->next_idx[cpu] >= tr->entries || + iter->next_idx[cpu] >= data->trace_idx) return NULL; - underrun = atomic_read(&tr->data[cpu]->underrun); - if (underrun) - idx = ((underrun - 1) + idx) % tr->entries; - else if (idx >= tr->data[cpu]->trace_idx) - return NULL; + if (!iter->next_page[cpu]) { + /* + * Initialize. If the count of elements in + * this buffer is greater than the max entries + * we had an underrun. Which means we looped around. + * We can simply use the current pointer as our + * starting point. + */ + if (data->trace_idx >= tr->entries) { + page = virt_to_page(data->trace_current); + iter->next_page[cpu] = &page->lru; + iter->next_page_idx[cpu] = data->trace_current_idx; + } else { + iter->next_page[cpu] = data->trace_pages.next; + iter->next_page_idx[cpu] = 0; + } + } - return &array[idx]; + page = list_entry(iter->next_page[cpu], struct page, lru); + array = page_address(page); + + return &array[iter->next_page_idx[cpu]]; } static struct notrace trace_entry * @@ -470,7 +510,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) for_each_possible_cpu(cpu) { if (!tr->data[cpu]->trace) continue; - ent = trace_entry_idx(tr, iter->next_idx[cpu], cpu); + ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); if (ent && (!next || (long)(next->idx - ent->idx) > 0)) { next = ent; @@ -492,8 +532,19 @@ static void *find_next_entry_inc(struct trace_iterator *iter) next = find_next_entry(iter, &next_cpu); if (next) { - iter->next_idx[next_cpu]++; iter->idx++; + iter->next_idx[next_cpu]++; + iter->next_page_idx[next_cpu]++; + if (iter->next_page_idx[next_cpu] >= ENTRIES_PER_PAGE) { + struct trace_array_cpu *data = iter->tr->data[next_cpu]; + + iter->next_page_idx[next_cpu] = 0; + iter->next_page[next_cpu] = + iter->next_page[next_cpu]->next; + if (iter->next_page[next_cpu] == &data->trace_pages) + iter->next_page[next_cpu] = + data->trace_pages.next; + } } iter->ent = next; iter->cpu = next_cpu; @@ -554,14 +605,16 @@ static void *s_start(struct seq_file *m, loff_t *pos) iter->cpu = 0; iter->idx = -1; - for (i = 0; i < NR_CPUS; i++) + for_each_possible_cpu(i) { iter->next_idx[i] = 0; + iter->next_page[i] = NULL; + } for (p = iter; p && l < *pos; p = s_next(m, p, &l)) ; } else { - l = *pos; + l = *pos - 1; p = s_next(m, p, &l); } @@ -654,9 +707,8 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) struct trace_array *tr = iter->tr; struct trace_array_cpu *data = tr->data[tr->cpu]; struct tracer *type = current_trace; - unsigned long underruns = 0; - unsigned long underrun; - unsigned long entries = 0; + unsigned long total = 0; + unsigned long entries = 0; int cpu; const char *name = "preemption"; @@ -665,11 +717,10 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) for_each_possible_cpu(cpu) { if (tr->data[cpu]->trace) { - underrun = atomic_read(&tr->data[cpu]->underrun); - if (underrun) { - underruns += underrun; + total += tr->data[cpu]->trace_idx; + if (tr->data[cpu]->trace_idx > tr->entries) entries += tr->entries; - } else + else entries += tr->data[cpu]->trace_idx; } } @@ -682,7 +733,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) " (M:%s VP:%d, KP:%d, SP:%d HP:%d", data->saved_latency, entries, - (entries + underruns), + total, tr->cpu, #if defined(CONFIG_PREEMPT_NONE) "server", @@ -882,8 +933,7 @@ static int trace_empty(struct trace_iterator *iter) data = iter->tr->data[cpu]; if (data->trace && - (data->trace_idx || - atomic_read(&data->underrun))) + data->trace_idx) return 0; } return 1; @@ -1464,42 +1514,109 @@ static struct tracer no_tracer __read_mostly = .name = "none", }; -static inline notrace int page_order(const unsigned long size) +static int trace_alloc_page(void) { - const unsigned long nr_pages = DIV_ROUND_UP(size, PAGE_SIZE); - return ilog2(roundup_pow_of_two(nr_pages)); + struct trace_array_cpu *data; + void *array; + struct page *page, *tmp; + LIST_HEAD(pages); + int i; + + /* first allocate a page for each CPU */ + for_each_possible_cpu(i) { + array = (void *)__get_free_page(GFP_KERNEL); + if (array == NULL) { + printk(KERN_ERR "tracer: failed to allocate page" + "for trace buffer!\n"); + goto free_pages; + } + + page = virt_to_page(array); + list_add(&page->lru, &pages); + +/* Only allocate if we are actually using the max trace */ +#ifdef CONFIG_TRACER_MAX_TRACE + array = (void *)__get_free_page(GFP_KERNEL); + if (array == NULL) { + printk(KERN_ERR "tracer: failed to allocate page" + "for trace buffer!\n"); + goto free_pages; + } + page = virt_to_page(array); + list_add(&page->lru, &pages); +#endif + } + + /* Now that we successfully allocate a page per CPU, add them */ + for_each_possible_cpu(i) { + data = global_trace.data[i]; + page = list_entry(pages.next, struct page, lru); + list_del(&page->lru); + list_add_tail(&page->lru, &data->trace_pages); + ClearPageLRU(page); + +#ifdef CONFIG_TRACER_MAX_TRACE + data = max_tr.data[i]; + page = list_entry(pages.next, struct page, lru); + list_del(&page->lru); + list_add_tail(&page->lru, &data->trace_pages); + SetPageLRU(page); +#endif + } + global_trace.entries += ENTRIES_PER_PAGE; + + return 0; + + free_pages: + list_for_each_entry_safe(page, tmp, &pages, lru) { + list_del(&page->lru); + __free_page(page); + } + return -ENOMEM; } __init static int tracer_alloc_buffers(void) { - const int order = page_order(trace_nr_entries * TRACE_ENTRY_SIZE); - const unsigned long size = (1UL << order) << PAGE_SHIFT; - struct trace_entry *array; + struct trace_array_cpu *data; + void *array; + struct page *page; + int pages = 0; int i; + /* Allocate the first page for all buffers */ for_each_possible_cpu(i) { - global_trace.data[i] = &per_cpu(global_trace_cpu, i); + data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); max_tr.data[i] = &per_cpu(max_data, i); - array = (struct trace_entry *) - __get_free_pages(GFP_KERNEL, order); + array = (void *)__get_free_page(GFP_KERNEL); if (array == NULL) { - printk(KERN_ERR "tracer: failed to allocate" - " %ld bytes for trace buffer!\n", size); + printk(KERN_ERR "tracer: failed to allocate page" + "for trace buffer!\n"); goto free_buffers; } - global_trace.data[i]->trace = array; + data->trace = array; + + /* set the array to the list */ + INIT_LIST_HEAD(&data->trace_pages); + page = virt_to_page(array); + list_add(&page->lru, &data->trace_pages); + /* use the LRU flag to differentiate the two buffers */ + ClearPageLRU(page); /* Only allocate if we are actually using the max trace */ #ifdef CONFIG_TRACER_MAX_TRACE - array = (struct trace_entry *) - __get_free_pages(GFP_KERNEL, order); + array = (void *)__get_free_page(GFP_KERNEL); if (array == NULL) { - printk(KERN_ERR "wakeup tracer: failed to allocate" - " %ld bytes for trace buffer!\n", size); + printk(KERN_ERR "tracer: failed to allocate page" + "for trace buffer!\n"); goto free_buffers; } max_tr.data[i]->trace = array; + + INIT_LIST_HEAD(&max_tr.data[i]->trace_pages); + page = virt_to_page(array); + list_add(&page->lru, &max_tr.data[i]->trace_pages); + SetPageLRU(page); #endif } @@ -1507,11 +1624,18 @@ __init static int tracer_alloc_buffers(void) * Since we allocate by orders of pages, we may be able to * round up a bit. */ - global_trace.entries = size / TRACE_ENTRY_SIZE; + global_trace.entries = ENTRIES_PER_PAGE; max_tr.entries = global_trace.entries; + pages++; + + while (global_trace.entries < trace_nr_entries) { + if (trace_alloc_page()) + break; + pages++; + } - pr_info("tracer: %ld bytes allocated for %ld", - size, trace_nr_entries); + pr_info("tracer: %d pages allocated for %ld", + pages, trace_nr_entries); pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE); pr_info(" actual entries %ld\n", global_trace.entries); @@ -1526,17 +1650,26 @@ __init static int tracer_alloc_buffers(void) free_buffers: for (i-- ; i >= 0; i--) { + struct page *page, *tmp; struct trace_array_cpu *data = global_trace.data[i]; if (data && data->trace) { - free_pages((unsigned long)data->trace, order); + list_for_each_entry_safe(page, tmp, + &data->trace_pages, lru) { + list_del(&page->lru); + __free_page(page); + } data->trace = NULL; } #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; if (data && data->trace) { - free_pages((unsigned long)data->trace, order); + list_for_each_entry_safe(page, tmp, + &data->trace_pages, lru) { + list_del(&page->lru); + __free_page(page); + } data->trace = NULL; } #endif diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3173a93..83e257e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -54,9 +54,11 @@ struct trace_entry { */ struct trace_array_cpu { void *trace; + void *trace_current; + unsigned trace_current_idx; + struct list_head trace_pages; unsigned long trace_idx; atomic_t disabled; - atomic_t underrun; unsigned long saved_latency; unsigned long critical_start; unsigned long critical_end; @@ -112,8 +114,10 @@ struct trace_iterator { unsigned long iter_flags; loff_t pos; unsigned long next_idx[NR_CPUS]; + struct list_head *next_page[NR_CPUS]; + unsigned next_page_idx[NR_CPUS]; + long idx; int cpu; - int idx; }; void notrace tracing_reset(struct trace_array_cpu *data); -- cgit v0.10.2 From f43fdad8627fec2d21df92799b254dceb66c9c3c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: fix kexec disable the tracer while kexec pulls the rug from under the old kernel. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index d0b234c..88923fd 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include @@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image) unsigned long page_list[PAGES_NR]; void *control_page; + tracer_disable(); + /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 576a03d..1558fdc 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include @@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image) unsigned long page_list[PAGES_NR]; void *control_page; + tracer_disable(); + /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f5911d2..a42390c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -68,6 +68,13 @@ extern void ftrace_call(void); extern void mcount_call(void); #endif +static inline void tracer_disable(void) +{ +#ifdef CONFIG_FTRACE + ftrace_enabled = 0; +#endif +} + #ifdef CONFIG_FRAME_POINTER /* TODO: need to fix this for ARM */ # define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -- cgit v0.10.2 From 5568b139f4d196273958ae2947a736fdf1ffeece Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: debug smp_processor_id, use notrace preempt disable The debug smp_processor_id caused a recursive fault in debugging the irqsoff tracer. The tracer used a smp_processor_id in the ftrace callback, and this function called preempt_disable which also is traced. This caused a recursive fault (stack overload). Since using smp_processor_id without debugging on does not cause faults with the tracer (even when the tracer is wrong), the debug version should not cause a system reboot. This changes the debug_smp_processor_id to use the notrace versions of preempt_disable and enable. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index e555ab6..3b4dc09 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -37,7 +37,7 @@ notrace unsigned int debug_smp_processor_id(void) /* * Avoid recursion: */ - preempt_disable(); + preempt_disable_notrace(); if (!printk_ratelimit()) goto out_enable; @@ -49,7 +49,7 @@ notrace unsigned int debug_smp_processor_id(void) dump_stack(); out_enable: - preempt_enable_no_resched(); + preempt_enable_no_resched_notrace(); out: return this_cpu; } -- cgit v0.10.2 From 361943ad0ba3f16e66859e30a408915e008ba91e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: irqs off smp_processor_id() fix The irqsoff function tracer did a __get_cpu_var to determine if it should trace the function or not. The problem is that __get_cpu_var can preempt between getting the CPU and reading the cpu variable. This means that the cpu variable that is being read is not from the cpu being run on. At worst, this can give a false positive, where we trace the function when we should not. It will never give a false negative since we only want to trace when interrupts are disabled and we never preempt when they are. This fix adds a check after reading the irq flags to only trace if the interrupts are actually disabled. It also changes the reading of the cpu variable to use a raw_smp_processor_id since we now don't care if we preempt. We still catch that fact. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 8b12316..bd3f881 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -74,12 +74,21 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) long disabled; int cpu; - if (likely(!__get_cpu_var(tracing_cpu))) + /* + * Does not matter if we preempt. We test the flags + * afterward, to see if irqs are disabled or not. + * If we preempt and get a false positive, the flags + * test will fail. + */ + cpu = raw_smp_processor_id(); + if (likely(!per_cpu(tracing_cpu, cpu))) return; local_save_flags(flags); + /* slight chance to get a false positive on tracing_cpu */ + if (!irqs_disabled_flags(flags)) + return; - cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); -- cgit v0.10.2 From 0764d23cf066c52de42b653144605b481d3fbdbc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: lockdep notrace annotations Add notrace annotations to lockdep to keep ftrace from causing recursive problems with lock tracing and debugging. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e219243..ac46847 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -271,14 +271,14 @@ static struct list_head chainhash_table[CHAINHASH_SIZE]; ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \ (key2)) -void lockdep_off(void) +notrace void lockdep_off(void) { current->lockdep_recursion++; } EXPORT_SYMBOL(lockdep_off); -void lockdep_on(void) +notrace void lockdep_on(void) { current->lockdep_recursion--; } @@ -1041,7 +1041,7 @@ find_usage_forwards(struct lock_class *source, unsigned int depth) * Return 1 otherwise and keep unchanged. * Return 0 on error. */ -static noinline int +static noinline notrace int find_usage_backwards(struct lock_class *source, unsigned int depth) { struct lock_list *entry; @@ -1591,7 +1591,7 @@ static inline int validate_chain(struct task_struct *curr, * We are building curr_chain_key incrementally, so double-check * it from scratch, to make sure that it's done correctly: */ -static void check_chain_key(struct task_struct *curr) +static notrace void check_chain_key(struct task_struct *curr) { #ifdef CONFIG_DEBUG_LOCKDEP struct held_lock *hlock, *prev_hlock = NULL; @@ -1967,7 +1967,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this, /* * Mark all held locks with a usage bit: */ -static int +static notrace int mark_held_locks(struct task_struct *curr, int hardirq) { enum lock_usage_bit usage_bit; @@ -2260,8 +2260,8 @@ static inline int separate_irq_context(struct task_struct *curr, /* * Mark a lock with a usage bit, and validate the state transition: */ -static int mark_lock(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit) +static notrace int mark_lock(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) { unsigned int new_mask = 1 << new_bit, ret = 1; @@ -2663,7 +2663,7 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) /* * Check whether we follow the irq-flags state precisely: */ -static void check_flags(unsigned long flags) +static notrace void check_flags(unsigned long flags) { #if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS) if (!debug_locks) @@ -2700,8 +2700,8 @@ static void check_flags(unsigned long flags) * We are not always called with irqs disabled - do that here, * and also avoid lockdep recursion: */ -void lock_acquire(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, unsigned long ip) +notrace void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, unsigned long ip) { unsigned long flags; @@ -2723,7 +2723,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, EXPORT_SYMBOL_GPL(lock_acquire); -void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) +notrace void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip) { unsigned long flags; diff --git a/kernel/spinlock.c b/kernel/spinlock.c index ae28c82..a1fb54c 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -436,7 +436,7 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock) } EXPORT_SYMBOL(_spin_trylock_bh); -int in_lock_functions(unsigned long addr) +notrace int in_lock_functions(unsigned long addr) { /* Linker adds these: start and end of __lockfunc functions */ extern char __lock_text_start[], __lock_text_end[]; -- cgit v0.10.2 From 18cef379d30f5ded20cc31d7f2d342639d39919d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: don't use raw_local_irq_save/restore Using raw_local_irq_save/restore confuses lockdep. It's fine to use the normal ones. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d7ad030..9175ce9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -269,7 +269,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) if (unlikely(!tracer_enabled)) return; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -278,7 +278,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) ftrace(tr, data, ip, parent_ip, flags); atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } static struct ftrace_ops trace_ops __read_mostly = diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 3e4771d..2715267 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -29,7 +29,7 @@ ctx_switch_func(struct task_struct *prev, struct task_struct *next) if (!tracer_enabled) return; - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -38,7 +38,7 @@ ctx_switch_func(struct task_struct *prev, struct task_struct *next) tracing_sched_switch_trace(tr, data, prev, next, flags); atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) -- cgit v0.10.2 From 89b2f97819dd074297bbe3e19eaa4afcc98845ad Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: fix updates to max trace This patch fixes some bugs to the updating of the max trace that was caused by implementing the new buffering. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9175ce9..9596656 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -153,6 +153,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) memcpy(max_tr.data[i], data, sizeof(*data)); data->trace = save_trace; data->trace_pages = save_pages; + tracing_reset(data); } __update_max_tr(tr, tsk, cpu); @@ -183,6 +184,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) memcpy(max_tr.data[cpu], data, sizeof(*data)); data->trace = save_trace; data->trace_pages = save_pages; + tracing_reset(data); __update_max_tr(tr, tsk, cpu); spin_unlock(&ftrace_max_lock); @@ -877,6 +879,8 @@ print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, entry->ctx.next_prio, comm); break; + default: + seq_printf(m, "Unknown type %d\n", entry->type); } } @@ -1625,7 +1629,6 @@ __init static int tracer_alloc_buffers(void) * round up a bit. */ global_trace.entries = ENTRIES_PER_PAGE; - max_tr.entries = global_trace.entries; pages++; while (global_trace.entries < trace_nr_entries) { @@ -1633,6 +1636,7 @@ __init static int tracer_alloc_buffers(void) break; pages++; } + max_tr.entries = global_trace.entries; pr_info("tracer: %d pages allocated for %ld", pages, trace_nr_entries); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index bd3f881..74165f6 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -23,6 +23,8 @@ static int tracer_enabled __read_mostly; static DEFINE_PER_CPU(int, tracing_cpu); +static DEFINE_SPINLOCK(max_trace_lock); + enum { TRACER_IRQS_OFF = (1 << 1), TRACER_PREEMPT_OFF = (1 << 2), @@ -126,7 +128,7 @@ check_critical_timing(struct trace_array *tr, int cpu) { unsigned long latency, t0, t1; - cycle_t T0, T1, T2, delta; + cycle_t T0, T1, delta; unsigned long flags; /* @@ -142,20 +144,18 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out; - ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); - /* - * Update the timestamp, because the trace entry above - * might change it (it can only get larger so the latency - * is fair to be reported): - */ - T2 = now(cpu); + spin_lock(&max_trace_lock); - delta = T2-T0; + /* check if we are still the max latency */ + if (!report_latency(delta)) + goto out_unlock; + + ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); latency = nsecs_to_usecs(delta); if (data->critical_sequence != max_sequence) - goto out; + goto out_unlock; tracing_max_latency = delta; t0 = nsecs_to_usecs(T0); @@ -189,6 +189,9 @@ check_critical_timing(struct trace_array *tr, max_sequence++; +out_unlock: + spin_unlock(&max_trace_lock); + out: data->critical_sequence = max_sequence; data->preempt_timestamp = now(cpu); @@ -366,14 +369,14 @@ void notrace trace_preempt_off(unsigned long a0, unsigned long a1) static void start_irqsoff_tracer(struct trace_array *tr) { - tracer_enabled = 1; register_ftrace_function(&trace_ops); + tracer_enabled = 1; } static void stop_irqsoff_tracer(struct trace_array *tr) { - unregister_ftrace_function(&trace_ops); tracer_enabled = 0; + unregister_ftrace_function(&trace_ops); } static void __irqsoff_tracer_init(struct trace_array *tr) -- cgit v0.10.2 From 57f50be14d57b0dbf88dd019e7bb0ff3a3dc7b81 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: fix max latency Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9596656..9bad237 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -69,6 +69,11 @@ static int __init set_nr_entries(char *str) } __setup("trace_entries=", set_nr_entries); +unsigned long nsecs_to_usecs(unsigned long nsecs) +{ + return nsecs / 1000; +} + enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -733,7 +738,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) "---------------------------------\n"); seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |" " (M:%s VP:%d, KP:%d, SP:%d HP:%d", - data->saved_latency, + nsecs_to_usecs(data->saved_latency), entries, total, tr->cpu, @@ -771,11 +776,6 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) seq_puts(m, "\n"); } -unsigned long nsecs_to_usecs(unsigned long nsecs) -{ - return nsecs / 1000; -} - static void notrace lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu) { -- cgit v0.10.2 From e1c08bdd9fa73e44096e5a82c0d5928b04ab02c8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: force recording Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a42390c..2c1670c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -54,6 +54,8 @@ struct dyn_ftrace { unsigned long flags; }; +int ftrace_force_update(void); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); @@ -66,6 +68,8 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +#else +# define ftrace_force_update() do { } while (0) #endif static inline void tracer_disable(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 97d5cb7..4facf5c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -146,6 +146,10 @@ static int notrace __unregister_ftrace_function(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE +static struct task_struct *ftraced_task; +static DECLARE_WAIT_QUEUE_HEAD(ftraced_waiters); +static unsigned long ftraced_iteration_counter; + enum { FTRACE_ENABLE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), @@ -590,9 +594,12 @@ static int notrace ftraced(void *ignore) ftraced_trigger = 0; ftrace_record_suspend--; } + ftraced_iteration_counter++; mutex_unlock(&ftraced_lock); mutex_unlock(&ftrace_sysctl_lock); + wake_up_interruptible(&ftraced_waiters); + ftrace_shutdown_replenish(); set_current_state(TASK_INTERRUPTIBLE); @@ -1050,6 +1057,49 @@ static struct file_operations ftrace_filter_fops = { .release = ftrace_filter_release, }; +/** + * ftrace_force_update - force an update to all recording ftrace functions + * + * The ftrace dynamic update daemon only wakes up once a second. + * There may be cases where an update needs to be done immediately + * for tests or internal kernel tracing to begin. This function + * wakes the daemon to do an update and will not return until the + * update is complete. + */ +int ftrace_force_update(void) +{ + unsigned long last_counter; + DECLARE_WAITQUEUE(wait, current); + int ret = 0; + + if (!ftraced_task) + return -ENODEV; + + mutex_lock(&ftraced_lock); + last_counter = ftraced_iteration_counter; + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&ftraced_waiters, &wait); + + do { + mutex_unlock(&ftraced_lock); + wake_up_process(ftraced_task); + schedule(); + mutex_lock(&ftraced_lock); + if (signal_pending(current)) { + ret = -EINTR; + break; + } + set_current_state(TASK_INTERRUPTIBLE); + } while (last_counter == ftraced_iteration_counter); + + mutex_unlock(&ftraced_lock); + remove_wait_queue(&ftraced_waiters, &wait); + set_current_state(TASK_RUNNING); + + return ret; +} + static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; @@ -1095,6 +1145,7 @@ static int __init notrace ftrace_dynamic_init(void) return -1; last_ftrace_enabled = ftrace_enabled = 1; + ftraced_task = p; return 0; } -- cgit v0.10.2 From 60a11774b38fef1ab90b18c5353bd1c7c4d311c8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: add self-tests Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index cad9db1..3f73a17 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -105,3 +105,16 @@ config DYNAMIC_FTRACE wakes up once a second and checks to see if any ftrace calls were made. If so, it runs stop_machine (stops all CPUS) and modifies the code to jump over the call to ftrace. + +config FTRACE_SELFTEST + bool + +config FTRACE_STARTUP_TEST + bool "Perform a startup test on ftrace" + depends on TRACING + select FTRACE_SELFTEST + help + This option performs a series of startup tests on ftrace. On bootup + a series of tests are made to verify that the tracer is + functioning properly. It will do tests on all the configured + tracers of ftrace. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9bad237..f6d026f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -32,6 +32,8 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +static int tracing_disabled = 1; + static long notrace ns2usecs(cycle_t nsec) { @@ -217,11 +219,48 @@ int register_tracer(struct tracer *type) } } +#ifdef CONFIG_FTRACE_STARTUP_TEST + if (type->selftest) { + struct tracer *saved_tracer = current_trace; + struct trace_array_cpu *data; + struct trace_array *tr = &global_trace; + int saved_ctrl = tr->ctrl; + int i; + /* + * Run a selftest on this tracer. + * Here we reset the trace buffer, and set the current + * tracer to be this tracer. The tracer can then run some + * internal tracing to verify that everything is in order. + * If we fail, we do not register this tracer. + */ + for_each_possible_cpu(i) { + if (!data->trace) + continue; + data = tr->data[i]; + tracing_reset(data); + } + current_trace = type; + tr->ctrl = 0; + /* the test is responsible for initializing and enabling */ + pr_info("Testing tracer %s: ", type->name); + ret = type->selftest(type, tr); + /* the test is responsible for resetting too */ + current_trace = saved_tracer; + tr->ctrl = saved_ctrl; + if (ret) { + printk(KERN_CONT "FAILED!\n"); + goto out; + } + printk(KERN_CONT "PASSED\n"); + } +#endif + type->next = trace_types; trace_types = type; len = strlen(type->name); if (len > max_tracer_type_len) max_tracer_type_len = len; + out: mutex_unlock(&trace_types_lock); @@ -985,6 +1024,11 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) { struct trace_iterator *iter; + if (tracing_disabled) { + *ret = -ENODEV; + return NULL; + } + iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { *ret = -ENOMEM; @@ -1023,6 +1067,9 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) int tracing_open_generic(struct inode *inode, struct file *filp) { + if (tracing_disabled) + return -ENODEV; + filp->private_data = inode->i_private; return 0; } @@ -1128,6 +1175,9 @@ static int show_traces_open(struct inode *inode, struct file *file) { int ret; + if (tracing_disabled) + return -ENODEV; + ret = seq_open(file, &show_traces_seq_ops); if (!ret) { struct seq_file *m = file->private_data; @@ -1452,6 +1502,11 @@ struct dentry *tracing_init_dentry(void) return d_tracer; } +#ifdef CONFIG_FTRACE_SELFTEST +/* Let selftest have access to static functions in this file */ +#include "trace_selftest.c" +#endif + static __init void tracer_init_debugfs(void) { struct dentry *d_tracer; @@ -1585,6 +1640,7 @@ __init static int tracer_alloc_buffers(void) void *array; struct page *page; int pages = 0; + int ret = -ENOMEM; int i; /* Allocate the first page for all buffers */ @@ -1650,6 +1706,9 @@ __init static int tracer_alloc_buffers(void) register_tracer(&no_tracer); current_trace = &no_tracer; + /* All seems OK, enable tracing */ + tracing_disabled = 0; + return 0; free_buffers: @@ -1678,7 +1737,7 @@ __init static int tracer_alloc_buffers(void) } #endif } - return -ENOMEM; + return ret; } -device_initcall(tracer_alloc_buffers); +fs_initcall(tracer_alloc_buffers); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 83e257e..88edbf1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -99,6 +99,10 @@ struct tracer { void (*start)(struct trace_iterator *iter); void (*stop)(struct trace_iterator *iter); void (*ctrl_update)(struct trace_array *tr); +#ifdef CONFIG_FTRACE_STARTUP_TEST + int (*selftest)(struct tracer *trace, + struct trace_array *tr); +#endif struct tracer *next; int print_max; }; @@ -185,4 +189,31 @@ extern int unregister_tracer_switch(struct tracer_switch_ops *ops); extern unsigned long ftrace_update_tot_cnt; #endif +#ifdef CONFIG_FTRACE_STARTUP_TEST +#ifdef CONFIG_FTRACE +extern int trace_selftest_startup_function(struct tracer *trace, + struct trace_array *tr); +#endif +#ifdef CONFIG_IRQSOFF_TRACER +extern int trace_selftest_startup_irqsoff(struct tracer *trace, + struct trace_array *tr); +#endif +#ifdef CONFIG_PREEMPT_TRACER +extern int trace_selftest_startup_preemptoff(struct tracer *trace, + struct trace_array *tr); +#endif +#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER) +extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, + struct trace_array *tr); +#endif +#ifdef CONFIG_SCHED_TRACER +extern int trace_selftest_startup_wakeup(struct tracer *trace, + struct trace_array *tr); +#endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern int trace_selftest_startup_sched_switch(struct tracer *trace, + struct trace_array *tr); +#endif +#endif /* CONFIG_FTRACE_STARTUP_TEST */ + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 82988c5..5d8ad7a 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -63,6 +63,9 @@ static struct tracer function_trace __read_mostly = .init = function_trace_init, .reset = function_trace_reset, .ctrl_update = function_trace_ctrl_update, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_function, +#endif }; static __init int init_function_trace(void) diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 74165f6..14183b8 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -432,6 +432,9 @@ static struct tracer irqsoff_tracer __read_mostly = .close = irqsoff_tracer_close, .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_irqsoff, +#endif }; # define register_irqsoff(trace) register_tracer(&trace) #else @@ -455,6 +458,9 @@ static struct tracer preemptoff_tracer __read_mostly = .close = irqsoff_tracer_close, .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_preemptoff, +#endif }; # define register_preemptoff(trace) register_tracer(&trace) #else @@ -480,6 +486,9 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .close = irqsoff_tracer_close, .ctrl_update = irqsoff_tracer_ctrl_update, .print_max = 1, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_preemptirqsoff, +#endif }; # define register_preemptirqsoff(trace) register_tracer(&trace) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 2715267..6c92841 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -107,6 +107,9 @@ static struct tracer sched_switch_trace __read_mostly = .init = sched_switch_trace_init, .reset = sched_switch_trace_reset, .ctrl_update = sched_switch_trace_ctrl_update, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_sched_switch, +#endif }; __init static int init_sched_switch_trace(void) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 7c3ccef..3d10ff0 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -295,6 +295,9 @@ static struct tracer wakeup_tracer __read_mostly = .close = wakeup_tracer_close, .ctrl_update = wakeup_tracer_ctrl_update, .print_max = 1, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_wakeup, +#endif }; __init static int init_wakeup_tracer(void) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c new file mode 100644 index 0000000..ef4d3cc --- /dev/null +++ b/kernel/trace/trace_selftest.c @@ -0,0 +1,415 @@ +/* Include in trace.c */ + +#include + +static inline int trace_valid_entry(struct trace_entry *entry) +{ + switch (entry->type) { + case TRACE_FN: + case TRACE_CTX: + return 1; + } + return 0; +} + +static int +trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data) +{ + struct page *page; + struct trace_entry *entries; + int idx = 0; + int i; + + page = list_entry(data->trace_pages.next, struct page, lru); + entries = page_address(page); + + if (data->trace != entries) + goto failed; + + /* + * The starting trace buffer always has valid elements, + * if any element exits. + */ + entries = data->trace; + + for (i = 0; i < tr->entries; i++) { + + if (i < data->trace_idx && + !trace_valid_entry(&entries[idx])) { + printk(KERN_CONT ".. invalid entry %d ", entries[idx].type); + goto failed; + } + + idx++; + if (idx >= ENTRIES_PER_PAGE) { + page = virt_to_page(entries); + if (page->lru.next == &data->trace_pages) { + if (i != tr->entries - 1) { + printk(KERN_CONT ".. entries buffer mismatch"); + goto failed; + } + } else { + page = list_entry(page->lru.next, struct page, lru); + entries = page_address(page); + } + idx = 0; + } + } + + page = virt_to_page(entries); + if (page->lru.next != &data->trace_pages) { + printk(KERN_CONT ".. too many entries"); + goto failed; + } + + return 0; + + failed: + printk(KERN_CONT ".. corrupted trace buffer .. "); + return -1; +} + +/* + * Test the trace buffer to see if all the elements + * are still sane. + */ +static int trace_test_buffer(struct trace_array *tr, unsigned long *count) +{ + unsigned long cnt = 0; + int cpu; + int ret = 0; + + for_each_possible_cpu(cpu) { + if (!tr->data[cpu]->trace) + continue; + + cnt += tr->data[cpu]->trace_idx; + printk("%d: count = %ld\n", cpu, cnt); + + ret = trace_test_buffer_cpu(tr, tr->data[cpu]); + if (ret) + break; + } + + if (count) + *count = cnt; + + return ret; +} + +#ifdef CONFIG_FTRACE +/* + * Simple verification test of ftrace function tracer. + * Enable ftrace, sleep 1/10 second, and then read the trace + * buffer to see if all is in order. + */ +int +trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) +{ + unsigned long count; + int ret; + + /* make sure functions have been recorded */ + ret = ftrace_force_update(); + if (ret) { + printk(KERN_CONT ".. ftraced failed .. "); + return ret; + } + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* Sleep for a 1/10 of a second */ + msleep(100); + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + } + + return ret; +} +#endif /* CONFIG_FTRACE */ + +#ifdef CONFIG_IRQSOFF_TRACER +int +trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) +{ + unsigned long save_max = tracing_max_latency; + unsigned long count; + int ret; + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* reset the max latency */ + tracing_max_latency = 0; + /* disable interrupts for a bit */ + local_irq_disable(); + udelay(100); + local_irq_enable(); + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check both trace buffers */ + ret = trace_test_buffer(tr, NULL); + if (!ret) + ret = trace_test_buffer(&max_tr, &count); + trace->reset(tr); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + } + + tracing_max_latency = save_max; + + return ret; +} +#endif /* CONFIG_IRQSOFF_TRACER */ + +#ifdef CONFIG_PREEMPT_TRACER +int +trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) +{ + unsigned long save_max = tracing_max_latency; + unsigned long count; + int ret; + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* reset the max latency */ + tracing_max_latency = 0; + /* disable preemption for a bit */ + preempt_disable(); + udelay(100); + preempt_enable(); + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check both trace buffers */ + ret = trace_test_buffer(tr, NULL); + if (!ret) + ret = trace_test_buffer(&max_tr, &count); + trace->reset(tr); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + } + + tracing_max_latency = save_max; + + return ret; +} +#endif /* CONFIG_PREEMPT_TRACER */ + +#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER) +int +trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr) +{ + unsigned long save_max = tracing_max_latency; + unsigned long count; + int ret; + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + + /* reset the max latency */ + tracing_max_latency = 0; + + /* disable preemption and interrupts for a bit */ + preempt_disable(); + local_irq_disable(); + udelay(100); + preempt_enable(); + /* reverse the order of preempt vs irqs */ + local_irq_enable(); + + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check both trace buffers */ + ret = trace_test_buffer(tr, NULL); + if (ret) + goto out; + + ret = trace_test_buffer(&max_tr, &count); + if (ret) + goto out; + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + goto out; + } + + /* do the test by disabling interrupts first this time */ + tracing_max_latency = 0; + tr->ctrl = 1; + trace->ctrl_update(tr); + preempt_disable(); + local_irq_disable(); + udelay(100); + preempt_enable(); + /* reverse the order of preempt vs irqs */ + local_irq_enable(); + + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check both trace buffers */ + ret = trace_test_buffer(tr, NULL); + if (ret) + goto out; + + ret = trace_test_buffer(&max_tr, &count); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + goto out; + } + + out: + trace->reset(tr); + tracing_max_latency = save_max; + + return ret; +} +#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */ + +#ifdef CONFIG_SCHED_TRACER +static int trace_wakeup_test_thread(void *data) +{ + struct completion *x = data; + + /* Make this a RT thread, doesn't need to be too high */ + + rt_mutex_setprio(current, MAX_RT_PRIO - 5); + + /* Make it know we have a new prio */ + complete(x); + + /* now go to sleep and let the test wake us up */ + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + + /* we are awake, now wait to disappear */ + while (!kthread_should_stop()) { + /* + * This is an RT task, do short sleeps to let + * others run. + */ + msleep(100); + } + + return 0; +} + +int +trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) +{ + unsigned long save_max = tracing_max_latency; + struct task_struct *p; + struct completion isrt; + unsigned long count; + int ret; + + init_completion(&isrt); + + /* create a high prio thread */ + p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test"); + if (!IS_ERR(p)) { + printk(KERN_CONT "Failed to create ftrace wakeup test thread "); + return -1; + } + + /* make sure the thread is running at an RT prio */ + wait_for_completion(&isrt); + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* reset the max latency */ + tracing_max_latency = 0; + + /* sleep to let the RT thread sleep too */ + msleep(100); + + /* + * Yes this is slightly racy. It is possible that for some + * strange reason that the RT thread we created, did not + * call schedule for 100ms after doing the completion, + * and we do a wakeup on a task that already is awake. + * But that is extremely unlikely, and the worst thing that + * happens in such a case, is that we disable tracing. + * Honestly, if this race does happen something is horrible + * wrong with the system. + */ + + wake_up_process(p); + + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check both trace buffers */ + ret = trace_test_buffer(tr, NULL); + if (!ret) + ret = trace_test_buffer(&max_tr, &count); + + + trace->reset(tr); + + tracing_max_latency = save_max; + + /* kill the thread */ + kthread_stop(p); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + } + + return ret; +} +#endif /* CONFIG_SCHED_TRACER */ + +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +int +trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr) +{ + unsigned long count; + int ret; + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* Sleep for a 1/10 of a second */ + msleep(100); + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + } + + return ret; +} +#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ + +#ifdef CONFIG_DYNAMIC_FTRACE +#endif /* CONFIG_DYNAMIC_FTRACE */ -- cgit v0.10.2 From c7aafc549766b87819285d3480648fc652a47bc4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: cleanups factor out code and clean it up. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2c1670c..953a36d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -69,7 +69,7 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); #else -# define ftrace_force_update() do { } while (0) +# define ftrace_force_update() ({ 0; }) #endif static inline void tracer_disable(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4facf5c..6d4d2e8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1152,10 +1152,10 @@ static int __init notrace ftrace_dynamic_init(void) core_initcall(ftrace_dynamic_init); #else -# define ftrace_startup() do { } while (0) -# define ftrace_shutdown() do { } while (0) -# define ftrace_startup_sysctl() do { } while (0) -# define ftrace_shutdown_sysctl() do { } while (0) +# define ftrace_startup() do { } while (0) +# define ftrace_shutdown() do { } while (0) +# define ftrace_startup_sysctl() do { } while (0) +# define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f6d026f..61d2f02 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -142,12 +142,59 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(current); } +void check_pages(struct trace_array_cpu *data) +{ + struct page *page, *tmp; + + BUG_ON(data->trace_pages.next->prev != &data->trace_pages); + BUG_ON(data->trace_pages.prev->next != &data->trace_pages); + + list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { + BUG_ON(page->lru.next->prev != &page->lru); + BUG_ON(page->lru.prev->next != &page->lru); + } +} + +void *head_page(struct trace_array_cpu *data) +{ + struct page *page; + + check_pages(data); + if (list_empty(&data->trace_pages)) + return NULL; + + page = list_entry(data->trace_pages.next, struct page, lru); + BUG_ON(&page->lru == &data->trace_pages); + + return page_address(page); +} + +notrace static void +flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) +{ + struct list_head flip_pages; + + INIT_LIST_HEAD(&flip_pages); + + tr1->trace_current = NULL; + memcpy(&tr1->trace_current_idx, &tr2->trace_current_idx, + sizeof(struct trace_array_cpu) - + offsetof(struct trace_array_cpu, trace_current_idx)); + + check_pages(tr1); + check_pages(tr2); + list_splice_init(&tr1->trace_pages, &flip_pages); + list_splice_init(&tr2->trace_pages, &tr1->trace_pages); + list_splice_init(&flip_pages, &tr2->trace_pages); + BUG_ON(!list_empty(&flip_pages)); + check_pages(tr1); + check_pages(tr2); +} + notrace void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data; - void *save_trace; - struct list_head save_pages; int i; WARN_ON_ONCE(!irqs_disabled()); @@ -155,11 +202,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) /* clear out all the previous traces */ for_each_possible_cpu(i) { data = tr->data[i]; - save_trace = max_tr.data[i]->trace; - save_pages = max_tr.data[i]->trace_pages; - memcpy(max_tr.data[i], data, sizeof(*data)); - data->trace = save_trace; - data->trace_pages = save_pages; + flip_trace(max_tr.data[i], data); tracing_reset(data); } @@ -177,8 +220,6 @@ notrace void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data = tr->data[cpu]; - void *save_trace; - struct list_head save_pages; int i; WARN_ON_ONCE(!irqs_disabled()); @@ -186,11 +227,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) for_each_possible_cpu(i) tracing_reset(max_tr.data[i]); - save_trace = max_tr.data[cpu]->trace; - save_pages = max_tr.data[cpu]->trace_pages; - memcpy(max_tr.data[cpu], data, sizeof(*data)); - data->trace = save_trace; - data->trace_pages = save_pages; + flip_trace(max_tr.data[cpu], data); + tracing_reset(data); __update_max_tr(tr, tsk, cpu); @@ -234,9 +272,9 @@ int register_tracer(struct tracer *type) * If we fail, we do not register this tracer. */ for_each_possible_cpu(i) { - if (!data->trace) - continue; data = tr->data[i]; + if (!head_page(data)) + continue; tracing_reset(data); } current_trace = type; @@ -298,7 +336,7 @@ void unregister_tracer(struct tracer *type) void notrace tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; - data->trace_current = data->trace; + data->trace_current = head_page(data); data->trace_current_idx = 0; } @@ -425,26 +463,31 @@ notrace void tracing_record_cmdline(struct task_struct *tsk) } static inline notrace struct trace_entry * -tracing_get_trace_entry(struct trace_array *tr, - struct trace_array_cpu *data) +tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) { unsigned long idx, idx_next; struct trace_entry *entry; - struct page *page; struct list_head *next; + struct page *page; data->trace_idx++; idx = data->trace_current_idx; idx_next = idx + 1; + BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE); + entry = data->trace_current + idx * TRACE_ENTRY_SIZE; if (unlikely(idx_next >= ENTRIES_PER_PAGE)) { page = virt_to_page(data->trace_current); - if (unlikely(&page->lru == data->trace_pages.prev)) - next = data->trace_pages.next; - else - next = page->lru.next; + /* + * Roundrobin - but skip the head (which is not a real page): + */ + next = page->lru.next; + if (unlikely(next == &data->trace_pages)) + next = next->next; + BUG_ON(next == &data->trace_pages); + page = list_entry(next, struct page, lru); data->trace_current = page_address(page); idx_next = 0; @@ -456,18 +499,17 @@ tracing_get_trace_entry(struct trace_array *tr, } static inline notrace void -tracing_generic_entry_update(struct trace_entry *entry, - unsigned long flags) +tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) { struct task_struct *tsk = current; unsigned long pc; pc = preempt_count(); - entry->idx = atomic_inc_return(&tracer_counter); - entry->preempt_count = pc & 0xff; - entry->pid = tsk->pid; - entry->t = now(raw_smp_processor_id()); + entry->idx = atomic_inc_return(&tracer_counter); + entry->preempt_count = pc & 0xff; + entry->pid = tsk->pid; + entry->t = now(raw_smp_processor_id()); entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | @@ -476,16 +518,15 @@ tracing_generic_entry_update(struct trace_entry *entry, notrace void ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, - unsigned long flags) + unsigned long ip, unsigned long parent_ip, unsigned long flags) { struct trace_entry *entry; - entry = tracing_get_trace_entry(tr, data); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); - entry->type = TRACE_FN; - entry->fn.ip = ip; - entry->fn.parent_ip = parent_ip; + entry->type = TRACE_FN; + entry->fn.ip = ip; + entry->fn.parent_ip = parent_ip; } notrace void @@ -496,7 +537,7 @@ tracing_sched_switch_trace(struct trace_array *tr, { struct trace_entry *entry; - entry = tracing_get_trace_entry(tr, data); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_CTX; entry->ctx.prev_pid = prev->pid; @@ -540,6 +581,8 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, } page = list_entry(iter->next_page[cpu], struct page, lru); + BUG_ON(&data->trace_pages == &page->lru); + array = page_address(page); return &array[iter->next_page_idx[cpu]]; @@ -554,7 +597,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) int cpu; for_each_possible_cpu(cpu) { - if (!tr->data[cpu]->trace) + if (!head_page(tr->data[cpu])) continue; ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); if (ent && @@ -762,7 +805,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) name = type->name; for_each_possible_cpu(cpu) { - if (tr->data[cpu]->trace) { + if (head_page(tr->data[cpu])) { total += tr->data[cpu]->trace_idx; if (tr->data[cpu]->trace_idx > tr->entries) entries += tr->entries; @@ -975,8 +1018,7 @@ static int trace_empty(struct trace_iterator *iter) for_each_possible_cpu(cpu) { data = iter->tr->data[cpu]; - if (data->trace && - data->trace_idx) + if (head_page(data) && data->trace_idx) return 0; } return 1; @@ -1576,9 +1618,9 @@ static struct tracer no_tracer __read_mostly = static int trace_alloc_page(void) { struct trace_array_cpu *data; - void *array; struct page *page, *tmp; LIST_HEAD(pages); + void *array; int i; /* first allocate a page for each CPU */ @@ -1610,14 +1652,14 @@ static int trace_alloc_page(void) for_each_possible_cpu(i) { data = global_trace.data[i]; page = list_entry(pages.next, struct page, lru); - list_del(&page->lru); + list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); ClearPageLRU(page); #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; page = list_entry(pages.next, struct page, lru); - list_del(&page->lru); + list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); SetPageLRU(page); #endif @@ -1628,7 +1670,7 @@ static int trace_alloc_page(void) free_pages: list_for_each_entry_safe(page, tmp, &pages, lru) { - list_del(&page->lru); + list_del_init(&page->lru); __free_page(page); } return -ENOMEM; @@ -1654,7 +1696,6 @@ __init static int tracer_alloc_buffers(void) "for trace buffer!\n"); goto free_buffers; } - data->trace = array; /* set the array to the list */ INIT_LIST_HEAD(&data->trace_pages); @@ -1671,7 +1712,6 @@ __init static int tracer_alloc_buffers(void) "for trace buffer!\n"); goto free_buffers; } - max_tr.data[i]->trace = array; INIT_LIST_HEAD(&max_tr.data[i]->trace_pages); page = virt_to_page(array); @@ -1716,24 +1756,22 @@ __init static int tracer_alloc_buffers(void) struct page *page, *tmp; struct trace_array_cpu *data = global_trace.data[i]; - if (data && data->trace) { + if (data) { list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { - list_del(&page->lru); + list_del_init(&page->lru); __free_page(page); } - data->trace = NULL; } #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; - if (data && data->trace) { + if (data) { list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { - list_del(&page->lru); + list_del_init(&page->lru); __free_page(page); } - data->trace = NULL; } #endif } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 88edbf1..cc1d34b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -53,12 +53,12 @@ struct trace_entry { * the trace, etc.) */ struct trace_array_cpu { - void *trace; void *trace_current; - unsigned trace_current_idx; struct list_head trace_pages; - unsigned long trace_idx; atomic_t disabled; + /* these fields get copied into max-trace: */ + unsigned trace_current_idx; + unsigned long trace_idx; unsigned long saved_latency; unsigned long critical_start; unsigned long critical_end; @@ -216,4 +216,6 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, #endif #endif /* CONFIG_FTRACE_STARTUP_TEST */ +extern void *head_page(struct trace_array_cpu *data); + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 14183b8..2dfebb6 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -144,7 +144,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out; - spin_lock(&max_trace_lock); + spin_lock_irqsave(&max_trace_lock, flags); /* check if we are still the max latency */ if (!report_latency(delta)) @@ -165,32 +165,24 @@ check_critical_timing(struct trace_array *tr, update_max_tr_single(tr, current, cpu); - if (tracing_thresh) - printk(KERN_INFO "(%16s-%-5d|#%d): %lu us critical section " - "violates %lu us threshold.\n" - " => started at timestamp %lu: ", + if (tracing_thresh) { + printk(KERN_INFO "(%16s-%-5d|#%d):" + " %lu us critical section violates %lu us threshold.\n", current->comm, current->pid, raw_smp_processor_id(), - latency, nsecs_to_usecs(tracing_thresh), t0); - else + latency, nsecs_to_usecs(tracing_thresh)); + } else { printk(KERN_INFO "(%16s-%-5d|#%d):" - " new %lu us maximum-latency " - "critical section.\n => started at timestamp %lu: ", + " new %lu us maximum-latency critical section.\n", current->comm, current->pid, raw_smp_processor_id(), - latency, t0); - - print_symbol(KERN_CONT "<%s>\n", data->critical_start); - printk(KERN_CONT " => ended at timestamp %lu: ", t1); - print_symbol(KERN_CONT "<%s>\n", data->critical_end); - dump_stack(); - t1 = nsecs_to_usecs(now(cpu)); - printk(KERN_CONT " => dump-end timestamp %lu\n\n", t1); + latency); + } max_sequence++; out_unlock: - spin_unlock(&max_trace_lock); + spin_unlock_irqrestore(&max_trace_lock, flags); out: data->critical_sequence = max_sequence; @@ -216,7 +208,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(!data) || unlikely(!data->trace) || + if (unlikely(!data) || unlikely(!head_page(data)) || atomic_read(&data->disabled)) return; @@ -256,7 +248,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(!data) || unlikely(!data->trace) || + if (unlikely(!data) || unlikely(!head_page(data)) || !data->critical_start || atomic_read(&data->disabled)) return; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3d10ff0..688df96 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -107,24 +107,18 @@ wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) update_max_tr(tr, wakeup_task, wakeup_cpu); if (tracing_thresh) { - printk(KERN_INFO "(%16s-%-5d|#%d): %lu us wakeup latency " - "violates %lu us threshold.\n" - " => started at timestamp %lu: ", + printk(KERN_INFO "(%16s-%-5d|#%d):" + " %lu us wakeup latency violates %lu us threshold.\n", wakeup_task->comm, wakeup_task->pid, raw_smp_processor_id(), - latency, nsecs_to_usecs(tracing_thresh), t0); + latency, nsecs_to_usecs(tracing_thresh)); } else { - printk(KERN_INFO "(%16s-%-5d|#%d): new %lu us maximum " - "wakeup latency.\n => started at timestamp %lu: ", + printk(KERN_INFO "(%16s-%-5d|#%d):" + " new %lu us maximum wakeup latency.\n", wakeup_task->comm, wakeup_task->pid, - cpu, latency, t0); + cpu, latency); } - printk(KERN_CONT " ended at timestamp %lu: ", t1); - dump_stack(); - t1 = nsecs_to_usecs(now(cpu)); - printk(KERN_CONT " dump-end timestamp %lu\n\n", t1); - out_unlock: __wakeup_reset(tr); spin_unlock_irqrestore(&wakeup_lock, flags); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index ef4d3cc..c01874c 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1,6 +1,7 @@ /* Include in trace.c */ #include +#include static inline int trace_valid_entry(struct trace_entry *entry) { @@ -15,28 +16,29 @@ static inline int trace_valid_entry(struct trace_entry *entry) static int trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data) { - struct page *page; struct trace_entry *entries; + struct page *page; int idx = 0; int i; + BUG_ON(list_empty(&data->trace_pages)); page = list_entry(data->trace_pages.next, struct page, lru); entries = page_address(page); - if (data->trace != entries) + if (head_page(data) != entries) goto failed; /* * The starting trace buffer always has valid elements, - * if any element exits. + * if any element exists. */ - entries = data->trace; + entries = head_page(data); for (i = 0; i < tr->entries; i++) { - if (i < data->trace_idx && - !trace_valid_entry(&entries[idx])) { - printk(KERN_CONT ".. invalid entry %d ", entries[idx].type); + if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) { + printk(KERN_CONT ".. invalid entry %d ", + entries[idx].type); goto failed; } @@ -80,11 +82,10 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) int ret = 0; for_each_possible_cpu(cpu) { - if (!tr->data[cpu]->trace) + if (!head_page(tr->data[cpu])) continue; cnt += tr->data[cpu]->trace_idx; - printk("%d: count = %ld\n", cpu, cnt); ret = trace_test_buffer_cpu(tr, tr->data[cpu]); if (ret) @@ -117,6 +118,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) } /* start the tracing */ + ftrace_enabled = 1; + tr->ctrl = 1; trace->init(tr); /* Sleep for a 1/10 of a second */ @@ -124,6 +127,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tr->ctrl = 0; trace->ctrl_update(tr); + ftrace_enabled = 0; + /* check the trace buffer */ ret = trace_test_buffer(tr, &count); trace->reset(tr); @@ -328,7 +333,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* create a high prio thread */ p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test"); - if (!IS_ERR(p)) { + if (IS_ERR(p)) { printk(KERN_CONT "Failed to create ftrace wakeup test thread "); return -1; } -- cgit v0.10.2 From 7bd2f24c2f769e3f8f1d4fc8b9fddf689825f6a7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: add README make it easier for newbies to find their way around. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 61d2f02..c736dd2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -995,6 +995,7 @@ print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) seq_printf(m, " <-"); seq_print_ip_sym(m, entry->fn.parent_ip, sym_flags); } + seq_printf(m, "\n"); break; case TRACE_CTX: S = entry->ctx.prev_state < sizeof(state_to_char) ? @@ -1007,7 +1008,6 @@ print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) entry->ctx.next_prio); break; } - seq_printf(m, "\n"); } static int trace_empty(struct trace_iterator *iter) @@ -1332,6 +1332,39 @@ static struct file_operations tracing_iter_fops = { .write = tracing_iter_ctrl_write, }; +static const char readme_msg[] = + "tracing mini-HOWTO:\n\n" + "# mkdir /debug\n" + "# mount -t debugfs nodev /debug\n\n" + "# cat /debug/tracing/available_tracers\n" + "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n" + "# cat /debug/tracing/current_tracer\n" + "none\n" + "# echo sched_switch > /debug/tracing/current_tracer\n" + "# cat /debug/tracing/current_tracer\n" + "sched_switch\n" + "# cat /debug/tracing/iter_ctrl\n" + "noprint-parent nosym-offset nosym-addr noverbose\n" + "# echo print-parent > /debug/tracing/iter_ctrl\n" + "# echo 1 > /debug/tracing/tracing_enabled\n" + "# cat /debug/tracing/trace > /tmp/trace.txt\n" + "echo 0 > /debug/tracing/tracing_enabled\n" +; + +static ssize_t +tracing_readme_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_read_from_buffer(ubuf, cnt, ppos, + readme_msg, strlen(readme_msg)); +} + +static struct file_operations tracing_readme_fops = { + .open = tracing_open_generic, + .read = tracing_readme_read, +}; + + static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -1598,6 +1631,11 @@ static __init void tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs " "'tracing_threash' entry\n"); + entry = debugfs_create_file("README", 0644, d_tracer, + NULL, &tracing_readme_fops); + if (!entry) + pr_warning("Could not create debugfs 'README' entry\n"); + #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, -- cgit v0.10.2 From 77a2b37d227483fe52aead242652aee406c25bf0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: startup tester on dynamic tracing. This patch adds a startup self test on dynamic code modification and filters. The test filters on a specific function, makes sure that no other function is traced, exectutes the function, then makes sure that the function is traced. This patch also fixes a slight bug with the ftrace selftest, where tracer_enabled was not being set. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 953a36d..a842d96 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -55,6 +55,7 @@ struct dyn_ftrace { }; int ftrace_force_update(void); +void ftrace_set_filter(unsigned char *buf, int len, int reset); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); @@ -70,6 +71,7 @@ extern void ftrace_call(void); extern void mcount_call(void); #else # define ftrace_force_update() ({ 0; }) +# define ftrace_set_filter(buf, len, reset) do { } while (0) #endif static inline void tracer_disable(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6d4d2e8..5e9389f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1010,6 +1010,25 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, return ret; } +/** + * ftrace_set_filter - set a function to filter on in ftrace + * @buf - the string that holds the function filter text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Filters denote which functions should be enabled when tracing is enabled. + * If @buf is NULL and reset is set, all functions will be enabled for tracing. + */ +notrace void ftrace_set_filter(unsigned char *buf, int len, int reset) +{ + mutex_lock(&ftrace_filter_lock); + if (reset) + ftrace_filter_reset(); + if (buf) + ftrace_match(buf, len); + mutex_unlock(&ftrace_filter_lock); +} + static int notrace ftrace_filter_release(struct inode *inode, struct file *file) { diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index c01874c..4c8a1b2 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -99,6 +99,100 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) } #ifdef CONFIG_FTRACE + +#ifdef CONFIG_DYNAMIC_FTRACE + +#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func +#define __STR(x) #x +#define STR(x) __STR(x) +static int DYN_FTRACE_TEST_NAME(void) +{ + /* used to call mcount */ + return 0; +} + +/* Test dynamic code modification and ftrace filters */ +int trace_selftest_startup_dynamic_tracing(struct tracer *trace, + struct trace_array *tr, + int (*func)(void)) +{ + unsigned long count; + int ret; + int save_ftrace_enabled = ftrace_enabled; + int save_tracer_enabled = tracer_enabled; + + /* The ftrace test PASSED */ + printk(KERN_CONT "PASSED\n"); + pr_info("Testing dynamic ftrace: "); + + /* enable tracing, and record the filter function */ + ftrace_enabled = 1; + tracer_enabled = 1; + + /* passed in by parameter to fool gcc from optimizing */ + func(); + + /* update the records */ + ret = ftrace_force_update(); + if (ret) { + printk(KERN_CONT ".. ftraced failed .. "); + return ret; + } + + /* filter only on our function */ + ftrace_set_filter(STR(DYN_FTRACE_TEST_NAME), + sizeof(STR(DYN_FTRACE_TEST_NAME)), 1); + + /* enable tracing */ + tr->ctrl = 1; + trace->init(tr); + /* Sleep for a 1/10 of a second */ + msleep(100); + + /* we should have nothing in the buffer */ + ret = trace_test_buffer(tr, &count); + if (ret) + goto out; + + if (count) { + ret = -1; + printk(KERN_CONT ".. filter did not filter .. "); + goto out; + } + + /* call our function again */ + func(); + + /* sleep again */ + msleep(100); + + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + ftrace_enabled = 0; + + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + + /* we should only have one item */ + if (!ret && count != 1) { + printk(KERN_CONT ".. filter failed .."); + ret = -1; + goto out; + } + out: + ftrace_enabled = save_ftrace_enabled; + tracer_enabled = save_tracer_enabled; + + /* Enable tracing on all functions again */ + ftrace_set_filter(NULL, 0, 1); + + return ret; +} +#else +# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) +#endif /* CONFIG_DYNAMIC_FTRACE */ /* * Simple verification test of ftrace function tracer. * Enable ftrace, sleep 1/10 second, and then read the trace @@ -109,8 +203,13 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) { unsigned long count; int ret; + int save_ftrace_enabled = ftrace_enabled; + int save_tracer_enabled = tracer_enabled; - /* make sure functions have been recorded */ + /* make sure msleep has been recorded */ + msleep(1); + + /* force the recorded functions to be traced */ ret = ftrace_force_update(); if (ret) { printk(KERN_CONT ".. ftraced failed .. "); @@ -119,6 +218,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* start the tracing */ ftrace_enabled = 1; + tracer_enabled = 1; tr->ctrl = 1; trace->init(tr); @@ -136,8 +236,16 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); ret = -1; + goto out; } + ret = trace_selftest_startup_dynamic_tracing(trace, tr, + DYN_FTRACE_TEST_NAME); + + out: + ftrace_enabled = save_ftrace_enabled; + tracer_enabled = save_tracer_enabled; + return ret; } #endif /* CONFIG_FTRACE */ @@ -415,6 +523,3 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr return ret; } #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ - -#ifdef CONFIG_DYNAMIC_FTRACE -#endif /* CONFIG_DYNAMIC_FTRACE */ -- cgit v0.10.2 From 4e3c3333f3bd7eedfd21b1155b3c7cd24fc7f754 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: fix time offset fix time offset calculations and ordering, plus make code more consistent. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c736dd2..8755a43 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -120,7 +120,7 @@ static DEFINE_SPINLOCK(ftrace_max_lock); * structure. (this way the maximum trace is permanently saved, * for later retrieval via /debugfs/tracing/latency_trace) */ -static void notrace +static notrace void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data = tr->data[cpu]; @@ -333,15 +333,16 @@ void unregister_tracer(struct tracer *type) mutex_unlock(&trace_types_lock); } -void notrace tracing_reset(struct trace_array_cpu *data) +notrace void tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; data->trace_current = head_page(data); data->trace_current_idx = 0; + data->time_offset = 0; } #ifdef CONFIG_FTRACE -static void notrace +static notrace void function_trace_call(unsigned long ip, unsigned long parent_ip) { struct trace_array *tr = &global_trace; @@ -398,7 +399,7 @@ static void trace_init_cmdlines(void) notrace void trace_stop_cmdline_recording(void); -static void notrace trace_save_cmdline(struct task_struct *tsk) +static notrace void trace_save_cmdline(struct task_struct *tsk) { unsigned map; unsigned idx; @@ -624,6 +625,7 @@ static void *find_next_entry_inc(struct trace_iterator *iter) iter->idx++; iter->next_idx[next_cpu]++; iter->next_page_idx[next_cpu]++; + if (iter->next_page_idx[next_cpu] >= ENTRIES_PER_PAGE) { struct trace_array_cpu *data = iter->tr->data[next_cpu]; @@ -635,19 +637,21 @@ static void *find_next_entry_inc(struct trace_iterator *iter) data->trace_pages.next; } } + iter->prev_ent = iter->ent; + iter->prev_cpu = iter->cpu; + iter->ent = next; iter->cpu = next_cpu; return next ? iter : NULL; } -static void notrace * -s_next(struct seq_file *m, void *v, loff_t *pos) +static notrace void *s_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_iterator *iter = m->private; - void *ent; void *last_ent = iter->ent; int i = (int)*pos; + void *ent; (*pos)++; @@ -693,6 +697,8 @@ static void *s_start(struct seq_file *m, loff_t *pos) iter->ent = NULL; iter->cpu = 0; iter->idx = -1; + iter->prev_ent = NULL; + iter->prev_cpu = -1; for_each_possible_cpu(i) { iter->next_idx[i] = 0; @@ -752,7 +758,7 @@ seq_print_sym_offset(struct seq_file *m, const char *fmt, unsigned long address) # define IP_FMT "%016lx" #endif -static void notrace +static notrace void seq_print_ip_sym(struct seq_file *m, unsigned long ip, unsigned long sym_flags) { if (!ip) { @@ -769,7 +775,7 @@ seq_print_ip_sym(struct seq_file *m, unsigned long ip, unsigned long sym_flags) seq_printf(m, " <" IP_FMT ">", ip); } -static void notrace print_lat_help_header(struct seq_file *m) +static notrace void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); seq_puts(m, "# / _-----=> irqs-off \n"); @@ -782,14 +788,14 @@ static void notrace print_lat_help_header(struct seq_file *m) seq_puts(m, "# \\ / ||||| \\ | / \n"); } -static void notrace print_func_help_header(struct seq_file *m) +static notrace void print_func_help_header(struct seq_file *m) { seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); seq_puts(m, "# | | | | |\n"); } -static void notrace +static notrace void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -858,7 +864,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) seq_puts(m, "\n"); } -static void notrace +static notrace void lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu) { int hardirq, softirq; @@ -895,7 +901,7 @@ lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu) unsigned long preempt_mark_thresh = 100; -static void notrace +static notrace void lat_print_timestamp(struct seq_file *m, unsigned long long abs_usecs, unsigned long rel_usecs) { @@ -910,7 +916,7 @@ lat_print_timestamp(struct seq_file *m, unsigned long long abs_usecs, static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; -static void notrace +static notrace void print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, unsigned int trace_idx, int cpu) { @@ -966,20 +972,50 @@ print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, } } -static void notrace +static notrace void sync_time_offset(struct trace_iterator *iter) +{ + struct trace_array_cpu *prev_array, *array; + struct trace_entry *prev_entry, *entry; + cycle_t prev_t, t; + + entry = iter->ent; + prev_entry = iter->prev_ent; + if (!prev_entry) + return; + + prev_array = iter->tr->data[iter->prev_cpu]; + array = iter->tr->data[iter->cpu]; + + prev_t = prev_entry->t + prev_array->time_offset; + t = entry->t + array->time_offset; + + /* + * If time goes backwards we increase the offset of + * the current array, to not have observable time warps. + * This will quickly synchronize the time offsets of + * multiple CPUs: + */ + if (t < prev_t) + array->time_offset += prev_t - t; +} + +static notrace void print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); - struct trace_entry *entry = iter->ent; + struct trace_entry *entry; unsigned long usec_rem; unsigned long long t; unsigned long secs; char *comm; int S; + sync_time_offset(iter); + entry = iter->ent; + comm = trace_find_cmdline(iter->ent->pid); - t = ns2usecs(entry->t); + t = ns2usecs(entry->t + iter->tr->data[iter->cpu]->time_offset); usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; @@ -1158,7 +1194,7 @@ static int tracing_lt_open(struct inode *inode, struct file *file) } -static void notrace * +static notrace void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct tracer *t = m->private; @@ -1374,8 +1410,7 @@ tracing_ctrl_read(struct file *filp, char __user *ubuf, int r; r = sprintf(buf, "%ld\n", tr->ctrl); - return simple_read_from_buffer(ubuf, cnt, ppos, - buf, r); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static ssize_t diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cc1d34b..5df8ff2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -56,6 +56,8 @@ struct trace_array_cpu { void *trace_current; struct list_head trace_pages; atomic_t disabled; + cycle_t time_offset; + /* these fields get copied into max-trace: */ unsigned trace_current_idx; unsigned long trace_idx; @@ -114,14 +116,19 @@ struct tracer { struct trace_iterator { struct trace_array *tr; struct tracer *trace; + struct trace_entry *ent; + int cpu; + + struct trace_entry *prev_ent; + int prev_cpu; + unsigned long iter_flags; loff_t pos; unsigned long next_idx[NR_CPUS]; struct list_head *next_page[NR_CPUS]; unsigned next_page_idx[NR_CPUS]; long idx; - int cpu; }; void notrace tracing_reset(struct trace_array_cpu *data); -- cgit v0.10.2 From 08bafa0efcf29fe18ec39c2147077b597368b018 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: disable all tracers on corrupted buffer If the trace buffer is detected to be corrupted, then we disable all tracers. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 4c8a1b2..a6f1ed7 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -67,6 +67,8 @@ trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data) return 0; failed: + /* disable tracing */ + tracing_disabled = 1; printk(KERN_CONT ".. corrupted trace buffer .. "); return -1; } -- cgit v0.10.2 From 1d4db00a5e30c7b8f8dc2a1b19e886fd942be143 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: reset selftests The tests may leave stuff in the buffers. This resets the buffers after each test is run. If a test fails, it does not reset the buffer to avoid touching a buffer that is corrupted. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8755a43..6580e7e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -289,6 +289,13 @@ int register_tracer(struct tracer *type) printk(KERN_CONT "FAILED!\n"); goto out; } + /* Only reset on passing, to avoid touching corrupted buffers */ + for_each_possible_cpu(i) { + data = tr->data[i]; + if (!head_page(data)) + continue; + tracing_reset(data); + } printk(KERN_CONT "PASSED\n"); } #endif -- cgit v0.10.2 From 93a588f459da134be6ab17c4104e28441beb0d22 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: change buffers to producer consumer This patch changes the way the CPU trace buffers are handled. Instead of always starting from the trace page head, the logic is changed to a producer consumer logic. This allows for the buffers to be drained while they are alive. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6580e7e..777b859 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -176,10 +176,9 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) INIT_LIST_HEAD(&flip_pages); - tr1->trace_current = NULL; - memcpy(&tr1->trace_current_idx, &tr2->trace_current_idx, + memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx, sizeof(struct trace_array_cpu) - - offsetof(struct trace_array_cpu, trace_current_idx)); + offsetof(struct trace_array_cpu, trace_head_idx)); check_pages(tr1); check_pages(tr2); @@ -228,7 +227,6 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_reset(max_tr.data[i]); flip_trace(max_tr.data[cpu], data); - tracing_reset(data); __update_max_tr(tr, tsk, cpu); @@ -343,9 +341,9 @@ void unregister_tracer(struct tracer *type) notrace void tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; - data->trace_current = head_page(data); - data->trace_current_idx = 0; - data->time_offset = 0; + data->trace_head = data->trace_tail = head_page(data); + data->trace_head_idx = 0; + data->trace_tail_idx = 0; } #ifdef CONFIG_FTRACE @@ -470,38 +468,65 @@ notrace void tracing_record_cmdline(struct task_struct *tsk) trace_save_cmdline(tsk); } +static inline notrace struct list_head * +trace_next_list(struct trace_array_cpu *data, struct list_head *next) +{ + /* + * Roundrobin - but skip the head (which is not a real page): + */ + next = next->next; + if (unlikely(next == &data->trace_pages)) + next = next->next; + BUG_ON(next == &data->trace_pages); + + return next; +} + +static inline notrace void * +trace_next_page(struct trace_array_cpu *data, void *addr) +{ + struct list_head *next; + struct page *page; + + page = virt_to_page(addr); + + next = trace_next_list(data, &page->lru); + page = list_entry(next, struct page, lru); + + return page_address(page); +} + static inline notrace struct trace_entry * tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) { unsigned long idx, idx_next; struct trace_entry *entry; - struct list_head *next; - struct page *page; data->trace_idx++; - idx = data->trace_current_idx; + idx = data->trace_head_idx; idx_next = idx + 1; BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE); - entry = data->trace_current + idx * TRACE_ENTRY_SIZE; + entry = data->trace_head + idx * TRACE_ENTRY_SIZE; if (unlikely(idx_next >= ENTRIES_PER_PAGE)) { - page = virt_to_page(data->trace_current); - /* - * Roundrobin - but skip the head (which is not a real page): - */ - next = page->lru.next; - if (unlikely(next == &data->trace_pages)) - next = next->next; - BUG_ON(next == &data->trace_pages); - - page = list_entry(next, struct page, lru); - data->trace_current = page_address(page); + data->trace_head = trace_next_page(data, data->trace_head); idx_next = 0; } - data->trace_current_idx = idx_next; + if (data->trace_head == data->trace_tail && + idx_next == data->trace_tail_idx) { + /* overrun */ + data->trace_tail_idx++; + if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { + data->trace_tail = + trace_next_page(data, data->trace_tail); + data->trace_tail_idx = 0; + } + } + + data->trace_head_idx = idx_next; return entry; } @@ -571,21 +596,11 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, return NULL; if (!iter->next_page[cpu]) { - /* - * Initialize. If the count of elements in - * this buffer is greater than the max entries - * we had an underrun. Which means we looped around. - * We can simply use the current pointer as our - * starting point. - */ - if (data->trace_idx >= tr->entries) { - page = virt_to_page(data->trace_current); - iter->next_page[cpu] = &page->lru; - iter->next_page_idx[cpu] = data->trace_current_idx; - } else { - iter->next_page[cpu] = data->trace_pages.next; - iter->next_page_idx[cpu] = 0; - } + /* Initialize the iterator for this cpu trace buffer */ + WARN_ON(!data->trace_tail); + page = virt_to_page(data->trace_tail); + iter->next_page[cpu] = &page->lru; + iter->next_page_idx[cpu] = data->trace_tail_idx; } page = list_entry(iter->next_page[cpu], struct page, lru); @@ -593,6 +608,12 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, array = page_address(page); + /* Still possible to catch up to the tail */ + if (iter->next_idx[cpu] && array == data->trace_tail && + iter->next_page_idx[cpu] == data->trace_tail_idx) + return NULL; + + WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE); return &array[iter->next_page_idx[cpu]]; } @@ -638,10 +659,8 @@ static void *find_next_entry_inc(struct trace_iterator *iter) iter->next_page_idx[next_cpu] = 0; iter->next_page[next_cpu] = - iter->next_page[next_cpu]->next; - if (iter->next_page[next_cpu] == &data->trace_pages) - iter->next_page[next_cpu] = - data->trace_pages.next; + trace_next_list(data, iter->next_page[next_cpu]); + } } iter->prev_ent = iter->ent; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5df8ff2..0ce1274 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -53,13 +53,15 @@ struct trace_entry { * the trace, etc.) */ struct trace_array_cpu { - void *trace_current; struct list_head trace_pages; atomic_t disabled; cycle_t time_offset; /* these fields get copied into max-trace: */ - unsigned trace_current_idx; + unsigned trace_head_idx; + unsigned trace_tail_idx; + void *trace_head; /* producer */ + void *trace_tail; /* consumer */ unsigned long trace_idx; unsigned long saved_latency; unsigned long critical_start; -- cgit v0.10.2 From 214023c3d13a71525e463b5e54e360b926b4dc90 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: add a buffer for output Later patches will need to print the same things as the seq output does. But those outputs will not use the seq utility. This patch adds a buffer to the iterator, that can be used by either the seq utility or other output. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 777b859..d39f4fa 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -169,6 +169,66 @@ void *head_page(struct trace_array_cpu *data) return page_address(page); } +static notrace int +trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +{ + int len = (PAGE_SIZE - 1) - s->len; + va_list ap; + + if (!len) + return 0; + + va_start(ap, fmt); + len = vsnprintf(s->buffer + s->len, len, fmt, ap); + va_end(ap); + + s->len += len; + + return len; +} + +static notrace int +trace_seq_puts(struct trace_seq *s, const char *str) +{ + int len = strlen(str); + + if (len > ((PAGE_SIZE - 1) - s->len)) + len = (PAGE_SIZE - 1) - s->len; + + memcpy(s->buffer + s->len, str, len); + s->len += len; + + return len; +} + +static notrace int +trace_seq_putc(struct trace_seq *s, unsigned char c) +{ + if (s->len >= (PAGE_SIZE - 1)) + return 0; + + s->buffer[s->len++] = c; + + return 1; +} + +static notrace void +trace_seq_reset(struct trace_seq *s) +{ + s->len = 0; +} + +static notrace void +trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ + int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; + + s->buffer[len] = 0; + seq_puts(m, s->buffer); + + trace_seq_reset(s); +} + notrace static void flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) { @@ -756,25 +816,26 @@ static void s_stop(struct seq_file *m, void *p) } static void -seq_print_sym_short(struct seq_file *m, const char *fmt, unsigned long address) +seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; kallsyms_lookup(address, NULL, NULL, NULL, str); - seq_printf(m, fmt, str); + trace_seq_printf(s, fmt, str); #endif } static void -seq_print_sym_offset(struct seq_file *m, const char *fmt, unsigned long address) +seq_print_sym_offset(struct trace_seq *s, const char *fmt, + unsigned long address) { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; sprint_symbol(str, address); - seq_printf(m, fmt, str); + trace_seq_printf(s, fmt, str); #endif } @@ -785,20 +846,20 @@ seq_print_sym_offset(struct seq_file *m, const char *fmt, unsigned long address) #endif static notrace void -seq_print_ip_sym(struct seq_file *m, unsigned long ip, unsigned long sym_flags) +seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { if (!ip) { - seq_printf(m, "0"); + trace_seq_printf(s, "0"); return; } if (sym_flags & TRACE_ITER_SYM_OFFSET) - seq_print_sym_offset(m, "%s", ip); + seq_print_sym_offset(s, "%s", ip); else - seq_print_sym_short(m, "%s", ip); + seq_print_sym_short(s, "%s", ip); if (sym_flags & TRACE_ITER_SYM_ADDR) - seq_printf(m, " <" IP_FMT ">", ip); + trace_seq_printf(s, " <" IP_FMT ">", ip); } static notrace void print_lat_help_header(struct seq_file *m) @@ -881,9 +942,11 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) if (data->critical_start) { seq_puts(m, " => started at: "); - seq_print_ip_sym(m, data->critical_start, sym_flags); + seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); + trace_print_seq(m, &iter->seq); seq_puts(m, "\n => ended at: "); - seq_print_ip_sym(m, data->critical_end, sym_flags); + seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); + trace_print_seq(m, &iter->seq); seq_puts(m, "\n"); } @@ -891,61 +954,61 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) } static notrace void -lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu) +lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) { int hardirq, softirq; char *comm; comm = trace_find_cmdline(entry->pid); - seq_printf(m, "%8.8s-%-5d ", comm, entry->pid); - seq_printf(m, "%d", cpu); - seq_printf(m, "%c%c", - (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', - ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); + trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); + trace_seq_printf(s, "%d", cpu); + trace_seq_printf(s, "%c%c", + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', + ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; if (hardirq && softirq) - seq_putc(m, 'H'); + trace_seq_putc(s, 'H'); else { if (hardirq) - seq_putc(m, 'h'); + trace_seq_putc(s, 'h'); else { if (softirq) - seq_putc(m, 's'); + trace_seq_putc(s, 's'); else - seq_putc(m, '.'); + trace_seq_putc(s, '.'); } } if (entry->preempt_count) - seq_printf(m, "%x", entry->preempt_count); + trace_seq_printf(s, "%x", entry->preempt_count); else - seq_puts(m, "."); + trace_seq_puts(s, "."); } unsigned long preempt_mark_thresh = 100; static notrace void -lat_print_timestamp(struct seq_file *m, unsigned long long abs_usecs, +lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, unsigned long rel_usecs) { - seq_printf(m, " %4lldus", abs_usecs); + trace_seq_printf(s, " %4lldus", abs_usecs); if (rel_usecs > preempt_mark_thresh) - seq_puts(m, "!: "); + trace_seq_puts(s, "!: "); else if (rel_usecs > 1) - seq_puts(m, "+: "); + trace_seq_puts(s, "+: "); else - seq_puts(m, " : "); + trace_seq_puts(s, " : "); } static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; static notrace void -print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, - unsigned int trace_idx, int cpu) +print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { + struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *next_entry = find_next_entry(iter, NULL); unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); @@ -962,39 +1025,40 @@ print_lat_fmt(struct seq_file *m, struct trace_iterator *iter, if (verbose) { comm = trace_find_cmdline(entry->pid); - seq_printf(m, "%16s %5d %d %d %08x %08x [%08lx]" - " %ld.%03ldms (+%ld.%03ldms): ", - comm, - entry->pid, cpu, entry->flags, - entry->preempt_count, trace_idx, - ns2usecs(entry->t), - abs_usecs/1000, - abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); + trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]" + " %ld.%03ldms (+%ld.%03ldms): ", + comm, + entry->pid, cpu, entry->flags, + entry->preempt_count, trace_idx, + ns2usecs(entry->t), + abs_usecs/1000, + abs_usecs % 1000, rel_usecs/1000, + rel_usecs % 1000); } else { - lat_print_generic(m, entry, cpu); - lat_print_timestamp(m, abs_usecs, rel_usecs); + lat_print_generic(s, entry, cpu); + lat_print_timestamp(s, abs_usecs, rel_usecs); } switch (entry->type) { case TRACE_FN: - seq_print_ip_sym(m, entry->fn.ip, sym_flags); - seq_puts(m, " ("); - seq_print_ip_sym(m, entry->fn.parent_ip, sym_flags); - seq_puts(m, ")\n"); + seq_print_ip_sym(s, entry->fn.ip, sym_flags); + trace_seq_puts(s, " ("); + seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); + trace_seq_puts(s, ")\n"); break; case TRACE_CTX: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; comm = trace_find_cmdline(entry->ctx.next_pid); - seq_printf(m, " %d:%d:%c --> %d:%d %s\n", - entry->ctx.prev_pid, - entry->ctx.prev_prio, - S, - entry->ctx.next_pid, - entry->ctx.next_prio, - comm); + trace_seq_printf(s, " %d:%d:%c --> %d:%d %s\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio, + comm); break; default: - seq_printf(m, "Unknown type %d\n", entry->type); + trace_seq_printf(s, "Unknown type %d\n", entry->type); } } @@ -1026,8 +1090,9 @@ static notrace void sync_time_offset(struct trace_iterator *iter) } static notrace void -print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) +print_trace_fmt(struct trace_iterator *iter) { + struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *entry; unsigned long usec_rem; @@ -1045,29 +1110,29 @@ print_trace_fmt(struct seq_file *m, struct trace_iterator *iter) usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; - seq_printf(m, "%16s-%-5d ", comm, entry->pid); - seq_printf(m, "[%02d] ", iter->cpu); - seq_printf(m, "%5lu.%06lu: ", secs, usec_rem); + trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + trace_seq_printf(s, "[%02d] ", iter->cpu); + trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); switch (entry->type) { case TRACE_FN: - seq_print_ip_sym(m, entry->fn.ip, sym_flags); + seq_print_ip_sym(s, entry->fn.ip, sym_flags); if ((sym_flags & TRACE_ITER_PRINT_PARENT) && entry->fn.parent_ip) { - seq_printf(m, " <-"); - seq_print_ip_sym(m, entry->fn.parent_ip, sym_flags); + trace_seq_printf(s, " <-"); + seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); } - seq_printf(m, "\n"); + trace_seq_printf(s, "\n"); break; case TRACE_CTX: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; - seq_printf(m, " %d:%d:%c ==> %d:%d\n", - entry->ctx.prev_pid, - entry->ctx.prev_prio, - S, - entry->ctx.next_pid, - entry->ctx.next_prio); + trace_seq_printf(s, " %d:%d:%c ==> %d:%d\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio); break; } } @@ -1108,9 +1173,10 @@ static int s_show(struct seq_file *m, void *v) } } else { if (iter->iter_flags & TRACE_FILE_LAT_FMT) - print_lat_fmt(m, iter, iter->idx, iter->cpu); + print_lat_fmt(iter, iter->idx, iter->cpu); else - print_trace_fmt(m, iter); + print_trace_fmt(iter); + trace_print_seq(m, &iter->seq); } return 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0ce1274..f5b32ca 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -111,11 +111,17 @@ struct tracer { int print_max; }; +struct trace_seq { + unsigned char buffer[PAGE_SIZE]; + unsigned int len; +}; + /* * Trace iterator - used by printout routines who present trace * results to users and which routines might sleep, etc: */ struct trace_iterator { + struct trace_seq seq; struct trace_array *tr; struct tracer *trace; -- cgit v0.10.2 From b3806b4316306dc9c542eff6c23d7d42918f3504 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: user run time file reading This patch creates a file called trace_pipe in the tracing debug directory. This file is a consumer of the trace buffers. This means that reads of this file consumes the entries from the trace buffers so that they will not be read a second time, as contrast to the static buffers latency_trace and trace. Reading from the trace_pipe will remove the entries from trace and latency_trace too. The advantage that trace_pipe has is that it can record live traces. It will block when there is nothing in the buffer, and read the entries as they are entered. An EOF happens when tracing is disabled (tracing_enabled = 0). Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d39f4fa..a40687a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -174,15 +174,20 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { int len = (PAGE_SIZE - 1) - s->len; va_list ap; + int ret; if (!len) return 0; va_start(ap, fmt); - len = vsnprintf(s->buffer + s->len, len, fmt, ap); + ret = vsnprintf(s->buffer + s->len, len, fmt, ap); va_end(ap); - s->len += len; + /* If we can't write it all, don't bother writing anything */ + if (ret > len) + return 0; + + s->len += ret; return len; } @@ -193,7 +198,7 @@ trace_seq_puts(struct trace_seq *s, const char *str) int len = strlen(str); if (len > ((PAGE_SIZE - 1) - s->len)) - len = (PAGE_SIZE - 1) - s->len; + return 0; memcpy(s->buffer + s->len, str, len); s->len += len; @@ -615,11 +620,13 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, { struct trace_entry *entry; + spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_FN; entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; + spin_unlock(&data->lock); } notrace void @@ -630,6 +637,7 @@ tracing_sched_switch_trace(struct trace_array *tr, { struct trace_entry *entry; + spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_CTX; @@ -638,6 +646,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.prev_state = prev->state; entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; + spin_unlock(&data->lock); } enum trace_file_type { @@ -652,7 +661,9 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, struct trace_entry *array; if (iter->next_idx[cpu] >= tr->entries || - iter->next_idx[cpu] >= data->trace_idx) + iter->next_idx[cpu] >= data->trace_idx || + (data->trace_head == data->trace_tail && + data->trace_head_idx == data->trace_tail_idx)) return NULL; if (!iter->next_page[cpu]) { @@ -702,33 +713,57 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) return next; } -static void *find_next_entry_inc(struct trace_iterator *iter) +static notrace void +trace_iterator_increment(struct trace_iterator *iter) { - struct trace_entry *next; - int next_cpu = -1; + iter->idx++; + iter->next_idx[iter->cpu]++; + iter->next_page_idx[iter->cpu]++; + if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) { + struct trace_array_cpu *data = iter->tr->data[iter->cpu]; - next = find_next_entry(iter, &next_cpu); + iter->next_page_idx[iter->cpu] = 0; + iter->next_page[iter->cpu] = + trace_next_list(data, iter->next_page[iter->cpu]); + } +} - if (next) { - iter->idx++; - iter->next_idx[next_cpu]++; - iter->next_page_idx[next_cpu]++; +static notrace void +trace_consume(struct trace_iterator *iter) +{ + struct trace_array_cpu *data = iter->tr->data[iter->cpu]; + + data->trace_tail_idx++; + if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { + data->trace_tail = trace_next_page(data, data->trace_tail); + data->trace_tail_idx = 0; + } - if (iter->next_page_idx[next_cpu] >= ENTRIES_PER_PAGE) { - struct trace_array_cpu *data = iter->tr->data[next_cpu]; + /* Check if we empty it, then reset the index */ + if (data->trace_head == data->trace_tail && + data->trace_head_idx == data->trace_tail_idx) + data->trace_idx = 0; - iter->next_page_idx[next_cpu] = 0; - iter->next_page[next_cpu] = - trace_next_list(data, iter->next_page[next_cpu]); + trace_iterator_increment(iter); +} + +static notrace void * +find_next_entry_inc(struct trace_iterator *iter) +{ + struct trace_entry *next; + int next_cpu = -1; + + next = find_next_entry(iter, &next_cpu); - } - } iter->prev_ent = iter->ent; iter->prev_cpu = iter->cpu; iter->ent = next; iter->cpu = next_cpu; + if (next) + trace_iterator_increment(iter); + return next ? iter : NULL; } @@ -815,7 +850,7 @@ static void s_stop(struct seq_file *m, void *p) mutex_unlock(&trace_types_lock); } -static void +static int seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) { #ifdef CONFIG_KALLSYMS @@ -823,11 +858,12 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) kallsyms_lookup(address, NULL, NULL, NULL, str); - trace_seq_printf(s, fmt, str); + return trace_seq_printf(s, fmt, str); #endif + return 1; } -static void +static int seq_print_sym_offset(struct trace_seq *s, const char *fmt, unsigned long address) { @@ -835,8 +871,9 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, char str[KSYM_SYMBOL_LEN]; sprint_symbol(str, address); - trace_seq_printf(s, fmt, str); + return trace_seq_printf(s, fmt, str); #endif + return 1; } #ifndef CONFIG_64BIT @@ -845,21 +882,25 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, # define IP_FMT "%016lx" #endif -static notrace void +static notrace int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { - if (!ip) { - trace_seq_printf(s, "0"); - return; - } + int ret; + + if (!ip) + return trace_seq_printf(s, "0"); if (sym_flags & TRACE_ITER_SYM_OFFSET) - seq_print_sym_offset(s, "%s", ip); + ret = seq_print_sym_offset(s, "%s", ip); else - seq_print_sym_short(s, "%s", ip); + ret = seq_print_sym_short(s, "%s", ip); + + if (!ret) + return 0; if (sym_flags & TRACE_ITER_SYM_ADDR) - trace_seq_printf(s, " <" IP_FMT ">", ip); + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + return ret; } static notrace void print_lat_help_header(struct seq_file *m) @@ -1089,7 +1130,7 @@ static notrace void sync_time_offset(struct trace_iterator *iter) array->time_offset += prev_t - t; } -static notrace void +static notrace int print_trace_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1100,6 +1141,7 @@ print_trace_fmt(struct trace_iterator *iter) unsigned long secs; char *comm; int S; + int ret; sync_time_offset(iter); entry = iter->ent; @@ -1110,31 +1152,49 @@ print_trace_fmt(struct trace_iterator *iter) usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; - trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); - trace_seq_printf(s, "[%02d] ", iter->cpu); - trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); + ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + if (!ret) + return 0; + ret = trace_seq_printf(s, "[%02d] ", iter->cpu); + if (!ret) + return 0; + ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); + if (!ret) + return 0; switch (entry->type) { case TRACE_FN: - seq_print_ip_sym(s, entry->fn.ip, sym_flags); + ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags); + if (!ret) + return 0; if ((sym_flags & TRACE_ITER_PRINT_PARENT) && entry->fn.parent_ip) { - trace_seq_printf(s, " <-"); - seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); + ret = trace_seq_printf(s, " <-"); + if (!ret) + return 0; + ret = seq_print_ip_sym(s, entry->fn.parent_ip, + sym_flags); + if (!ret) + return 0; } - trace_seq_printf(s, "\n"); + ret = trace_seq_printf(s, "\n"); + if (!ret) + return 0; break; case TRACE_CTX: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; - trace_seq_printf(s, " %d:%d:%c ==> %d:%d\n", - entry->ctx.prev_pid, - entry->ctx.prev_prio, - S, - entry->ctx.next_pid, - entry->ctx.next_prio); + ret = trace_seq_printf(s, " %d:%d:%c ==> %d:%d\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio); + if (!ret) + return 0; break; } + return 1; } static int trace_empty(struct trace_iterator *iter) @@ -1145,7 +1205,9 @@ static int trace_empty(struct trace_iterator *iter) for_each_possible_cpu(cpu) { data = iter->tr->data[cpu]; - if (head_page(data) && data->trace_idx) + if (head_page(data) && data->trace_idx && + (data->trace_tail != data->trace_head || + data->trace_tail_idx != data->trace_head_idx)) return 0; } return 1; @@ -1645,6 +1707,192 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, return cnt; } +static atomic_t tracing_reader; + +static int tracing_open_pipe(struct inode *inode, struct file *filp) +{ + struct trace_iterator *iter; + + if (tracing_disabled) + return -ENODEV; + + /* We only allow for reader of the pipe */ + if (atomic_inc_return(&tracing_reader) != 1) { + atomic_dec(&tracing_reader); + return -EBUSY; + } + + /* create a buffer to store the information to pass to userspace */ + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + iter->tr = &global_trace; + + filp->private_data = iter; + + return 0; +} + +static int tracing_release_pipe(struct inode *inode, struct file *file) +{ + struct trace_iterator *iter = file->private_data; + + kfree(iter); + atomic_dec(&tracing_reader); + + return 0; +} + +/* + * Consumer reader. + */ +static ssize_t +tracing_read_pipe(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_iterator *iter = filp->private_data; + struct trace_array_cpu *data; + static cpumask_t mask; + struct trace_entry *entry; + static int start; + unsigned long flags; + int read = 0; + int cpu; + int len; + int ret; + + /* return any leftover data */ + if (iter->seq.len > start) { + len = iter->seq.len - start; + if (cnt > len) + cnt = len; + ret = copy_to_user(ubuf, iter->seq.buffer + start, cnt); + if (ret) + cnt = -EFAULT; + + start += len; + + return cnt; + } + + trace_seq_reset(&iter->seq); + start = 0; + + while (trace_empty(iter)) { + /* + * This is a make-shift waitqueue. The reason we don't use + * an actual wait queue is because: + * 1) we only ever have one waiter + * 2) the tracing, traces all functions, we don't want + * the overhead of calling wake_up and friends + * (and tracing them too) + * Anyway, this is really very primitive wakeup. + */ + set_current_state(TASK_INTERRUPTIBLE); + iter->tr->waiter = current; + + /* sleep for one second, and try again. */ + schedule_timeout(HZ); + + iter->tr->waiter = NULL; + + if (signal_pending(current)) + return -EINTR; + + /* + * We block until we read something and tracing is disabled. + * We still block if tracing is disabled, but we have never + * read anything. This allows a user to cat this file, and + * then enable tracing. But after we have read something, + * we give an EOF when tracing is again disabled. + * + * iter->pos will be 0 if we haven't read anything. + */ + if (!tracer_enabled && iter->pos) + break; + + continue; + } + + /* stop when tracing is finished */ + if (trace_empty(iter)) + return 0; + + if (cnt >= PAGE_SIZE) + cnt = PAGE_SIZE - 1; + + memset(iter, 0, sizeof(*iter)); + iter->tr = &global_trace; + iter->pos = -1; + + /* + * We need to stop all tracing on all CPUS to read the + * the next buffer. This is a bit expensive, but is + * not done often. We fill all what we can read, + * and then release the locks again. + */ + + cpus_clear(mask); + local_irq_save(flags); + for_each_possible_cpu(cpu) { + data = iter->tr->data[cpu]; + + if (!head_page(data) || !data->trace_idx) + continue; + + atomic_inc(&data->disabled); + spin_lock(&data->lock); + cpu_set(cpu, mask); + } + + while ((entry = find_next_entry(iter, &cpu))) { + + if (!entry) + break; + + iter->ent = entry; + iter->cpu = cpu; + + ret = print_trace_fmt(iter); + if (!ret) + break; + + trace_consume(iter); + + if (iter->seq.len >= cnt) + break; + + } + + for_each_possible_cpu(cpu) { + data = iter->tr->data[cpu]; + + if (!cpu_isset(cpu, mask)) + continue; + spin_unlock(&data->lock); + atomic_dec(&data->disabled); + } + local_irq_restore(flags); + + /* Now copy what we have to the user */ + read = iter->seq.len; + if (read > cnt) + read = cnt; + + ret = copy_to_user(ubuf, iter->seq.buffer, read); + + if (read < iter->seq.len) + start = read; + else + trace_seq_reset(&iter->seq); + + if (ret) + read = -EFAULT; + + return read; +} + static struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -1663,6 +1911,12 @@ static struct file_operations set_tracer_fops = { .write = tracing_set_trace_write, }; +static struct file_operations tracing_pipe_fops = { + .open = tracing_open_pipe, + .read = tracing_read_pipe, + .release = tracing_release_pipe, +}; + #ifdef CONFIG_DYNAMIC_FTRACE static ssize_t @@ -1763,6 +2017,11 @@ static __init void tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'README' entry\n"); + entry = debugfs_create_file("trace_pipe", 0644, d_tracer, + NULL, &tracing_pipe_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'tracing_threash' entry\n"); #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, @@ -1816,6 +2075,7 @@ static int trace_alloc_page(void) /* Now that we successfully allocate a page per CPU, add them */ for_each_possible_cpu(i) { data = global_trace.data[i]; + spin_lock_init(&data->lock); page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -1823,6 +2083,7 @@ static int trace_alloc_page(void) #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; + spin_lock_init(&data->lock); page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f5b32ca..29a7ea5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -55,6 +55,7 @@ struct trace_entry { struct trace_array_cpu { struct list_head trace_pages; atomic_t disabled; + spinlock_t lock; cycle_t time_offset; /* these fields get copied into max-trace: */ @@ -88,6 +89,7 @@ struct trace_array { long ctrl; int cpu; cycle_t time_start; + struct task_struct *waiter; struct trace_array_cpu *data[NR_CPUS]; }; -- cgit v0.10.2 From d4c5a2f5870939d837293de87b41dda0012a4572 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: fix locking we can hold all cpu trace buffer locks at once - put each into a separate lock class. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a40687a..b3811ca 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1865,11 +1865,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, } - for_each_possible_cpu(cpu) { + for_each_cpu_mask(cpu, mask) { data = iter->tr->data[cpu]; - - if (!cpu_isset(cpu, mask)) - continue; spin_unlock(&data->lock); atomic_dec(&data->disabled); } @@ -2076,6 +2073,7 @@ static int trace_alloc_page(void) for_each_possible_cpu(i) { data = global_trace.data[i]; spin_lock_init(&data->lock); + lockdep_set_class(&data->lock, &data->lock_key); page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -2084,6 +2082,7 @@ static int trace_alloc_page(void) #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; spin_lock_init(&data->lock); + lockdep_set_class(&data->lock, &data->lock_key); page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -2203,5 +2202,4 @@ __init static int tracer_alloc_buffers(void) } return ret; } - fs_initcall(tracer_alloc_buffers); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 29a7ea5..b040835 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -56,6 +56,7 @@ struct trace_array_cpu { struct list_head trace_pages; atomic_t disabled; spinlock_t lock; + struct lock_class_key lock_key; cycle_t time_offset; /* these fields get copied into max-trace: */ -- cgit v0.10.2 From 4bf39a9411a4ce8712954e03a9bd1592ee345919 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: cleanups no code changed. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5e9389f..97c4086 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -756,9 +756,11 @@ ftrace_avail_open(struct inode *inode, struct file *file) ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { struct seq_file *m = file->private_data; + m->private = iter; - } else + } else { kfree(iter); + } return ret; } @@ -770,6 +772,7 @@ int ftrace_avail_release(struct inode *inode, struct file *file) seq_release(inode, file); kfree(iter); + return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b3811ca..4550afd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1245,10 +1245,10 @@ static int s_show(struct seq_file *m, void *v) } static struct seq_operations tracer_seq_ops = { - .start = s_start, - .next = s_next, - .stop = s_stop, - .show = s_show, + .start = s_start, + .next = s_next, + .stop = s_stop, + .show = s_show, }; static struct trace_iterator notrace * @@ -1397,10 +1397,10 @@ static int t_show(struct seq_file *m, void *v) } static struct seq_operations show_traces_seq_ops = { - .start = t_start, - .next = t_next, - .stop = t_stop, - .show = t_show, + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, }; static int show_traces_open(struct inode *inode, struct file *file) @@ -1420,17 +1420,17 @@ static int show_traces_open(struct inode *inode, struct file *file) } static struct file_operations tracing_fops = { - .open = tracing_open, - .read = seq_read, - .llseek = seq_lseek, - .release = tracing_release, + .open = tracing_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_release, }; static struct file_operations tracing_lt_fops = { - .open = tracing_lt_open, - .read = seq_read, - .llseek = seq_lseek, - .release = tracing_release, + .open = tracing_lt_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_release, }; static struct file_operations show_traces_fops = { @@ -1620,8 +1620,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, r = sprintf(buf, "\n"); mutex_unlock(&trace_types_lock); - return simple_read_from_buffer(ubuf, cnt, ppos, - buf, r); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static ssize_t @@ -1680,8 +1679,7 @@ tracing_max_lat_read(struct file *filp, char __user *ubuf, *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr)); if (r > 64) r = 64; - return simple_read_from_buffer(ubuf, cnt, ppos, - buf, r); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static ssize_t @@ -1891,27 +1889,27 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, } static struct file_operations tracing_max_lat_fops = { - .open = tracing_open_generic, - .read = tracing_max_lat_read, - .write = tracing_max_lat_write, + .open = tracing_open_generic, + .read = tracing_max_lat_read, + .write = tracing_max_lat_write, }; static struct file_operations tracing_ctrl_fops = { - .open = tracing_open_generic, - .read = tracing_ctrl_read, - .write = tracing_ctrl_write, + .open = tracing_open_generic, + .read = tracing_ctrl_read, + .write = tracing_ctrl_write, }; static struct file_operations set_tracer_fops = { - .open = tracing_open_generic, - .read = tracing_set_trace_read, - .write = tracing_set_trace_write, + .open = tracing_open_generic, + .read = tracing_set_trace_read, + .write = tracing_set_trace_write, }; static struct file_operations tracing_pipe_fops = { - .open = tracing_open_pipe, - .read = tracing_read_pipe, - .release = tracing_release_pipe, + .open = tracing_open_pipe, + .read = tracing_read_pipe, + .release = tracing_release_pipe, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -1925,13 +1923,13 @@ tracing_read_long(struct file *filp, char __user *ubuf, int r; r = sprintf(buf, "%ld\n", *p); - return simple_read_from_buffer(ubuf, cnt, ppos, - buf, r); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static struct file_operations tracing_read_long_fops = { - .open = tracing_open_generic, - .read = tracing_read_long, + .open = tracing_open_generic, + .read = tracing_read_long, }; #endif @@ -2033,7 +2031,7 @@ static __init void tracer_init_debugfs(void) /* dummy trace to disable tracing */ static struct tracer no_tracer __read_mostly = { - .name = "none", + .name = "none", }; static int trace_alloc_page(void) -- cgit v0.10.2 From 750ed1a40783432d0dcb0e6c2e813a12615d7664 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: timestamp syncing, prepare rename and uninline now() to ftrace_now(). Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 97c4086..a15e068 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -531,7 +531,7 @@ static int notrace __ftrace_update_code(void *ignore) save_ftrace_enabled = ftrace_enabled; ftrace_enabled = 0; - start = now(raw_smp_processor_id()); + start = ftrace_now(raw_smp_processor_id()); ftrace_update_cnt = 0; /* No locks needed, the machine is stopped! */ @@ -550,7 +550,7 @@ static int notrace __ftrace_update_code(void *ignore) } - stop = now(raw_smp_processor_id()); + stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4550afd..e3778ab 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -42,6 +42,11 @@ ns2usecs(cycle_t nsec) return nsec; } +notrace cycle_t ftrace_now(int cpu) +{ + return cpu_clock(cpu); +} + static atomic_t tracer_counter; static struct trace_array global_trace; @@ -607,7 +612,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) entry->idx = atomic_inc_return(&tracer_counter); entry->preempt_count = pc & 0xff; entry->pid = tsk->pid; - entry->t = now(raw_smp_processor_id()); + entry->t = ftrace_now(raw_smp_processor_id()); entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b040835..30cad67 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -171,10 +171,7 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); -static inline notrace cycle_t now(int cpu) -{ - return cpu_clock(cpu); -} +extern notrace cycle_t ftrace_now(int cpu); #ifdef CONFIG_SCHED_TRACER extern void notrace diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 5d8ad7a..e5d34b7 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -20,7 +20,7 @@ static notrace void function_reset(struct trace_array *tr) { int cpu; - tr->time_start = now(tr->cpu); + tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) tracing_reset(tr->data[cpu]); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2dfebb6..d2a6e6f 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -136,7 +136,7 @@ check_critical_timing(struct trace_array *tr, * as long as possible: */ T0 = data->preempt_timestamp; - T1 = now(cpu); + T1 = ftrace_now(cpu); delta = T1-T0; local_save_flags(flags); @@ -186,7 +186,7 @@ out_unlock: out: data->critical_sequence = max_sequence; - data->preempt_timestamp = now(cpu); + data->preempt_timestamp = ftrace_now(cpu); tracing_reset(data); ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); } @@ -215,7 +215,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); data->critical_sequence = max_sequence; - data->preempt_timestamp = now(cpu); + data->preempt_timestamp = ftrace_now(cpu); data->critical_start = parent_ip ? : ip; tracing_reset(data); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 6c92841..8d65667 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -61,7 +61,7 @@ static notrace void sched_switch_reset(struct trace_array *tr) { int cpu; - tr->time_start = now(tr->cpu); + tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) tracing_reset(tr->data[cpu]); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 688df96..b7df825 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -92,7 +92,7 @@ wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) * as long as possible: */ T0 = data->preempt_timestamp; - T1 = now(cpu); + T1 = ftrace_now(cpu); delta = T1-T0; if (!report_latency(delta)) @@ -191,7 +191,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p, local_save_flags(flags); - tr->data[wakeup_cpu]->preempt_timestamp = now(cpu); + tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); ftrace(tr, tr->data[wakeup_cpu], CALLER_ADDR1, CALLER_ADDR2, flags); out_locked: -- cgit v0.10.2 From 53c37c17aafcf50f7c6fddaf01dda8f9d7e31ddf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: fast, scalable, synchronized timestamps implement globally synchronized, fast and scalable time source for tracing. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e3778ab..9a931c7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -42,9 +42,61 @@ ns2usecs(cycle_t nsec) return nsec; } +static const int time_sync_freq_max = 128; +static const cycle_t time_sync_thresh = 100000; + +static DEFINE_PER_CPU(cycle_t, time_offset); +static DEFINE_PER_CPU(cycle_t, prev_cpu_time); +static DEFINE_PER_CPU(int, time_sync_count); +static DEFINE_PER_CPU(int, time_sync_freq); + +/* + * Global lock which we take every now and then to synchronize + * the CPUs time. This method is not warp-safe, but it's good + * enough to synchronize slowly diverging time sources and thus + * it's good enough for tracing: + */ +static DEFINE_SPINLOCK(time_sync_lock); +static cycle_t prev_global_time; + +static notrace cycle_t __ftrace_now_sync(cycles_t time, int cpu) +{ + unsigned long flags; + + spin_lock_irqsave(&time_sync_lock, flags); + + /* + * Update the synchronization frequency: + */ + if (per_cpu(time_sync_freq, cpu) < time_sync_freq_max) + per_cpu(time_sync_freq, cpu) *= 2; + per_cpu(time_sync_count, cpu) = per_cpu(time_sync_freq, cpu); + + if (time < prev_global_time) { + per_cpu(time_offset, cpu) += prev_global_time - time; + time = prev_global_time; + } else { + prev_global_time = time; + } + + spin_unlock_irqrestore(&time_sync_lock, flags); + + return time; +} + notrace cycle_t ftrace_now(int cpu) { - return cpu_clock(cpu); + cycle_t prev_cpu_time, time, delta_time; + + prev_cpu_time = per_cpu(prev_cpu_time, cpu); + time = sched_clock() + per_cpu(time_offset, cpu); + delta_time = time-prev_cpu_time; + + if (unlikely(delta_time > time_sync_thresh || + --per_cpu(time_sync_count, cpu) <= 0)) + time = __ftrace_now_sync(time, cpu); + + return time; } static atomic_t tracer_counter; -- cgit v0.10.2 From cdd31cd2d7a0dcbec2cce3974f7129dd4fc8c879 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: remove-idx-sync remove idx syncing - it's expensive on SMP. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9a931c7..ce8ceb8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -99,7 +99,6 @@ notrace cycle_t ftrace_now(int cpu) return time; } -static atomic_t tracer_counter; static struct trace_array global_trace; static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); @@ -661,7 +660,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) pc = preempt_count(); - entry->idx = atomic_inc_return(&tracer_counter); entry->preempt_count = pc & 0xff; entry->pid = tsk->pid; entry->t = ftrace_now(raw_smp_processor_id()); @@ -757,8 +755,10 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) if (!head_page(tr->data[cpu])) continue; ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); - if (ent && - (!next || (long)(next->idx - ent->idx) > 0)) { + /* + * Pick the entry with the smallest timestamp: + */ + if (ent && (!next || ent->t < next->t)) { next = ent; next_cpu = cpu; } @@ -800,8 +800,6 @@ trace_consume(struct trace_iterator *iter) if (data->trace_head == data->trace_tail && data->trace_head_idx == data->trace_tail_idx) data->trace_idx = 0; - - trace_iterator_increment(iter); } static notrace void * @@ -1160,33 +1158,6 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) } } -static notrace void sync_time_offset(struct trace_iterator *iter) -{ - struct trace_array_cpu *prev_array, *array; - struct trace_entry *prev_entry, *entry; - cycle_t prev_t, t; - - entry = iter->ent; - prev_entry = iter->prev_ent; - if (!prev_entry) - return; - - prev_array = iter->tr->data[iter->prev_cpu]; - array = iter->tr->data[iter->cpu]; - - prev_t = prev_entry->t + prev_array->time_offset; - t = entry->t + array->time_offset; - - /* - * If time goes backwards we increase the offset of - * the current array, to not have observable time warps. - * This will quickly synchronize the time offsets of - * multiple CPUs: - */ - if (t < prev_t) - array->time_offset += prev_t - t; -} - static notrace int print_trace_fmt(struct trace_iterator *iter) { @@ -1200,12 +1171,11 @@ print_trace_fmt(struct trace_iterator *iter) int S; int ret; - sync_time_offset(iter); entry = iter->ent; comm = trace_find_cmdline(iter->ent->pid); - t = ns2usecs(entry->t + iter->tr->data[iter->cpu]->time_offset); + t = ns2usecs(entry->t); usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 30cad67..27fa2d0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -38,7 +38,6 @@ struct trace_entry { char preempt_count; int pid; cycle_t t; - unsigned long idx; union { struct ftrace_entry fn; struct ctx_switch_entry ctx; @@ -57,7 +56,6 @@ struct trace_array_cpu { atomic_t disabled; spinlock_t lock; struct lock_class_key lock_key; - cycle_t time_offset; /* these fields get copied into max-trace: */ unsigned trace_head_idx; -- cgit v0.10.2 From 8c523a9d82dbc4f3f7d972df8c0f1eacd83d0d55 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:46 +0200 Subject: ftrace: clean-up-pipe-iteration Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ce8ceb8..42f1926 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -770,12 +770,12 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) return next; } -static notrace void -trace_iterator_increment(struct trace_iterator *iter) +static notrace void trace_iterator_increment(struct trace_iterator *iter) { iter->idx++; iter->next_idx[iter->cpu]++; iter->next_page_idx[iter->cpu]++; + if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) { struct trace_array_cpu *data = iter->tr->data[iter->cpu]; @@ -785,8 +785,7 @@ trace_iterator_increment(struct trace_iterator *iter) } } -static notrace void -trace_consume(struct trace_iterator *iter) +static notrace void trace_consume(struct trace_iterator *iter) { struct trace_array_cpu *data = iter->tr->data[iter->cpu]; @@ -802,8 +801,7 @@ trace_consume(struct trace_iterator *iter) data->trace_idx = 0; } -static notrace void * -find_next_entry_inc(struct trace_iterator *iter) +static notrace void *find_next_entry_inc(struct trace_iterator *iter) { struct trace_entry *next; int next_cpu = -1; @@ -1871,14 +1869,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cpu_set(cpu, mask); } - while ((entry = find_next_entry(iter, &cpu))) { - - if (!entry) - break; - - iter->ent = entry; - iter->cpu = cpu; - + while ((entry = find_next_entry_inc(iter)) != NULL) { ret = print_trace_fmt(iter); if (!ret) break; @@ -1887,7 +1878,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, if (iter->seq.len >= cnt) break; - } for_each_cpu_mask(cpu, mask) { -- cgit v0.10.2 From f9896bf30928922a3913a3810a4ab7908da6cfe7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:47 +0200 Subject: ftrace: add raw output Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 42f1926..bebd263 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -153,6 +153,7 @@ enum trace_iterator_flags { TRACE_ITER_SYM_OFFSET = 0x02, TRACE_ITER_SYM_ADDR = 0x04, TRACE_ITER_VERBOSE = 0x08, + TRACE_ITER_RAW = 0x10, }; #define TRACE_ITER_SYM_MASK \ @@ -164,6 +165,7 @@ static const char *trace_options[] = { "sym-offset", "sym-addr", "verbose", + "raw", NULL }; @@ -1099,7 +1101,7 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; -static notrace void +static notrace int print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { struct trace_seq *s = &iter->seq; @@ -1154,10 +1156,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } + return 1; } -static notrace int -print_trace_fmt(struct trace_iterator *iter) +static notrace int print_trace_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -1222,6 +1224,43 @@ print_trace_fmt(struct trace_iterator *iter) return 1; } +static notrace int print_raw_fmt(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry; + int ret; + int S; + + entry = iter->ent; + + ret = trace_seq_printf(s, "%d %d %llu ", + entry->pid, iter->cpu, entry->t); + if (!ret) + return 0; + + switch (entry->type) { + case TRACE_FN: + ret = trace_seq_printf(s, "%x %x\n", + entry->fn.ip, entry->fn.parent_ip); + if (!ret) + return 0; + break; + case TRACE_CTX: + S = entry->ctx.prev_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.prev_state] : 'X'; + ret = trace_seq_printf(s, "%d %d %c %d %d\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio); + if (!ret) + return 0; + break; + } + return 1; +} + static int trace_empty(struct trace_iterator *iter) { struct trace_array_cpu *data; @@ -1238,6 +1277,17 @@ static int trace_empty(struct trace_iterator *iter) return 1; } +static int print_trace_line(struct trace_iterator *iter) +{ + if (trace_flags & TRACE_ITER_RAW) + return print_raw_fmt(iter); + + if (iter->iter_flags & TRACE_FILE_LAT_FMT) + return print_lat_fmt(iter, iter->idx, iter->cpu); + + return print_trace_fmt(iter); +} + static int s_show(struct seq_file *m, void *v) { struct trace_iterator *iter = v; @@ -1259,10 +1309,7 @@ static int s_show(struct seq_file *m, void *v) print_func_help_header(m); } } else { - if (iter->iter_flags & TRACE_FILE_LAT_FMT) - print_lat_fmt(iter, iter->idx, iter->cpu); - else - print_trace_fmt(iter); + print_trace_line(iter); trace_print_seq(m, &iter->seq); } @@ -1870,7 +1917,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, } while ((entry = find_next_entry_inc(iter)) != NULL) { - ret = print_trace_fmt(iter); + ret = print_trace_line(iter); if (!ret) break; -- cgit v0.10.2 From cb0f12aae8d085140d37ada351aa5a8e76c3f9b0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:47 +0200 Subject: ftrace: bin-output Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bebd263..d78cbc4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -154,6 +154,7 @@ enum trace_iterator_flags { TRACE_ITER_SYM_ADDR = 0x04, TRACE_ITER_VERBOSE = 0x08, TRACE_ITER_RAW = 0x10, + TRACE_ITER_BIN = 0x20, }; #define TRACE_ITER_SYM_MASK \ @@ -166,6 +167,7 @@ static const char *trace_options[] = { "sym-addr", "verbose", "raw", + "bin", NULL }; @@ -275,6 +277,18 @@ trace_seq_putc(struct trace_seq *s, unsigned char c) return 1; } +static notrace int +trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) +{ + if (len > ((PAGE_SIZE - 1) - s->len)) + return 0; + + memcpy(s->buffer + s->len, mem, len); + s->len += len; + + return len; +} + static notrace void trace_seq_reset(struct trace_seq *s) { @@ -1261,6 +1275,39 @@ static notrace int print_raw_fmt(struct trace_iterator *iter) return 1; } +#define SEQ_PUT_FIELD_RET(s, x) \ +do { \ + if (!trace_seq_putmem(s, &(x), sizeof(x))) \ + return 0; \ +} while (0) + +static notrace int print_bin_fmt(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry; + + entry = iter->ent; + + SEQ_PUT_FIELD_RET(s, entry->pid); + SEQ_PUT_FIELD_RET(s, entry->cpu); + SEQ_PUT_FIELD_RET(s, entry->t); + + switch (entry->type) { + case TRACE_FN: + SEQ_PUT_FIELD_RET(s, entry->fn.ip); + SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip); + break; + case TRACE_CTX: + SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid); + SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio); + SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state); + SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid); + SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); + break; + } + return 1; +} + static int trace_empty(struct trace_iterator *iter) { struct trace_array_cpu *data; @@ -1279,6 +1326,9 @@ static int trace_empty(struct trace_iterator *iter) static int print_trace_line(struct trace_iterator *iter) { + if (trace_flags & TRACE_ITER_BIN) + return print_bin_fmt(iter); + if (trace_flags & TRACE_ITER_RAW) return print_raw_fmt(iter); -- cgit v0.10.2 From f0a920d5752e1788c0cba2add103076bcc0f7a49 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:47 +0200 Subject: ftrace: add trace_special() for ad-hoc tracing. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d78cbc4..fa13059 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -137,6 +137,7 @@ enum trace_type { TRACE_FN, TRACE_CTX, + TRACE_SPECIAL, __TRACE_LAST_TYPE }; @@ -701,6 +702,22 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, } notrace void +trace_special(struct trace_array *tr, struct trace_array_cpu *data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + struct trace_entry *entry; + + spin_lock(&data->lock); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_SPECIAL; + entry->special.arg1 = arg1; + entry->special.arg2 = arg2; + entry->special.arg3 = arg3; + spin_unlock(&data->lock); +} + +notrace void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, @@ -1167,6 +1184,12 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) entry->ctx.next_prio, comm); break; + case TRACE_SPECIAL: + trace_seq_printf(s, " %lx %lx %lx\n", + entry->special.arg1, + entry->special.arg2, + entry->special.arg3); + break; default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1234,6 +1257,14 @@ static notrace int print_trace_fmt(struct trace_iterator *iter) if (!ret) return 0; break; + case TRACE_SPECIAL: + ret = trace_seq_printf(s, " %lx %lx %lx\n", + entry->special.arg1, + entry->special.arg2, + entry->special.arg3); + if (!ret) + return 0; + break; } return 1; } @@ -1271,6 +1302,14 @@ static notrace int print_raw_fmt(struct trace_iterator *iter) if (!ret) return 0; break; + case TRACE_SPECIAL: + ret = trace_seq_printf(s, " %lx %lx %lx\n", + entry->special.arg1, + entry->special.arg2, + entry->special.arg3); + if (!ret) + return 0; + break; } return 1; } @@ -1304,6 +1343,11 @@ static notrace int print_bin_fmt(struct trace_iterator *iter) SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid); SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); break; + case TRACE_SPECIAL: + SEQ_PUT_FIELD_RET(s, entry->special.arg1); + SEQ_PUT_FIELD_RET(s, entry->special.arg2); + SEQ_PUT_FIELD_RET(s, entry->special.arg3); + break; } return 1; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 27fa2d0..7bdfef3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -26,6 +26,15 @@ struct ctx_switch_entry { }; /* + * Special (free-form) trace entry: + */ +struct special_entry { + unsigned long arg1; + unsigned long arg2; + unsigned long arg3; +}; + +/* * The trace entry - the most basic unit of tracing. This is what * is printed in the end as a single line in the trace output, such as: * @@ -41,6 +50,7 @@ struct trace_entry { union { struct ftrace_entry fn; struct ctx_switch_entry ctx; + struct special_entry special; }; }; @@ -154,6 +164,11 @@ void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *next, unsigned long flags); void tracing_record_cmdline(struct task_struct *tsk); +void trace_special(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3); void tracing_start_function_trace(void); void tracing_stop_function_trace(void); -- cgit v0.10.2 From dcb6308f2b56720599f6b9d5a01c33e67e69bde4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace, locking fix should be an irq-safe lock ... Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fa13059..70f94fa 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -691,14 +691,15 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags) { struct trace_entry *entry; + unsigned long irq_flags; - spin_lock(&data->lock); + spin_lock_irqsave(&data->lock, irq_flags); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_FN; entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; - spin_unlock(&data->lock); + spin_unlock_irqrestore(&data->lock, irq_flags); } notrace void @@ -706,15 +707,16 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, unsigned long arg1, unsigned long arg2, unsigned long arg3) { struct trace_entry *entry; + unsigned long irq_flags; - spin_lock(&data->lock); + spin_lock_irqsave(&data->lock, irq_flags); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, 0); entry->type = TRACE_SPECIAL; entry->special.arg1 = arg1; entry->special.arg2 = arg2; entry->special.arg3 = arg3; - spin_unlock(&data->lock); + spin_unlock_irqrestore(&data->lock, irq_flags); } notrace void @@ -724,8 +726,9 @@ tracing_sched_switch_trace(struct trace_array *tr, unsigned long flags) { struct trace_entry *entry; + unsigned long irq_flags; - spin_lock(&data->lock); + spin_lock_irqsave(&data->lock, irq_flags); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_CTX; @@ -734,7 +737,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.prev_state = prev->state; entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; - spin_unlock(&data->lock); + spin_unlock_irqrestore(&data->lock, irq_flags); } enum trace_file_type { -- cgit v0.10.2 From 088b1e427dbba2af93cb6a7d39258c10ff58dd27 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: pipe fixes Some fixes for better output with the trace pipe. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 70f94fa..c56fc5e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -770,11 +770,6 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, array = page_address(page); - /* Still possible to catch up to the tail */ - if (iter->next_idx[cpu] && array == data->trace_tail && - iter->next_page_idx[cpu] == data->trace_tail_idx) - return NULL; - WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE); return &array[iter->next_page_idx[cpu]]; } @@ -1921,7 +1916,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, struct trace_iterator *iter = filp->private_data; struct trace_array_cpu *data; static cpumask_t mask; - struct trace_entry *entry; static int start; unsigned long flags; int read = 0; @@ -2013,10 +2007,15 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cpu_set(cpu, mask); } - while ((entry = find_next_entry_inc(iter)) != NULL) { + while (find_next_entry_inc(iter) != NULL) { + int len = iter->seq.len; + ret = print_trace_line(iter); - if (!ret) + if (!ret) { + /* don't print partial lines */ + iter->seq.len = len; break; + } trace_consume(iter); -- cgit v0.10.2 From 37ad508419f0fdfda7b378756eb1f35cfd26d96d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace - fix dynamic ftrace memory leak The ftrace dynamic function update allocates a record to store the instruction pointers that are being modified. If the modified instruction pointer fails to update, then the record is marked as failed and nothing more is done. Worse, if the modification fails, but the record ip function is still called, it will allocate a new record and try again. In just a matter of time, will this cause a serious memory leak and crash the system. This patch plugs this memory leak. When a record fails, it is included back into the pool of records to be used. Now a record may fail over and over again, but the number of allocated records will not increase. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a842d96..61e757b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,9 +43,10 @@ extern void mcount(void); # define FTRACE_HASHSIZE (1<node, &ftrace_hash[key]); } +static notrace void ftrace_free_rec(struct dyn_ftrace *rec) +{ + /* no locking, only called from kstop_machine */ + + rec->ip = (unsigned long)ftrace_free_records; + ftrace_free_records = rec; + rec->flags |= FTRACE_FL_FREE; +} + static notrace struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) { + struct dyn_ftrace *rec; + + /* First check for freed records */ + if (ftrace_free_records) { + rec = ftrace_free_records; + + /* todo, disable tracing altogether on this warning */ + if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { + WARN_ON_ONCE(1); + ftrace_free_records = NULL; + return NULL; + } + + ftrace_free_records = (void *)rec->ip; + memset(rec, 0, sizeof(*rec)); + return rec; + } + if (ftrace_pages->index == ENTRIES_PER_PAGE) { if (!ftrace_pages->next) return NULL; @@ -356,8 +385,16 @@ __ftrace_replace_code(struct dyn_ftrace *rec, } failed = ftrace_modify_code(ip, old, new); - if (failed) - rec->flags |= FTRACE_FL_FAILED; + if (failed) { + unsigned long key; + /* It is possible that the function hasn't been converted yet */ + key = hash_long(ip, FTRACE_HASHBITS); + if (!ftrace_ip_in_hash(ip, key)) { + rec->flags |= FTRACE_FL_FAILED; + ftrace_free_rec(rec); + } + + } } static void notrace ftrace_replace_code(int enable) @@ -407,8 +444,10 @@ ftrace_code_disable(struct dyn_ftrace *rec) call = ftrace_call_replace(ip, MCOUNT_ADDR); failed = ftrace_modify_code(ip, call, nop); - if (failed) + if (failed) { rec->flags |= FTRACE_FL_FAILED; + ftrace_free_rec(rec); + } } static int notrace __ftrace_modify_code(void *data) -- cgit v0.10.2 From 4eebcc81a33fbc45e28542b50197ed7b3c486d90 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: disable tracing on failure Since ftrace touches practically every function. If we detect any anomaly, we want to fully disable ftrace. This patch adds code to try shutdown ftrace as much as possible without doing any more harm is something is detected not quite correct. This only kills ftrace, this patch does have checks for other parts of the tracer (irqsoff, wakeup, etc.). Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 61e757b..4650a31 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,6 +58,9 @@ struct dyn_ftrace { int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); +/* totally disable ftrace - can not re-enable after this */ +void ftrace_kill(void); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8e02aa6..ff42345 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -29,9 +29,16 @@ #include "trace.h" -int ftrace_enabled; +/* ftrace_enabled is a method to turn ftrace on or off */ +int ftrace_enabled __read_mostly; static int last_ftrace_enabled; +/* + * ftrace_disabled is set when an anomaly is discovered. + * ftrace_disabled is much stronger than ftrace_enabled. + */ +static int ftrace_disabled __read_mostly; + static DEFINE_SPINLOCK(ftrace_lock); static DEFINE_MUTEX(ftrace_sysctl_lock); @@ -230,10 +237,11 @@ static notrace struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) if (ftrace_free_records) { rec = ftrace_free_records; - /* todo, disable tracing altogether on this warning */ if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { WARN_ON_ONCE(1); ftrace_free_records = NULL; + ftrace_disabled = 1; + ftrace_enabled = 0; return NULL; } @@ -260,7 +268,7 @@ ftrace_record_ip(unsigned long ip) int resched; int atomic; - if (!ftrace_enabled) + if (!ftrace_enabled || ftrace_disabled) return; resched = need_resched(); @@ -485,6 +493,9 @@ static void notrace ftrace_startup(void) { int command = 0; + if (unlikely(ftrace_disabled)) + return; + mutex_lock(&ftraced_lock); ftraced_suspend++; if (ftraced_suspend == 1) @@ -507,6 +518,9 @@ static void notrace ftrace_shutdown(void) { int command = 0; + if (unlikely(ftrace_disabled)) + return; + mutex_lock(&ftraced_lock); ftraced_suspend--; if (!ftraced_suspend) @@ -529,6 +543,9 @@ static void notrace ftrace_startup_sysctl(void) { int command = FTRACE_ENABLE_MCOUNT; + if (unlikely(ftrace_disabled)) + return; + mutex_lock(&ftraced_lock); /* Force update next time */ saved_ftrace_func = NULL; @@ -544,6 +561,9 @@ static void notrace ftrace_shutdown_sysctl(void) { int command = FTRACE_DISABLE_MCOUNT; + if (unlikely(ftrace_disabled)) + return; + mutex_lock(&ftraced_lock); /* ftraced_suspend is true if ftrace is running */ if (ftraced_suspend) @@ -600,6 +620,9 @@ static int notrace __ftrace_update_code(void *ignore) static void notrace ftrace_update_code(void) { + if (unlikely(ftrace_disabled)) + return; + stop_machine_run(__ftrace_update_code, NULL, NR_CPUS); } @@ -614,6 +637,9 @@ static int notrace ftraced(void *ignore) /* check once a second */ schedule_timeout(HZ); + if (unlikely(ftrace_disabled)) + continue; + mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftraced_lock); if (ftrace_enabled && ftraced_trigger && !ftraced_suspend) { @@ -628,6 +654,7 @@ static int notrace ftraced(void *ignore) ftrace_update_cnt != 1 ? "s" : "", ftrace_update_tot_cnt, usecs, usecs != 1 ? "s" : ""); + ftrace_disabled = 1; WARN_ON_ONCE(1); } ftraced_trigger = 0; @@ -785,6 +812,9 @@ ftrace_avail_open(struct inode *inode, struct file *file) struct ftrace_iterator *iter; int ret; + if (unlikely(ftrace_disabled)) + return -ENODEV; + iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; @@ -843,6 +873,9 @@ ftrace_filter_open(struct inode *inode, struct file *file) struct ftrace_iterator *iter; int ret = 0; + if (unlikely(ftrace_disabled)) + return -ENODEV; + iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; @@ -1063,6 +1096,9 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, */ notrace void ftrace_set_filter(unsigned char *buf, int len, int reset) { + if (unlikely(ftrace_disabled)) + return; + mutex_lock(&ftrace_filter_lock); if (reset) ftrace_filter_reset(); @@ -1133,7 +1169,7 @@ int ftrace_force_update(void) DECLARE_WAITQUEUE(wait, current); int ret = 0; - if (!ftraced_task) + if (unlikely(ftrace_disabled)) return -ENODEV; mutex_lock(&ftraced_lock); @@ -1142,6 +1178,11 @@ int ftrace_force_update(void) set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&ftraced_waiters, &wait); + if (unlikely(!ftraced_task)) { + ret = -ENODEV; + goto out; + } + do { mutex_unlock(&ftraced_lock); wake_up_process(ftraced_task); @@ -1154,6 +1195,7 @@ int ftrace_force_update(void) set_current_state(TASK_INTERRUPTIBLE); } while (last_counter == ftraced_iteration_counter); + out: mutex_unlock(&ftraced_lock); remove_wait_queue(&ftraced_waiters, &wait); set_current_state(TASK_RUNNING); @@ -1161,6 +1203,22 @@ int ftrace_force_update(void) return ret; } +static void ftrace_force_shutdown(void) +{ + struct task_struct *task; + int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC; + + mutex_lock(&ftraced_lock); + task = ftraced_task; + ftraced_task = NULL; + ftraced_suspend = -1; + ftrace_run_update_code(command); + mutex_unlock(&ftraced_lock); + + if (task) + kthread_stop(task); +} + static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; @@ -1194,21 +1252,29 @@ static int __init notrace ftrace_dynamic_init(void) stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS); /* ftrace_dyn_arch_init places the return code in addr */ - if (addr) - return addr; + if (addr) { + ret = (int)addr; + goto failed; + } ret = ftrace_dyn_table_alloc(); if (ret) - return ret; + goto failed; p = kthread_run(ftraced, NULL, "ftraced"); - if (IS_ERR(p)) - return -1; + if (IS_ERR(p)) { + ret = -1; + goto failed; + } last_ftrace_enabled = ftrace_enabled = 1; ftraced_task = p; return 0; + + failed: + ftrace_disabled = 1; + return ret; } core_initcall(ftrace_dynamic_init); @@ -1217,9 +1283,31 @@ core_initcall(ftrace_dynamic_init); # define ftrace_shutdown() do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) +# define ftrace_force_shutdown() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ /** + * ftrace_kill - totally shutdown ftrace + * + * This is a safety measure. If something was detected that seems + * wrong, calling this function will keep ftrace from doing + * any more modifications, and updates. + * used when something went wrong. + */ +void ftrace_kill(void) +{ + mutex_lock(&ftrace_sysctl_lock); + ftrace_disabled = 1; + ftrace_enabled = 0; + + clear_ftrace_function(); + mutex_unlock(&ftrace_sysctl_lock); + + /* Try to totally disable ftrace */ + ftrace_force_shutdown(); +} + +/** * register_ftrace_function - register a function for profiling * @ops - ops structure that holds the function for profiling. * @@ -1234,6 +1322,9 @@ int register_ftrace_function(struct ftrace_ops *ops) { int ret; + if (unlikely(ftrace_disabled)) + return -1; + mutex_lock(&ftrace_sysctl_lock); ret = __register_ftrace_function(ops); ftrace_startup(); @@ -1267,6 +1358,9 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, { int ret; + if (unlikely(ftrace_disabled)) + return -ENODEV; + mutex_lock(&ftrace_sysctl_lock); ret = proc_dointvec(table, write, file, buffer, lenp, ppos); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index a6f1ed7..85715b8 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -248,6 +248,10 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = save_ftrace_enabled; tracer_enabled = save_tracer_enabled; + /* kill ftrace totally if we failed */ + if (ret) + ftrace_kill(); + return ret; } #endif /* CONFIG_FTRACE */ -- cgit v0.10.2 From 26994ead1fc8cced63f17e9848edc1771036664e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: enabled tracing by default This patch is the correct way to have tracing enabled by default. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c56fc5e..3dc6eac 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -107,7 +107,7 @@ static struct trace_array max_tr; static DEFINE_PER_CPU(struct trace_array_cpu, max_data); -static int tracer_enabled; +static int tracer_enabled = 1; static unsigned long trace_nr_entries = 16384UL; static struct tracer *trace_types __read_mostly; @@ -2268,6 +2268,8 @@ __init static int tracer_alloc_buffers(void) int ret = -ENOMEM; int i; + global_trace.ctrl = tracer_enabled; + /* Allocate the first page for all buffers */ for_each_possible_cpu(i) { data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); -- cgit v0.10.2 From 0fd9e0dac9026df09986a4b201518ae015814aef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: use cpu clock again Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3dc6eac..d74c039 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -42,61 +42,9 @@ ns2usecs(cycle_t nsec) return nsec; } -static const int time_sync_freq_max = 128; -static const cycle_t time_sync_thresh = 100000; - -static DEFINE_PER_CPU(cycle_t, time_offset); -static DEFINE_PER_CPU(cycle_t, prev_cpu_time); -static DEFINE_PER_CPU(int, time_sync_count); -static DEFINE_PER_CPU(int, time_sync_freq); - -/* - * Global lock which we take every now and then to synchronize - * the CPUs time. This method is not warp-safe, but it's good - * enough to synchronize slowly diverging time sources and thus - * it's good enough for tracing: - */ -static DEFINE_SPINLOCK(time_sync_lock); -static cycle_t prev_global_time; - -static notrace cycle_t __ftrace_now_sync(cycles_t time, int cpu) -{ - unsigned long flags; - - spin_lock_irqsave(&time_sync_lock, flags); - - /* - * Update the synchronization frequency: - */ - if (per_cpu(time_sync_freq, cpu) < time_sync_freq_max) - per_cpu(time_sync_freq, cpu) *= 2; - per_cpu(time_sync_count, cpu) = per_cpu(time_sync_freq, cpu); - - if (time < prev_global_time) { - per_cpu(time_offset, cpu) += prev_global_time - time; - time = prev_global_time; - } else { - prev_global_time = time; - } - - spin_unlock_irqrestore(&time_sync_lock, flags); - - return time; -} - notrace cycle_t ftrace_now(int cpu) { - cycle_t prev_cpu_time, time, delta_time; - - prev_cpu_time = per_cpu(prev_cpu_time, cpu); - time = sched_clock() + per_cpu(time_offset, cpu); - delta_time = time-prev_cpu_time; - - if (unlikely(delta_time > time_sync_thresh || - --per_cpu(time_sync_count, cpu) <= 0)) - time = __ftrace_now_sync(time, cpu); - - return time; + return cpu_clock(cpu); } static struct trace_array global_trace; -- cgit v0.10.2 From 2e0f57618529a2739a5e1570e6c445c9c966b595 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: build fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d74c039..71b25b7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -432,47 +432,6 @@ notrace void tracing_reset(struct trace_array_cpu *data) data->trace_tail_idx = 0; } -#ifdef CONFIG_FTRACE -static notrace void -function_trace_call(unsigned long ip, unsigned long parent_ip) -{ - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - unsigned long flags; - long disabled; - int cpu; - - if (unlikely(!tracer_enabled)) - return; - - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); - - if (likely(disabled == 1)) - ftrace(tr, data, ip, parent_ip, flags); - - atomic_dec(&data->disabled); - local_irq_restore(flags); -} - -static struct ftrace_ops trace_ops __read_mostly = -{ - .func = function_trace_call, -}; -#endif - -notrace void tracing_start_function_trace(void) -{ - register_ftrace_function(&trace_ops); -} - -notrace void tracing_stop_function_trace(void) -{ - unregister_ftrace_function(&trace_ops); -} - #define SAVED_CMDLINES 128 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; @@ -635,8 +594,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) } notrace void -ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, unsigned long flags) +__ftrace(struct trace_array *tr, struct trace_array_cpu *data, + unsigned long ip, unsigned long parent_ip, unsigned long flags) { struct trace_entry *entry; unsigned long irq_flags; @@ -651,6 +610,14 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, } notrace void +ftrace(struct trace_array *tr, struct trace_array_cpu *data, + unsigned long ip, unsigned long parent_ip, unsigned long flags) +{ + if (likely(!atomic_read(&data->disabled))) + __ftrace(tr, data, ip, parent_ip, flags); +} + +notrace void trace_special(struct trace_array *tr, struct trace_array_cpu *data, unsigned long arg1, unsigned long arg2, unsigned long arg3) { @@ -688,6 +655,47 @@ tracing_sched_switch_trace(struct trace_array *tr, spin_unlock_irqrestore(&data->lock, irq_flags); } +#ifdef CONFIG_FTRACE +static notrace void +function_trace_call(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (unlikely(!tracer_enabled)) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + __ftrace(tr, data, ip, parent_ip, flags); + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + +static struct ftrace_ops trace_ops __read_mostly = +{ + .func = function_trace_call, +}; + +notrace void tracing_start_function_trace(void) +{ + register_ftrace_function(&trace_ops); +} + +notrace void tracing_stop_function_trace(void) +{ + unregister_ftrace_function(&trace_ops); +} +#endif + enum trace_file_type { TRACE_FILE_LAT_FMT = 1, }; @@ -722,7 +730,7 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, return &array[iter->next_page_idx[cpu]]; } -static struct notrace trace_entry * +static struct trace_entry * notrace find_next_entry(struct trace_iterator *iter, int *ent_cpu) { struct trace_array *tr = iter->tr; @@ -1866,6 +1874,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, static cpumask_t mask; static int start; unsigned long flags; + int ftrace_save; int read = 0; int cpu; int len; @@ -1944,6 +1953,9 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cpus_clear(mask); local_irq_save(flags); + ftrace_save = ftrace_enabled; + ftrace_enabled = 0; + smp_wmb(); for_each_possible_cpu(cpu) { data = iter->tr->data[cpu]; @@ -1951,10 +1963,14 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, continue; atomic_inc(&data->disabled); - spin_lock(&data->lock); cpu_set(cpu, mask); } + for_each_cpu_mask(cpu, mask) { + data = iter->tr->data[cpu]; + spin_lock(&data->lock); + } + while (find_next_entry_inc(iter) != NULL) { int len = iter->seq.len; @@ -1974,8 +1990,13 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, for_each_cpu_mask(cpu, mask) { data = iter->tr->data[cpu]; spin_unlock(&data->lock); + } + + for_each_cpu_mask(cpu, mask) { + data = iter->tr->data[cpu]; atomic_dec(&data->disabled); } + ftrace_enabled = ftrace_save; local_irq_restore(flags); /* Now copy what we have to the user */ diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index e5d34b7..69a0eb0 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -9,10 +9,10 @@ * Copyright (C) 2004-2006 Ingo Molnar * Copyright (C) 2004 William Lee Irwin III */ -#include #include #include #include +#include #include "trace.h" -- cgit v0.10.2 From 5e3ca0ec76fce92daa4eed0d02de9c79b1fe3920 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: introduce the "hex" output method Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 71b25b7..6974b21 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -103,7 +103,8 @@ enum trace_iterator_flags { TRACE_ITER_SYM_ADDR = 0x04, TRACE_ITER_VERBOSE = 0x08, TRACE_ITER_RAW = 0x10, - TRACE_ITER_BIN = 0x20, + TRACE_ITER_HEX = 0x20, + TRACE_ITER_BIN = 0x40, }; #define TRACE_ITER_SYM_MASK \ @@ -116,6 +117,7 @@ static const char *trace_options[] = { "sym-addr", "verbose", "raw", + "hex", "bin", NULL }; @@ -238,6 +240,47 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) return len; } +#define HEX_CHARS 17 + +static notrace int +trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) +{ + unsigned char hex[HEX_CHARS]; + unsigned char *data; + unsigned char byte; + int i, j; + + BUG_ON(len >= HEX_CHARS); + + data = mem; + +#ifdef __BIG_ENDIAN + for (i = 0, j = 0; i < len; i++) { +#else + for (i = len-1, j = 0; i >= 0; i--) { +#endif + byte = data[i]; + + hex[j] = byte & 0x0f; + if (hex[j] >= 10) + hex[j] += 'a' - 10; + else + hex[j] += '0'; + j++; + + hex[j] = byte >> 4; + if (hex[j] >= 10) + hex[j] += 'a' - 10; + else + hex[j] += '0'; + j++; + } + hex[j] = ' '; + j++; + + return trace_seq_putmem(s, hex, j); +} + static notrace void trace_seq_reset(struct trace_seq *s) { @@ -1274,6 +1317,51 @@ do { \ return 0; \ } while (0) +#define SEQ_PUT_HEX_FIELD_RET(s, x) \ +do { \ + if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ + return 0; \ +} while (0) + +static notrace int print_hex_fmt(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + unsigned char newline = '\n'; + struct trace_entry *entry; + int S; + + entry = iter->ent; + + SEQ_PUT_HEX_FIELD_RET(s, entry->pid); + SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); + SEQ_PUT_HEX_FIELD_RET(s, entry->t); + + switch (entry->type) { + case TRACE_FN: + SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip); + SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); + break; + case TRACE_CTX: + S = entry->ctx.prev_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.prev_state] : 'X'; + SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid); + SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio); + SEQ_PUT_HEX_FIELD_RET(s, S); + SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid); + SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio); + SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); + break; + case TRACE_SPECIAL: + SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1); + SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2); + SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3); + break; + } + SEQ_PUT_FIELD_RET(s, newline); + + return 1; +} + static notrace int print_bin_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1327,6 +1415,9 @@ static int print_trace_line(struct trace_iterator *iter) if (trace_flags & TRACE_ITER_BIN) return print_bin_fmt(iter); + if (trace_flags & TRACE_ITER_HEX) + return print_hex_fmt(iter); + if (trace_flags & TRACE_ITER_RAW) return print_raw_fmt(iter); -- cgit v0.10.2 From 2577046740fe6d77864128c6187c11125c2449ea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: build fix no need to backmerge, only affects ftrace-enabled kernels. (which is not the default) Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6974b21..958c4d7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1965,7 +1965,9 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, static cpumask_t mask; static int start; unsigned long flags; +#ifdef CONFIG_FTRACE int ftrace_save; +#endif int read = 0; int cpu; int len; @@ -2044,8 +2046,10 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cpus_clear(mask); local_irq_save(flags); +#ifdef CONFIG_FTRACE ftrace_save = ftrace_enabled; ftrace_enabled = 0; +#endif smp_wmb(); for_each_possible_cpu(cpu) { data = iter->tr->data[cpu]; @@ -2087,7 +2091,9 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, data = iter->tr->data[cpu]; atomic_dec(&data->disabled); } +#ifdef CONFIG_FTRACE ftrace_enabled = ftrace_save; +#endif local_irq_restore(flags); /* Now copy what we have to the user */ -- cgit v0.10.2 From aeaee8a2c9cb4489f166ca0e39c568e8254faaa6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: build fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4650a31..08fbef1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,9 +58,6 @@ struct dyn_ftrace { int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); -/* totally disable ftrace - can not re-enable after this */ -void ftrace_kill(void); - /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); @@ -74,10 +71,13 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); #else -# define ftrace_force_update() ({ 0; }) -# define ftrace_set_filter(buf, len, reset) do { } while (0) +# define ftrace_force_update() ({ 0; }) +# define ftrace_set_filter(buf, len, reset) do { } while (0) #endif +/* totally disable ftrace - can not re-enable after this */ +void ftrace_kill(void); + static inline void tracer_disable(void) { #ifdef CONFIG_FTRACE -- cgit v0.10.2 From 2a2cc8f7c4d0dfd75720867f7dc58d24f075edfc Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Pedersen Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: allow the event pipe to be polled Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 958c4d7..d041578 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -63,6 +64,7 @@ static struct tracer *current_trace __read_mostly; static int max_tracer_type_len; static DEFINE_MUTEX(trace_types_lock); +static DECLARE_WAIT_QUEUE_HEAD (trace_wait); #define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) @@ -105,6 +107,7 @@ enum trace_iterator_flags { TRACE_ITER_RAW = 0x10, TRACE_ITER_HEX = 0x20, TRACE_ITER_BIN = 0x40, + TRACE_ITER_BLOCK = 0x80, }; #define TRACE_ITER_SYM_MASK \ @@ -119,6 +122,7 @@ static const char *trace_options[] = { "raw", "hex", "bin", + "block", NULL }; @@ -650,6 +654,9 @@ __ftrace(struct trace_array *tr, struct trace_array_cpu *data, entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; spin_unlock_irqrestore(&data->lock, irq_flags); + + if (!(trace_flags & TRACE_ITER_BLOCK)) + wake_up (&trace_wait); } notrace void @@ -675,6 +682,9 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, entry->special.arg2 = arg2; entry->special.arg3 = arg3; spin_unlock_irqrestore(&data->lock, irq_flags); + + if (!(trace_flags & TRACE_ITER_BLOCK)) + wake_up (&trace_wait); } notrace void @@ -696,6 +706,9 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; spin_unlock_irqrestore(&data->lock, irq_flags); + + if (!(trace_flags & TRACE_ITER_BLOCK)) + wake_up (&trace_wait); } #ifdef CONFIG_FTRACE @@ -1765,7 +1778,6 @@ static struct file_operations tracing_readme_fops = { .read = tracing_readme_read, }; - static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -1953,6 +1965,28 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) return 0; } +static unsigned int +tracing_poll_pipe(struct file *filp, poll_table *poll_table) +{ + struct trace_iterator *iter = filp->private_data; + + if (trace_flags & TRACE_ITER_BLOCK) { + /* + * Always select as readable when in blocking mode + */ + return POLLIN | POLLRDNORM; + } + else { + if (!trace_empty(iter)) + return POLLIN | POLLRDNORM; + poll_wait(filp, &trace_wait, poll_table); + if (!trace_empty(iter)) + return POLLIN | POLLRDNORM; + + return 0; + } +} + /* * Consumer reader. */ @@ -1991,6 +2025,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, start = 0; while (trace_empty(iter)) { + if (!(trace_flags & TRACE_ITER_BLOCK)) + return -EWOULDBLOCK; /* * This is a make-shift waitqueue. The reason we don't use * an actual wait queue is because: @@ -2134,6 +2170,7 @@ static struct file_operations set_tracer_fops = { static struct file_operations tracing_pipe_fops = { .open = tracing_open_pipe, + .poll = tracing_poll_pipe, .read = tracing_read_pipe, .release = tracing_release_pipe, }; -- cgit v0.10.2 From 6fb44b717c10ecf37beaaebd312f3afa93fed714 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: add trace_function api for other tracers to use A new check was added in the ftrace function that wont trace if the CPU trace buffer is disabled. Unfortunately, other tracers used ftrace() to write to the buffer after they disabled it. The new disable check makes these calls into a nop. This patch changes the __ftrace that is called without the check into a new api for the other tracers to use, called "trace_function". The other tracers use this interface instead when the trace CPU buffer is already disabled. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d041578..9022c35 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -641,8 +641,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) } notrace void -__ftrace(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long ip, unsigned long parent_ip, unsigned long flags) +trace_function(struct trace_array *tr, struct trace_array_cpu *data, + unsigned long ip, unsigned long parent_ip, unsigned long flags) { struct trace_entry *entry; unsigned long irq_flags; @@ -664,7 +664,7 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags) { if (likely(!atomic_read(&data->disabled))) - __ftrace(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags); } notrace void @@ -730,7 +730,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - __ftrace(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags); atomic_dec(&data->disabled); local_irq_restore(flags); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7bdfef3..faf9f67 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -169,6 +169,11 @@ void trace_special(struct trace_array *tr, unsigned long arg1, unsigned long arg2, unsigned long arg3); +void trace_function(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long ip, + unsigned long parent_ip, + unsigned long flags); void tracing_start_function_trace(void); void tracing_stop_function_trace(void); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index d2a6e6f..3269f4f 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - ftrace(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags); atomic_dec(&data->disabled); } @@ -150,7 +150,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out_unlock; - ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); + trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); latency = nsecs_to_usecs(delta); @@ -188,7 +188,7 @@ out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); tracing_reset(data); - ftrace(tr, data, CALLER_ADDR0, parent_ip, flags); + trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); } static inline void notrace @@ -221,7 +221,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) local_save_flags(flags); - ftrace(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags); __get_cpu_var(tracing_cpu) = 1; @@ -254,7 +254,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); local_save_flags(flags); - ftrace(tr, data, ip, parent_ip, flags); + trace_function(tr, data, ip, parent_ip, flags); check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index b7df825..3549e41 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -85,7 +85,7 @@ wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) if (unlikely(!tracer_enabled || next != wakeup_task)) goto out_unlock; - ftrace(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); + trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); /* * usecs conversion is slow so we try to delay the conversion @@ -192,7 +192,8 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p, local_save_flags(flags); tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); - ftrace(tr, tr->data[wakeup_cpu], CALLER_ADDR1, CALLER_ADDR2, flags); + trace_function(tr, tr->data[wakeup_cpu], + CALLER_ADDR1, CALLER_ADDR2, flags); out_locked: spin_unlock(&wakeup_lock); -- cgit v0.10.2 From 9ff9cdb2d3b0971f89e899b3420aadd91bddc215 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:50 +0200 Subject: ftrace: cleanups clean up recent code. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ff42345..425b1fe 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -197,8 +197,8 @@ static int ftrace_record_suspend; static struct dyn_ftrace *ftrace_free_records; -static inline int -notrace ftrace_ip_in_hash(unsigned long ip, unsigned long key) +static inline int notrace +ftrace_ip_in_hash(unsigned long ip, unsigned long key) { struct dyn_ftrace *p; struct hlist_node *t; @@ -1249,6 +1249,7 @@ static int __init notrace ftrace_dynamic_init(void) int ret; addr = (unsigned long)ftrace_record_ip; + stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS); /* ftrace_dyn_arch_init places the return code in addr */ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 3269f4f..2ac0d09 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -309,7 +309,7 @@ void trace_softirqs_off(unsigned long ip) { } -inline void print_irqtrace_events(struct task_struct *curr) +inline notrace void print_irqtrace_events(struct task_struct *curr) { } diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 85715b8..546307d 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -3,7 +3,7 @@ #include #include -static inline int trace_valid_entry(struct trace_entry *entry) +static notrace inline int trace_valid_entry(struct trace_entry *entry) { switch (entry->type) { case TRACE_FN: -- cgit v0.10.2 From caf8cdebfb6c1cff50ea8077f1a07c2333d6d1fd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:50 +0200 Subject: ftrace: remove address of function names PowerPC is very fragile when it comes to use of function names and function addresses. ftrace needs to either use all function addresses or function names (i.e. my_func as suppose to &my_func). This patch chooses to use the names and not the addresses, and makes ftrace consistent. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 425b1fe..57350cb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -326,8 +326,8 @@ ftrace_record_ip(unsigned long ip) preempt_enable_notrace(); } -#define FTRACE_ADDR ((long)(&ftrace_caller)) -#define MCOUNT_ADDR ((long)(&mcount)) +#define FTRACE_ADDR ((long)(ftrace_caller)) +#define MCOUNT_ADDR ((long)(mcount)) static void notrace __ftrace_replace_code(struct dyn_ftrace *rec, -- cgit v0.10.2 From 3594136ad67a54d77bcb2547e70011754a2f91d5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:50 +0200 Subject: ftrace: do not profile lib/string.o Most archs define the string and memory compare functions in assembly. Some do not. But these functions may be used in some archs at early boot up. Since most archs define this code in assembly and they are not usually traced, there's no need to trace them when they are not defined in assembly. This patch removes the -pg from the CFLAGS for lib/string.o. This prevents the string functions use in either vdso or early bootup from crashing the system. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/lib/Makefile b/lib/Makefile index 74b0cfb..6ca9e6e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -8,6 +8,14 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o +ifdef CONFIG_FTRACE +# Do not profile string.o, since it may be used in early boot or vdso +ORIG_CFLAGS := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(if $(subst string,,$(basename $(notdir $@))), \ + $(ORIG_CFLAGS), \ + $(subst -pg,,$(ORIG_CFLAGS))) +endif + lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -- cgit v0.10.2 From b53dde9d34f2df396540988ebc65c33400f57b04 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: disable -pg for the tracer itself Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 3fec653..c25a6cd 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -1,3 +1,11 @@ + +# Do not instrument the tracer itself: + +ifdef CONFIG_FTRACE +ORIG_CFLAGS := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +endif + obj-$(CONFIG_FTRACE) += libftrace.o obj-$(CONFIG_TRACING) += trace.o -- cgit v0.10.2 From e309b41dd65aa953f86765eeeecc941d8e1e8b8f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: remove notrace now that we have a kbuild method for notrace, no need to pollute the C code with the annotations. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 57350cb..281d97a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -53,7 +53,7 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; /* mcount is defined per arch in assembly */ EXPORT_SYMBOL(mcount); -notrace void ftrace_list_func(unsigned long ip, unsigned long parent_ip) +void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { struct ftrace_ops *op = ftrace_list; @@ -79,7 +79,7 @@ void clear_ftrace_function(void) ftrace_trace_function = ftrace_stub; } -static int notrace __register_ftrace_function(struct ftrace_ops *ops) +static int __register_ftrace_function(struct ftrace_ops *ops) { /* Should never be called by interrupts */ spin_lock(&ftrace_lock); @@ -110,7 +110,7 @@ static int notrace __register_ftrace_function(struct ftrace_ops *ops) return 0; } -static int notrace __unregister_ftrace_function(struct ftrace_ops *ops) +static int __unregister_ftrace_function(struct ftrace_ops *ops) { struct ftrace_ops **p; int ret = 0; @@ -197,7 +197,7 @@ static int ftrace_record_suspend; static struct dyn_ftrace *ftrace_free_records; -static inline int notrace +static inline int ftrace_ip_in_hash(unsigned long ip, unsigned long key) { struct dyn_ftrace *p; @@ -214,13 +214,13 @@ ftrace_ip_in_hash(unsigned long ip, unsigned long key) return found; } -static inline void notrace +static inline void ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) { hlist_add_head(&node->node, &ftrace_hash[key]); } -static notrace void ftrace_free_rec(struct dyn_ftrace *rec) +static void ftrace_free_rec(struct dyn_ftrace *rec) { /* no locking, only called from kstop_machine */ @@ -229,7 +229,7 @@ static notrace void ftrace_free_rec(struct dyn_ftrace *rec) rec->flags |= FTRACE_FL_FREE; } -static notrace struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) +static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) { struct dyn_ftrace *rec; @@ -259,7 +259,7 @@ static notrace struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) return &ftrace_pages->records[ftrace_pages->index++]; } -static void notrace +static void ftrace_record_ip(unsigned long ip) { struct dyn_ftrace *node; @@ -329,7 +329,7 @@ ftrace_record_ip(unsigned long ip) #define FTRACE_ADDR ((long)(ftrace_caller)) #define MCOUNT_ADDR ((long)(mcount)) -static void notrace +static void __ftrace_replace_code(struct dyn_ftrace *rec, unsigned char *old, unsigned char *new, int enable) { @@ -405,7 +405,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, } } -static void notrace ftrace_replace_code(int enable) +static void ftrace_replace_code(int enable) { unsigned char *new = NULL, *old = NULL; struct dyn_ftrace *rec; @@ -430,7 +430,7 @@ static void notrace ftrace_replace_code(int enable) } } -static notrace void ftrace_shutdown_replenish(void) +static void ftrace_shutdown_replenish(void) { if (ftrace_pages->next) return; @@ -439,7 +439,7 @@ static notrace void ftrace_shutdown_replenish(void) ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); } -static notrace void +static void ftrace_code_disable(struct dyn_ftrace *rec) { unsigned long ip; @@ -458,7 +458,7 @@ ftrace_code_disable(struct dyn_ftrace *rec) } } -static int notrace __ftrace_modify_code(void *data) +static int __ftrace_modify_code(void *data) { unsigned long addr; int *command = data; @@ -482,14 +482,14 @@ static int notrace __ftrace_modify_code(void *data) return 0; } -static void notrace ftrace_run_update_code(int command) +static void ftrace_run_update_code(int command) { stop_machine_run(__ftrace_modify_code, &command, NR_CPUS); } static ftrace_func_t saved_ftrace_func; -static void notrace ftrace_startup(void) +static void ftrace_startup(void) { int command = 0; @@ -514,7 +514,7 @@ static void notrace ftrace_startup(void) mutex_unlock(&ftraced_lock); } -static void notrace ftrace_shutdown(void) +static void ftrace_shutdown(void) { int command = 0; @@ -539,7 +539,7 @@ static void notrace ftrace_shutdown(void) mutex_unlock(&ftraced_lock); } -static void notrace ftrace_startup_sysctl(void) +static void ftrace_startup_sysctl(void) { int command = FTRACE_ENABLE_MCOUNT; @@ -557,7 +557,7 @@ static void notrace ftrace_startup_sysctl(void) mutex_unlock(&ftraced_lock); } -static void notrace ftrace_shutdown_sysctl(void) +static void ftrace_shutdown_sysctl(void) { int command = FTRACE_DISABLE_MCOUNT; @@ -577,7 +577,7 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int notrace __ftrace_update_code(void *ignore) +static int __ftrace_update_code(void *ignore) { struct dyn_ftrace *p; struct hlist_head head; @@ -618,7 +618,7 @@ static int notrace __ftrace_update_code(void *ignore) return 0; } -static void notrace ftrace_update_code(void) +static void ftrace_update_code(void) { if (unlikely(ftrace_disabled)) return; @@ -626,7 +626,7 @@ static void notrace ftrace_update_code(void) stop_machine_run(__ftrace_update_code, NULL, NR_CPUS); } -static int notrace ftraced(void *ignore) +static int ftraced(void *ignore) { unsigned long usecs; @@ -733,7 +733,7 @@ struct ftrace_iterator { unsigned filtered; }; -static void notrace * +static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_iterator *iter = m->private; @@ -806,7 +806,7 @@ static struct seq_operations show_ftrace_seq_ops = { .show = t_show, }; -static int notrace +static int ftrace_avail_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; @@ -845,7 +845,7 @@ int ftrace_avail_release(struct inode *inode, struct file *file) return 0; } -static void notrace ftrace_filter_reset(void) +static void ftrace_filter_reset(void) { struct ftrace_page *pg; struct dyn_ftrace *rec; @@ -867,7 +867,7 @@ static void notrace ftrace_filter_reset(void) preempt_enable(); } -static int notrace +static int ftrace_filter_open(struct inode *inode, struct file *file) { struct ftrace_iterator *iter; @@ -903,7 +903,7 @@ ftrace_filter_open(struct inode *inode, struct file *file) return ret; } -static ssize_t notrace +static ssize_t ftrace_filter_read(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -913,7 +913,7 @@ ftrace_filter_read(struct file *file, char __user *ubuf, return -EPERM; } -static loff_t notrace +static loff_t ftrace_filter_lseek(struct file *file, loff_t offset, int origin) { loff_t ret; @@ -933,7 +933,7 @@ enum { MATCH_END_ONLY, }; -static void notrace +static void ftrace_match(unsigned char *buff, int len) { char str[KSYM_SYMBOL_LEN]; @@ -1002,7 +1002,7 @@ ftrace_match(unsigned char *buff, int len) preempt_enable(); } -static ssize_t notrace +static ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -1094,7 +1094,7 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, * Filters denote which functions should be enabled when tracing is enabled. * If @buf is NULL and reset is set, all functions will be enabled for tracing. */ -notrace void ftrace_set_filter(unsigned char *buf, int len, int reset) +void ftrace_set_filter(unsigned char *buf, int len, int reset) { if (unlikely(ftrace_disabled)) return; @@ -1107,7 +1107,7 @@ notrace void ftrace_set_filter(unsigned char *buf, int len, int reset) mutex_unlock(&ftrace_filter_lock); } -static int notrace +static int ftrace_filter_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; @@ -1242,7 +1242,7 @@ static __init int ftrace_init_debugfs(void) fs_initcall(ftrace_init_debugfs); -static int __init notrace ftrace_dynamic_init(void) +static int __init ftrace_dynamic_init(void) { struct task_struct *p; unsigned long addr; @@ -1352,7 +1352,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) return ret; } -notrace int +int ftrace_enable_sysctl(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *lenp, loff_t *ppos) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9022c35..f589805 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -35,7 +35,7 @@ unsigned long __read_mostly tracing_thresh; static int tracing_disabled = 1; -static long notrace +static long ns2usecs(cycle_t nsec) { nsec += 500; @@ -43,7 +43,7 @@ ns2usecs(cycle_t nsec) return nsec; } -notrace cycle_t ftrace_now(int cpu) +cycle_t ftrace_now(int cpu) { return cpu_clock(cpu); } @@ -135,7 +135,7 @@ static DEFINE_SPINLOCK(ftrace_max_lock); * structure. (this way the maximum trace is permanently saved, * for later retrieval via /debugfs/tracing/latency_trace) */ -static notrace void +static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data = tr->data[cpu]; @@ -184,7 +184,7 @@ void *head_page(struct trace_array_cpu *data) return page_address(page); } -static notrace int +static int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { int len = (PAGE_SIZE - 1) - s->len; @@ -207,7 +207,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) return len; } -static notrace int +static int trace_seq_puts(struct trace_seq *s, const char *str) { int len = strlen(str); @@ -221,7 +221,7 @@ trace_seq_puts(struct trace_seq *s, const char *str) return len; } -static notrace int +static int trace_seq_putc(struct trace_seq *s, unsigned char c) { if (s->len >= (PAGE_SIZE - 1)) @@ -232,7 +232,7 @@ trace_seq_putc(struct trace_seq *s, unsigned char c) return 1; } -static notrace int +static int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) { if (len > ((PAGE_SIZE - 1) - s->len)) @@ -246,7 +246,7 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) #define HEX_CHARS 17 -static notrace int +static int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) { unsigned char hex[HEX_CHARS]; @@ -285,13 +285,13 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) return trace_seq_putmem(s, hex, j); } -static notrace void +static void trace_seq_reset(struct trace_seq *s) { s->len = 0; } -static notrace void +static void trace_print_seq(struct seq_file *m, struct trace_seq *s) { int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; @@ -302,7 +302,7 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s) trace_seq_reset(s); } -notrace static void +static void flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) { struct list_head flip_pages; @@ -323,7 +323,7 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) check_pages(tr2); } -notrace void +void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data; @@ -348,7 +348,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) * @tsk - task with the latency * @cpu - the cpu of the buffer to copy. */ -notrace void +void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct trace_array_cpu *data = tr->data[cpu]; @@ -471,7 +471,7 @@ void unregister_tracer(struct tracer *type) mutex_unlock(&trace_types_lock); } -notrace void tracing_reset(struct trace_array_cpu *data) +void tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; data->trace_head = data->trace_tail = head_page(data); @@ -494,9 +494,9 @@ static void trace_init_cmdlines(void) cmdline_idx = 0; } -notrace void trace_stop_cmdline_recording(void); +void trace_stop_cmdline_recording(void); -static notrace void trace_save_cmdline(struct task_struct *tsk) +static void trace_save_cmdline(struct task_struct *tsk) { unsigned map; unsigned idx; @@ -531,7 +531,7 @@ static notrace void trace_save_cmdline(struct task_struct *tsk) spin_unlock(&trace_cmdline_lock); } -static notrace char *trace_find_cmdline(int pid) +static char *trace_find_cmdline(int pid) { char *cmdline = "<...>"; unsigned map; @@ -552,7 +552,7 @@ static notrace char *trace_find_cmdline(int pid) return cmdline; } -notrace void tracing_record_cmdline(struct task_struct *tsk) +void tracing_record_cmdline(struct task_struct *tsk) { if (atomic_read(&trace_record_cmdline_disabled)) return; @@ -560,7 +560,7 @@ notrace void tracing_record_cmdline(struct task_struct *tsk) trace_save_cmdline(tsk); } -static inline notrace struct list_head * +static inline struct list_head * trace_next_list(struct trace_array_cpu *data, struct list_head *next) { /* @@ -574,7 +574,7 @@ trace_next_list(struct trace_array_cpu *data, struct list_head *next) return next; } -static inline notrace void * +static inline void * trace_next_page(struct trace_array_cpu *data, void *addr) { struct list_head *next; @@ -588,7 +588,7 @@ trace_next_page(struct trace_array_cpu *data, void *addr) return page_address(page); } -static inline notrace struct trace_entry * +static inline struct trace_entry * tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) { unsigned long idx, idx_next; @@ -623,7 +623,7 @@ tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) return entry; } -static inline notrace void +static inline void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) { struct task_struct *tsk = current; @@ -640,7 +640,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); } -notrace void +void trace_function(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags) { @@ -659,7 +659,7 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, wake_up (&trace_wait); } -notrace void +void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags) { @@ -667,7 +667,7 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } -notrace void +void trace_special(struct trace_array *tr, struct trace_array_cpu *data, unsigned long arg1, unsigned long arg2, unsigned long arg3) { @@ -687,7 +687,7 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, wake_up (&trace_wait); } -notrace void +void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, @@ -712,7 +712,7 @@ tracing_sched_switch_trace(struct trace_array *tr, } #ifdef CONFIG_FTRACE -static notrace void +static void function_trace_call(unsigned long ip, unsigned long parent_ip) { struct trace_array *tr = &global_trace; @@ -741,12 +741,12 @@ static struct ftrace_ops trace_ops __read_mostly = .func = function_trace_call, }; -notrace void tracing_start_function_trace(void) +void tracing_start_function_trace(void) { register_ftrace_function(&trace_ops); } -notrace void tracing_stop_function_trace(void) +void tracing_stop_function_trace(void) { unregister_ftrace_function(&trace_ops); } @@ -786,7 +786,7 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data, return &array[iter->next_page_idx[cpu]]; } -static struct trace_entry * notrace +static struct trace_entry * find_next_entry(struct trace_iterator *iter, int *ent_cpu) { struct trace_array *tr = iter->tr; @@ -813,7 +813,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) return next; } -static notrace void trace_iterator_increment(struct trace_iterator *iter) +static void trace_iterator_increment(struct trace_iterator *iter) { iter->idx++; iter->next_idx[iter->cpu]++; @@ -828,7 +828,7 @@ static notrace void trace_iterator_increment(struct trace_iterator *iter) } } -static notrace void trace_consume(struct trace_iterator *iter) +static void trace_consume(struct trace_iterator *iter) { struct trace_array_cpu *data = iter->tr->data[iter->cpu]; @@ -844,7 +844,7 @@ static notrace void trace_consume(struct trace_iterator *iter) data->trace_idx = 0; } -static notrace void *find_next_entry_inc(struct trace_iterator *iter) +static void *find_next_entry_inc(struct trace_iterator *iter) { struct trace_entry *next; int next_cpu = -1; @@ -863,7 +863,7 @@ static notrace void *find_next_entry_inc(struct trace_iterator *iter) return next ? iter : NULL; } -static notrace void *s_next(struct seq_file *m, void *v, loff_t *pos) +static void *s_next(struct seq_file *m, void *v, loff_t *pos) { struct trace_iterator *iter = m->private; void *last_ent = iter->ent; @@ -978,7 +978,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, # define IP_FMT "%016lx" #endif -static notrace int +static int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) { int ret; @@ -999,7 +999,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } -static notrace void print_lat_help_header(struct seq_file *m) +static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); seq_puts(m, "# / _-----=> irqs-off \n"); @@ -1012,14 +1012,14 @@ static notrace void print_lat_help_header(struct seq_file *m) seq_puts(m, "# \\ / ||||| \\ | / \n"); } -static notrace void print_func_help_header(struct seq_file *m) +static void print_func_help_header(struct seq_file *m) { seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); seq_puts(m, "# | | | | |\n"); } -static notrace void +static void print_trace_header(struct seq_file *m, struct trace_iterator *iter) { unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -1090,7 +1090,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) seq_puts(m, "\n"); } -static notrace void +static void lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) { int hardirq, softirq; @@ -1127,7 +1127,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) unsigned long preempt_mark_thresh = 100; -static notrace void +static void lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, unsigned long rel_usecs) { @@ -1142,7 +1142,7 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; -static notrace int +static int print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) { struct trace_seq *s = &iter->seq; @@ -1206,7 +1206,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) return 1; } -static notrace int print_trace_fmt(struct trace_iterator *iter) +static int print_trace_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); @@ -1279,7 +1279,7 @@ static notrace int print_trace_fmt(struct trace_iterator *iter) return 1; } -static notrace int print_raw_fmt(struct trace_iterator *iter) +static int print_raw_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry; @@ -1336,7 +1336,7 @@ do { \ return 0; \ } while (0) -static notrace int print_hex_fmt(struct trace_iterator *iter) +static int print_hex_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; unsigned char newline = '\n'; @@ -1375,7 +1375,7 @@ static notrace int print_hex_fmt(struct trace_iterator *iter) return 1; } -static notrace int print_bin_fmt(struct trace_iterator *iter) +static int print_bin_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry; @@ -1475,7 +1475,7 @@ static struct seq_operations tracer_seq_ops = { .show = s_show, }; -static struct trace_iterator notrace * +static struct trace_iterator * __tracing_open(struct inode *inode, struct file *file, int *ret) { struct trace_iterator *iter; @@ -1572,7 +1572,7 @@ static int tracing_lt_open(struct inode *inode, struct file *file) } -static notrace void * +static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct tracer *t = m->private; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index faf9f67..2b7352b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -150,7 +150,7 @@ struct trace_iterator { long idx; }; -void notrace tracing_reset(struct trace_array_cpu *data); +void tracing_reset(struct trace_array_cpu *data); int tracing_open_generic(struct inode *inode, struct file *filp); struct dentry *tracing_init_dentry(void); void ftrace(struct trace_array *tr, @@ -189,10 +189,10 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); -extern notrace cycle_t ftrace_now(int cpu); +extern cycle_t ftrace_now(int cpu); #ifdef CONFIG_SCHED_TRACER -extern void notrace +extern void wakeup_sched_switch(struct task_struct *prev, struct task_struct *next); #else static inline void diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 69a0eb0..4165d34 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -16,7 +16,7 @@ #include "trace.h" -static notrace void function_reset(struct trace_array *tr) +static void function_reset(struct trace_array *tr) { int cpu; @@ -26,30 +26,30 @@ static notrace void function_reset(struct trace_array *tr) tracing_reset(tr->data[cpu]); } -static notrace void start_function_trace(struct trace_array *tr) +static void start_function_trace(struct trace_array *tr) { function_reset(tr); tracing_start_function_trace(); } -static notrace void stop_function_trace(struct trace_array *tr) +static void stop_function_trace(struct trace_array *tr) { tracing_stop_function_trace(); } -static notrace void function_trace_init(struct trace_array *tr) +static void function_trace_init(struct trace_array *tr) { if (tr->ctrl) start_function_trace(tr); } -static notrace void function_trace_reset(struct trace_array *tr) +static void function_trace_reset(struct trace_array *tr) { if (tr->ctrl) stop_function_trace(tr); } -static notrace void function_trace_ctrl_update(struct trace_array *tr) +static void function_trace_ctrl_update(struct trace_array *tr) { if (tr->ctrl) start_function_trace(tr); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2ac0d09..7a4dc01 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -33,7 +33,7 @@ enum { static int trace_type __read_mostly; #ifdef CONFIG_PREEMPT_TRACER -static inline int notrace +static inline int preempt_trace(void) { return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count()); @@ -43,7 +43,7 @@ preempt_trace(void) #endif #ifdef CONFIG_IRQSOFF_TRACER -static inline int notrace +static inline int irq_trace(void) { return ((trace_type & TRACER_IRQS_OFF) && @@ -67,7 +67,7 @@ static __cacheline_aligned_in_smp unsigned long max_sequence; /* * irqsoff uses its own tracer function to keep the overhead down: */ -static void notrace +static void irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) { struct trace_array *tr = irqsoff_trace; @@ -109,7 +109,7 @@ static struct ftrace_ops trace_ops __read_mostly = /* * Should this new latency be reported/recorded? */ -static int notrace report_latency(cycle_t delta) +static int report_latency(cycle_t delta) { if (tracing_thresh) { if (delta < tracing_thresh) @@ -121,7 +121,7 @@ static int notrace report_latency(cycle_t delta) return 1; } -static void notrace +static void check_critical_timing(struct trace_array *tr, struct trace_array_cpu *data, unsigned long parent_ip, @@ -191,7 +191,7 @@ out: trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); } -static inline void notrace +static inline void start_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; @@ -228,7 +228,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_dec(&data->disabled); } -static inline void notrace +static inline void stop_critical_timing(unsigned long ip, unsigned long parent_ip) { int cpu; @@ -261,13 +261,13 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) } /* start and stop critical timings used to for stoppage (in idle) */ -void notrace start_critical_timings(void) +void start_critical_timings(void) { if (preempt_trace() || irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } -void notrace stop_critical_timings(void) +void stop_critical_timings(void) { if (preempt_trace() || irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); @@ -275,13 +275,13 @@ void notrace stop_critical_timings(void) #ifdef CONFIG_IRQSOFF_TRACER #ifdef CONFIG_PROVE_LOCKING -void notrace time_hardirqs_on(unsigned long a0, unsigned long a1) +void time_hardirqs_on(unsigned long a0, unsigned long a1) { if (!preempt_trace() && irq_trace()) stop_critical_timing(a0, a1); } -void notrace time_hardirqs_off(unsigned long a0, unsigned long a1) +void time_hardirqs_off(unsigned long a0, unsigned long a1) { if (!preempt_trace() && irq_trace()) start_critical_timing(a0, a1); @@ -309,35 +309,35 @@ void trace_softirqs_off(unsigned long ip) { } -inline notrace void print_irqtrace_events(struct task_struct *curr) +inline void print_irqtrace_events(struct task_struct *curr) { } /* * We are only interested in hardirq on/off events: */ -void notrace trace_hardirqs_on(void) +void trace_hardirqs_on(void) { if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL(trace_hardirqs_on); -void notrace trace_hardirqs_off(void) +void trace_hardirqs_off(void) { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); } EXPORT_SYMBOL(trace_hardirqs_off); -void notrace trace_hardirqs_on_caller(unsigned long caller_addr) +void trace_hardirqs_on_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, caller_addr); } EXPORT_SYMBOL(trace_hardirqs_on_caller); -void notrace trace_hardirqs_off_caller(unsigned long caller_addr) +void trace_hardirqs_off_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, caller_addr); @@ -348,12 +348,12 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller); #endif /* CONFIG_IRQSOFF_TRACER */ #ifdef CONFIG_PREEMPT_TRACER -void notrace trace_preempt_on(unsigned long a0, unsigned long a1) +void trace_preempt_on(unsigned long a0, unsigned long a1) { stop_critical_timing(a0, a1); } -void notrace trace_preempt_off(unsigned long a0, unsigned long a1) +void trace_preempt_off(unsigned long a0, unsigned long a1) { start_critical_timing(a0, a1); } @@ -395,14 +395,14 @@ static void irqsoff_tracer_ctrl_update(struct trace_array *tr) stop_irqsoff_tracer(tr); } -static void notrace irqsoff_tracer_open(struct trace_iterator *iter) +static void irqsoff_tracer_open(struct trace_iterator *iter) { /* stop the trace while dumping */ if (iter->tr->ctrl) stop_irqsoff_tracer(iter->tr); } -static void notrace irqsoff_tracer_close(struct trace_iterator *iter) +static void irqsoff_tracer_close(struct trace_iterator *iter) { if (iter->tr->ctrl) start_irqsoff_tracer(iter->tr); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 8d65667..b738eac 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -17,7 +17,7 @@ static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; -static void notrace +static void ctx_switch_func(struct task_struct *prev, struct task_struct *next) { struct trace_array *tr = ctx_trace; @@ -57,7 +57,7 @@ void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) wakeup_sched_switch(prev, next); } -static notrace void sched_switch_reset(struct trace_array *tr) +static void sched_switch_reset(struct trace_array *tr) { int cpu; @@ -67,18 +67,18 @@ static notrace void sched_switch_reset(struct trace_array *tr) tracing_reset(tr->data[cpu]); } -static notrace void start_sched_trace(struct trace_array *tr) +static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); tracer_enabled = 1; } -static notrace void stop_sched_trace(struct trace_array *tr) +static void stop_sched_trace(struct trace_array *tr) { tracer_enabled = 0; } -static notrace void sched_switch_trace_init(struct trace_array *tr) +static void sched_switch_trace_init(struct trace_array *tr) { ctx_trace = tr; @@ -86,7 +86,7 @@ static notrace void sched_switch_trace_init(struct trace_array *tr) start_sched_trace(tr); } -static notrace void sched_switch_trace_reset(struct trace_array *tr) +static void sched_switch_trace_reset(struct trace_array *tr) { if (tr->ctrl) stop_sched_trace(tr); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3549e41..662679c 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -27,12 +27,12 @@ static unsigned wakeup_prio = -1; static DEFINE_SPINLOCK(wakeup_lock); -static void notrace __wakeup_reset(struct trace_array *tr); +static void __wakeup_reset(struct trace_array *tr); /* * Should this new latency be reported/recorded? */ -static int notrace report_latency(cycle_t delta) +static int report_latency(cycle_t delta) { if (tracing_thresh) { if (delta < tracing_thresh) @@ -44,7 +44,7 @@ static int notrace report_latency(cycle_t delta) return 1; } -void notrace +void wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) { unsigned long latency = 0, t0 = 0, t1 = 0; @@ -126,7 +126,7 @@ out: atomic_dec(&tr->data[cpu]->disabled); } -static void notrace __wakeup_reset(struct trace_array *tr) +static void __wakeup_reset(struct trace_array *tr) { struct trace_array_cpu *data; int cpu; @@ -147,7 +147,7 @@ static void notrace __wakeup_reset(struct trace_array *tr) wakeup_task = NULL; } -static void notrace wakeup_reset(struct trace_array *tr) +static void wakeup_reset(struct trace_array *tr) { unsigned long flags; @@ -156,7 +156,7 @@ static void notrace wakeup_reset(struct trace_array *tr) spin_unlock_irqrestore(&wakeup_lock, flags); } -static notrace void +static void wakeup_check_start(struct trace_array *tr, struct task_struct *p, struct task_struct *curr) { @@ -201,7 +201,7 @@ out: atomic_dec(&tr->data[cpu]->disabled); } -notrace void +void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) { if (likely(!tracer_enabled)) @@ -210,7 +210,7 @@ ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) wakeup_check_start(wakeup_trace, wakee, curr); } -notrace void +void ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) { if (likely(!tracer_enabled)) @@ -219,7 +219,7 @@ ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) wakeup_check_start(wakeup_trace, wakee, curr); } -static notrace void start_wakeup_tracer(struct trace_array *tr) +static void start_wakeup_tracer(struct trace_array *tr) { wakeup_reset(tr); @@ -237,12 +237,12 @@ static notrace void start_wakeup_tracer(struct trace_array *tr) return; } -static notrace void stop_wakeup_tracer(struct trace_array *tr) +static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; } -static notrace void wakeup_tracer_init(struct trace_array *tr) +static void wakeup_tracer_init(struct trace_array *tr) { wakeup_trace = tr; @@ -250,7 +250,7 @@ static notrace void wakeup_tracer_init(struct trace_array *tr) start_wakeup_tracer(tr); } -static notrace void wakeup_tracer_reset(struct trace_array *tr) +static void wakeup_tracer_reset(struct trace_array *tr) { if (tr->ctrl) { stop_wakeup_tracer(tr); @@ -267,14 +267,14 @@ static void wakeup_tracer_ctrl_update(struct trace_array *tr) stop_wakeup_tracer(tr); } -static void notrace wakeup_tracer_open(struct trace_iterator *iter) +static void wakeup_tracer_open(struct trace_iterator *iter) { /* stop the trace while dumping */ if (iter->tr->ctrl) stop_wakeup_tracer(iter->tr); } -static void notrace wakeup_tracer_close(struct trace_iterator *iter) +static void wakeup_tracer_close(struct trace_iterator *iter) { /* forget about any processes we were recording */ if (iter->tr->ctrl) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 546307d..85715b8 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -3,7 +3,7 @@ #include #include -static notrace inline int trace_valid_entry(struct trace_entry *entry) +static inline int trace_valid_entry(struct trace_entry *entry) { switch (entry->type) { case TRACE_FN: -- cgit v0.10.2 From 57422797dc009fc83766bcf230d29dbe6e08e21e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: add wakeup events to sched tracer Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f589805..192c135 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -57,7 +57,7 @@ static struct trace_array max_tr; static DEFINE_PER_CPU(struct trace_array_cpu, max_data); static int tracer_enabled = 1; -static unsigned long trace_nr_entries = 16384UL; +static unsigned long trace_nr_entries = 65536UL; static struct tracer *trace_types __read_mostly; static struct tracer *current_trace __read_mostly; @@ -87,6 +87,7 @@ enum trace_type { TRACE_FN, TRACE_CTX, + TRACE_WAKE, TRACE_SPECIAL, __TRACE_LAST_TYPE @@ -711,6 +712,30 @@ tracing_sched_switch_trace(struct trace_array *tr, wake_up (&trace_wait); } +void +tracing_sched_wakeup_trace(struct trace_array *tr, + struct trace_array_cpu *data, + struct task_struct *wakee, struct task_struct *curr, + unsigned long flags) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + spin_lock_irqsave(&data->lock, irq_flags); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_WAKE; + entry->ctx.prev_pid = curr->pid; + entry->ctx.prev_prio = curr->prio; + entry->ctx.prev_state = curr->state; + entry->ctx.next_pid = wakee->pid; + entry->ctx.next_prio = wakee->prio; + spin_unlock_irqrestore(&data->lock, irq_flags); + + if (!(trace_flags & TRACE_ITER_BLOCK)) + wake_up(&trace_wait); +} + #ifdef CONFIG_FTRACE static void function_trace_call(unsigned long ip, unsigned long parent_ip) @@ -1183,13 +1208,14 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) trace_seq_puts(s, ")\n"); break; case TRACE_CTX: + case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; comm = trace_find_cmdline(entry->ctx.next_pid); - trace_seq_printf(s, " %d:%d:%c --> %d:%d %s\n", + trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d %s\n", entry->ctx.prev_pid, entry->ctx.prev_prio, - S, + S, entry->type == TRACE_CTX ? "==>" : " +", entry->ctx.next_pid, entry->ctx.next_prio, comm); @@ -1256,12 +1282,14 @@ static int print_trace_fmt(struct trace_iterator *iter) return 0; break; case TRACE_CTX: + case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; - ret = trace_seq_printf(s, " %d:%d:%c ==> %d:%d\n", + ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d\n", entry->ctx.prev_pid, entry->ctx.prev_prio, S, + entry->type == TRACE_CTX ? "==>" : " +", entry->ctx.next_pid, entry->ctx.next_prio); if (!ret) @@ -1301,8 +1329,11 @@ static int print_raw_fmt(struct trace_iterator *iter) return 0; break; case TRACE_CTX: + case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; + if (entry->type == TRACE_WAKE) + S = '+'; ret = trace_seq_printf(s, "%d %d %c %d %d\n", entry->ctx.prev_pid, entry->ctx.prev_prio, @@ -1355,8 +1386,11 @@ static int print_hex_fmt(struct trace_iterator *iter) SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); break; case TRACE_CTX: + case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; + if (entry->type == TRACE_WAKE) + S = '+'; SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid); SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio); SEQ_PUT_HEX_FIELD_RET(s, S); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2b7352b..90e0ba0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -164,6 +164,12 @@ void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *next, unsigned long flags); void tracing_record_cmdline(struct task_struct *tsk); + +void tracing_sched_wakeup_trace(struct trace_array *tr, + struct trace_array_cpu *data, + struct task_struct *wakee, + struct task_struct *cur, + unsigned long flags); void trace_special(struct trace_array *tr, struct trace_array_cpu *data, unsigned long arg1, @@ -194,11 +200,17 @@ extern cycle_t ftrace_now(int cpu); #ifdef CONFIG_SCHED_TRACER extern void wakeup_sched_switch(struct task_struct *prev, struct task_struct *next); +extern void +wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr); #else static inline void wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) { } +static inline void +wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) +{ +} #endif #ifdef CONFIG_CONTEXT_SWITCH_TRACER diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index b738eac..8b1cf1a 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -41,6 +41,29 @@ ctx_switch_func(struct task_struct *prev, struct task_struct *next) local_irq_restore(flags); } +static void wakeup_func(struct task_struct *wakee, struct task_struct *curr) +{ + struct trace_array *tr = ctx_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (!tracer_enabled) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) { tracing_record_cmdline(prev); @@ -57,6 +80,19 @@ void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) wakeup_sched_switch(prev, next); } +void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +{ + tracing_record_cmdline(curr); + + wakeup_func(wakee, curr); + + /* + * Chain to the wakeup tracer (this is a NOP if disabled): + */ + wakeup_sched_wakeup(wakee, curr); +} + static void sched_switch_reset(struct trace_array *tr) { int cpu; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 662679c..87fa7b2 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -202,7 +202,7 @@ out: } void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) { if (likely(!tracer_enabled)) return; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 85715b8..39dd452 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -8,6 +8,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) switch (entry->type) { case TRACE_FN: case TRACE_CTX: + case TRACE_WAKE: return 1; } return 0; -- cgit v0.10.2 From 86387f7ee5d3273ff4859e2c64ce656639b6ca65 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: add stack tracing Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 08fbef1..0d3714e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -93,6 +93,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) # define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) # define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) #else # define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) # define CALLER_ADDR1 0UL @@ -100,6 +101,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 0UL # define CALLER_ADDR4 0UL # define CALLER_ADDR5 0UL +# define CALLER_ADDR6 0UL #endif #ifdef CONFIG_IRQSOFF_TRACER diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3f73a17..eb1988e 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -10,6 +10,7 @@ config TRACER_MAX_TRACE config TRACING bool select DEBUG_FS + select STACKTRACE config FTRACE bool "Kernel Function Tracer" diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 192c135..b4b1b4f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -28,6 +28,8 @@ #include #include +#include + #include "trace.h" unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; @@ -88,6 +90,7 @@ enum trace_type { TRACE_FN, TRACE_CTX, TRACE_WAKE, + TRACE_STACK, TRACE_SPECIAL, __TRACE_LAST_TYPE @@ -109,6 +112,7 @@ enum trace_iterator_flags { TRACE_ITER_HEX = 0x20, TRACE_ITER_BIN = 0x40, TRACE_ITER_BLOCK = 0x80, + TRACE_ITER_STACKTRACE = 0x100, }; #define TRACE_ITER_SYM_MASK \ @@ -124,10 +128,11 @@ static const char *trace_options[] = { "hex", "bin", "block", + "stacktrace", NULL }; -static unsigned trace_flags; +static unsigned trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_STACKTRACE; static DEFINE_SPINLOCK(ftrace_max_lock); @@ -657,7 +662,7 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up (&trace_wait); + wake_up(&trace_wait); } void @@ -685,13 +690,39 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up (&trace_wait); + wake_up(&trace_wait); +} + +void __trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip) +{ + struct trace_entry *entry; + struct stack_trace trace; + + if (!(trace_flags & TRACE_ITER_STACKTRACE)) + return; + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_STACK; + + memset(&entry->stack, 0, sizeof(entry->stack)); + + trace.nr_entries = 0; + trace.max_entries = FTRACE_STACK_ENTRIES; + trace.skip = skip; + trace.entries = entry->stack.caller; + + save_stack_trace(&trace); } void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, - struct task_struct *prev, struct task_struct *next, + struct task_struct *prev, + struct task_struct *next, unsigned long flags) { struct trace_entry *entry; @@ -706,16 +737,18 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.prev_state = prev->state; entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; + __trace_stack(tr, data, flags, 4); spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up (&trace_wait); + wake_up(&trace_wait); } void tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_array_cpu *data, - struct task_struct *wakee, struct task_struct *curr, + struct task_struct *wakee, + struct task_struct *curr, unsigned long flags) { struct trace_entry *entry; @@ -730,6 +763,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.prev_state = curr->state; entry->ctx.next_pid = wakee->pid; entry->ctx.next_prio = wakee->prio; + __trace_stack(tr, data, flags, 5); spin_unlock_irqrestore(&data->lock, irq_flags); if (!(trace_flags & TRACE_ITER_BLOCK)) @@ -1179,6 +1213,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) unsigned long rel_usecs; char *comm; int S; + int i; if (!next_entry) next_entry = entry; @@ -1197,8 +1232,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); } else { - lat_print_generic(s, entry, cpu); - lat_print_timestamp(s, abs_usecs, rel_usecs); + if (entry->type != TRACE_STACK) { + lat_print_generic(s, entry, cpu); + lat_print_timestamp(s, abs_usecs, rel_usecs); + } } switch (entry->type) { case TRACE_FN: @@ -1226,6 +1263,14 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) entry->special.arg2, entry->special.arg3); break; + case TRACE_STACK: + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + if (i) + trace_seq_puts(s, " <= "); + seq_print_ip_sym(s, entry->stack.caller[i], sym_flags); + } + trace_seq_puts(s, "\n"); + break; default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1241,8 +1286,9 @@ static int print_trace_fmt(struct trace_iterator *iter) unsigned long long t; unsigned long secs; char *comm; - int S; int ret; + int S; + int i; entry = iter->ent; @@ -1252,15 +1298,17 @@ static int print_trace_fmt(struct trace_iterator *iter) usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; - ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); - if (!ret) - return 0; - ret = trace_seq_printf(s, "[%02d] ", iter->cpu); - if (!ret) - return 0; - ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); - if (!ret) - return 0; + if (entry->type != TRACE_STACK) { + ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + if (!ret) + return 0; + ret = trace_seq_printf(s, "[%02d] ", iter->cpu); + if (!ret) + return 0; + ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); + if (!ret) + return 0; + } switch (entry->type) { case TRACE_FN: @@ -1303,6 +1351,22 @@ static int print_trace_fmt(struct trace_iterator *iter) if (!ret) return 0; break; + case TRACE_STACK: + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + if (i) { + ret = trace_seq_puts(s, " <= "); + if (!ret) + return 0; + } + ret = seq_print_ip_sym(s, entry->stack.caller[i], + sym_flags); + if (!ret) + return 0; + } + ret = trace_seq_puts(s, "\n"); + if (!ret) + return 0; + break; } return 1; } @@ -1344,6 +1408,7 @@ static int print_raw_fmt(struct trace_iterator *iter) return 0; break; case TRACE_SPECIAL: + case TRACE_STACK: ret = trace_seq_printf(s, " %lx %lx %lx\n", entry->special.arg1, entry->special.arg2, @@ -1399,6 +1464,7 @@ static int print_hex_fmt(struct trace_iterator *iter) SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); break; case TRACE_SPECIAL: + case TRACE_STACK: SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1); SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2); SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3); @@ -1433,6 +1499,7 @@ static int print_bin_fmt(struct trace_iterator *iter) SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); break; case TRACE_SPECIAL: + case TRACE_STACK: SEQ_PUT_FIELD_RET(s, entry->special.arg1); SEQ_PUT_FIELD_RET(s, entry->special.arg2); SEQ_PUT_FIELD_RET(s, entry->special.arg3); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 90e0ba0..387bdcf 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -35,6 +35,16 @@ struct special_entry { }; /* + * Stack-trace entry: + */ + +#define FTRACE_STACK_ENTRIES 5 + +struct stack_entry { + unsigned long caller[FTRACE_STACK_ENTRIES]; +}; + +/* * The trace entry - the most basic unit of tracing. This is what * is printed in the end as a single line in the trace output, such as: * @@ -51,6 +61,7 @@ struct trace_entry { struct ftrace_entry fn; struct ctx_switch_entry ctx; struct special_entry special; + struct stack_entry stack; }; }; -- cgit v0.10.2 From 8ac0fca4ccb355ce50471d7aa3f10f5900b28b95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: sched tracer fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e26f1f..05744f9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2130,17 +2130,11 @@ ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) #ifdef CONFIG_SCHED_TRACER extern void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -extern void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); #else static inline void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) { } -static inline void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); diff --git a/kernel/sched.c b/kernel/sched.c index 328494e..53ab117 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2613,7 +2613,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - ftrace_wake_up_new_task(p, rq->curr); + ftrace_wake_up_task(p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 87fa7b2..2a01242 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -201,20 +201,13 @@ out: atomic_dec(&tr->data[cpu]->disabled); } -void -wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) +void wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) { if (likely(!tracer_enabled)) return; - wakeup_check_start(wakeup_trace, wakee, curr); -} - -void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) -{ - if (likely(!tracer_enabled)) - return; + tracing_record_cmdline(curr); + tracing_record_cmdline(wakee); wakeup_check_start(wakeup_trace, wakee, curr); } -- cgit v0.10.2 From 4c1f4d4f0175129934d5dbc19a39296430937a05 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: make nostacktrace the default Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b4b1b4f..0e4b711 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -132,7 +132,7 @@ static const char *trace_options[] = { NULL }; -static unsigned trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_STACKTRACE; +static unsigned trace_flags = TRACE_ITER_PRINT_PARENT; static DEFINE_SPINLOCK(ftrace_max_lock); -- cgit v0.10.2 From 4e65551905fb0300ae7e667cbaa41ee2e3f29a13 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: sched tracer, trace full rbtree Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/sched.h b/include/linux/sched.h index 05744f9..652d380 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,20 +2119,34 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); +extern void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr); +extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) +{ +} +static inline void +sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) +{ +} +static inline void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr) +{ +} +static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -#endif - -#ifdef CONFIG_SCHED_TRACER -extern void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -#else static inline void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) { } #endif diff --git a/kernel/sched.c b/kernel/sched.c index 53ab117..b9208a0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2394,6 +2394,35 @@ static int sched_balance_self(int cpu, int flag) #endif /* CONFIG_SMP */ +#ifdef CONFIG_CONTEXT_SWITCH_TRACER + +void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) +{ + struct sched_entity *se; + struct task_struct *p; + struct rb_node *curr; + struct rq *rq = __rq; + + curr = first_fair(&rq->cfs); + if (!curr) + return; + + while (curr) { + se = rb_entry(curr, struct sched_entity, run_node); + if (!entity_is_task(se)) + continue; + + p = task_of(se); + + __trace_special(__tr, __data, + p->pid, p->se.vruntime, p->se.sum_exec_runtime); + + curr = rb_next(curr); + } +} + +#endif + /*** * try_to_wake_up - wake up a thread * @p: the to-be-woken-up thread @@ -2468,7 +2497,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) out_activate: #endif /* CONFIG_SMP */ - ftrace_wake_up_task(p, rq->curr); + ftrace_wake_up_task(rq, p, rq->curr); schedstat_inc(p, se.nr_wakeups); if (sync) schedstat_inc(p, se.nr_wakeups_sync); @@ -2613,7 +2642,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - ftrace_wake_up_task(p, rq->curr); + ftrace_wake_up_task(rq, p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) @@ -2786,7 +2815,7 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); - ftrace_ctx_switch(prev, next); + ftrace_ctx_switch(rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0e4b711..65173b1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -66,7 +66,18 @@ static struct tracer *current_trace __read_mostly; static int max_tracer_type_len; static DEFINE_MUTEX(trace_types_lock); -static DECLARE_WAIT_QUEUE_HEAD (trace_wait); +static DECLARE_WAIT_QUEUE_HEAD(trace_wait); + +unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; + +/* + * FIXME: where should this be called? + */ +void trace_wake_up(void) +{ + if (!(trace_flags & TRACE_ITER_BLOCK)) + wake_up(&trace_wait); +} #define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) @@ -103,18 +114,6 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x08, }; -enum trace_iterator_flags { - TRACE_ITER_PRINT_PARENT = 0x01, - TRACE_ITER_SYM_OFFSET = 0x02, - TRACE_ITER_SYM_ADDR = 0x04, - TRACE_ITER_VERBOSE = 0x08, - TRACE_ITER_RAW = 0x10, - TRACE_ITER_HEX = 0x20, - TRACE_ITER_BIN = 0x40, - TRACE_ITER_BLOCK = 0x80, - TRACE_ITER_STACKTRACE = 0x100, -}; - #define TRACE_ITER_SYM_MASK \ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) @@ -132,8 +131,6 @@ static const char *trace_options[] = { NULL }; -static unsigned trace_flags = TRACE_ITER_PRINT_PARENT; - static DEFINE_SPINLOCK(ftrace_max_lock); /* @@ -660,9 +657,6 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } void @@ -673,10 +667,14 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } +#ifdef CONFIG_CONTEXT_SWITCH_TRACER + void -trace_special(struct trace_array *tr, struct trace_array_cpu *data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) { + struct trace_array_cpu *data = __data; + struct trace_array *tr = __tr; struct trace_entry *entry; unsigned long irq_flags; @@ -688,11 +686,10 @@ trace_special(struct trace_array *tr, struct trace_array_cpu *data, entry->special.arg2 = arg2; entry->special.arg3 = arg3; spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } +#endif + void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, @@ -739,9 +736,6 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.next_prio = next->prio; __trace_stack(tr, data, flags, 4); spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } void @@ -765,9 +759,6 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.next_prio = wakee->prio; __trace_stack(tr, data, flags, 5); spin_unlock_irqrestore(&data->lock, irq_flags); - - if (!(trace_flags & TRACE_ITER_BLOCK)) - wake_up(&trace_wait); } #ifdef CONFIG_FTRACE @@ -1258,7 +1249,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) comm); break; case TRACE_SPECIAL: - trace_seq_printf(s, " %lx %lx %lx\n", + trace_seq_printf(s, " %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); @@ -1344,7 +1335,7 @@ static int print_trace_fmt(struct trace_iterator *iter) return 0; break; case TRACE_SPECIAL: - ret = trace_seq_printf(s, " %lx %lx %lx\n", + ret = trace_seq_printf(s, " %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); @@ -1409,7 +1400,7 @@ static int print_raw_fmt(struct trace_iterator *iter) break; case TRACE_SPECIAL: case TRACE_STACK: - ret = trace_seq_printf(s, " %lx %lx %lx\n", + ret = trace_seq_printf(s, " %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 387bdcf..75e2374 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -274,4 +274,18 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); +extern unsigned long trace_flags; + +enum trace_iterator_flags { + TRACE_ITER_PRINT_PARENT = 0x01, + TRACE_ITER_SYM_OFFSET = 0x02, + TRACE_ITER_SYM_ADDR = 0x04, + TRACE_ITER_VERBOSE = 0x08, + TRACE_ITER_RAW = 0x10, + TRACE_ITER_HEX = 0x20, + TRACE_ITER_BIN = 0x40, + TRACE_ITER_BLOCK = 0x80, + TRACE_ITER_STACKTRACE = 0x100, +}; + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 8b1cf1a..12658b3 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -18,7 +18,7 @@ static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; static void -ctx_switch_func(struct task_struct *prev, struct task_struct *next) +ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) { struct trace_array *tr = ctx_trace; struct trace_array_cpu *data; @@ -34,14 +34,17 @@ ctx_switch_func(struct task_struct *prev, struct task_struct *next) data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); - if (likely(disabled == 1)) + if (likely(disabled == 1)) { tracing_sched_switch_trace(tr, data, prev, next, flags); + ftrace_all_fair_tasks(__rq, tr, data); + } atomic_dec(&data->disabled); local_irq_restore(flags); } -static void wakeup_func(struct task_struct *wakee, struct task_struct *curr) +static void +wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr) { struct trace_array *tr = ctx_trace; struct trace_array_cpu *data; @@ -57,14 +60,18 @@ static void wakeup_func(struct task_struct *wakee, struct task_struct *curr) data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); - if (likely(disabled == 1)) + if (likely(disabled == 1)) { tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); + ftrace_all_fair_tasks(__rq, tr, data); + } atomic_dec(&data->disabled); local_irq_restore(flags); } -void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +void +ftrace_ctx_switch(void *__rq, struct task_struct *prev, + struct task_struct *next) { tracing_record_cmdline(prev); @@ -72,7 +79,7 @@ void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) * If tracer_switch_func only points to the local * switch func, it still needs the ptr passed to it. */ - ctx_switch_func(prev, next); + ctx_switch_func(__rq, prev, next); /* * Chain to the wakeup tracer (this is a NOP if disabled): @@ -81,11 +88,12 @@ void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) } void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +ftrace_wake_up_task(void *__rq, struct task_struct *wakee, + struct task_struct *curr) { tracing_record_cmdline(curr); - wakeup_func(wakee, curr); + wakeup_func(__rq, wakee, curr); /* * Chain to the wakeup tracer (this is a NOP if disabled): -- cgit v0.10.2 From 24cd5d111e8c713e62cda7ca1d01232402e3d3c9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: trace curr/next tasks Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/sched.c b/kernel/sched.c index b9208a0..673b588 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2398,8 +2398,8 @@ static int sched_balance_self(int cpu, int flag) void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { - struct sched_entity *se; struct task_struct *p; + struct sched_entity *se; struct rb_node *curr; struct rq *rq = __rq; @@ -2407,6 +2407,17 @@ void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) if (!curr) return; + if (rq->cfs.curr) { + p = task_of(rq->cfs.curr); + __trace_special(__tr, __data, + p->pid, p->se.vruntime, p->se.sum_exec_runtime); + } + if (rq->cfs.next) { + p = task_of(rq->cfs.next); + __trace_special(__tr, __data, + p->pid, p->se.vruntime, p->se.sum_exec_runtime); + } + while (curr) { se = rb_entry(curr, struct sched_entity, run_node); if (!entity_is_task(se)) -- cgit v0.10.2 From 017730c11241e26577673eb9d957cfc66172ea91 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix wakeups Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/sched.h b/include/linux/sched.h index 652d380..a3970b5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -246,6 +246,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); +extern int runqueue_is_locked(void); + extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); diff --git a/kernel/sched.c b/kernel/sched.c index 673b588..9ca4a2e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -642,6 +642,24 @@ static inline void update_rq_clock(struct rq *rq) # define const_debug static const #endif +/** + * runqueue_is_locked + * + * Returns true if the current cpu runqueue is locked. + * This interface allows printk to be called with the runqueue lock + * held and know whether or not it is OK to wake up the klogd. + */ +int runqueue_is_locked(void) +{ + int cpu = get_cpu(); + struct rq *rq = cpu_rq(cpu); + int ret; + + ret = spin_is_locked(&rq->lock); + put_cpu(); + return ret; +} + /* * Debugging: various feature bits */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 65173b1..2ca9d66 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -70,12 +70,13 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; -/* - * FIXME: where should this be called? - */ void trace_wake_up(void) { - if (!(trace_flags & TRACE_ITER_BLOCK)) + /* + * The runqueue_is_locked() can fail, but this is the best we + * have for now: + */ + if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked()) wake_up(&trace_wait); } @@ -657,6 +658,8 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); } void @@ -686,6 +689,8 @@ __trace_special(void *__tr, void *__data, entry->special.arg2 = arg2; entry->special.arg3 = arg3; spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); } #endif @@ -759,6 +764,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.next_prio = wakee->prio; __trace_stack(tr, data, flags, 5); spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); } #ifdef CONFIG_FTRACE -- cgit v0.10.2 From 1a3c3034336320554a3342572dae98d69e054fc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix __trace_special() Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/sched.h b/include/linux/sched.h index a3970b5..5b186be 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,6 +2119,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_TRACING +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} +#endif + #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); @@ -2126,9 +2138,6 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2146,11 +2155,6 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -static inline void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2ca9d66..65d2c0a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -670,8 +670,6 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } -#ifdef CONFIG_CONTEXT_SWITCH_TRACER - void __trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3) @@ -693,8 +691,6 @@ __trace_special(void *__tr, void *__data, trace_wake_up(); } -#endif - void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, -- cgit v0.10.2 From 4ac3ba41d372e3a9e420b36bc43589662b188a14 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: trace scheduler rbtree Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 65d2c0a..06380dc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -129,6 +129,7 @@ static const char *trace_options[] = { "bin", "block", "stacktrace", + "sched-tree", NULL }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 75e2374..a520157 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -286,6 +286,7 @@ enum trace_iterator_flags { TRACE_ITER_BIN = 0x40, TRACE_ITER_BLOCK = 0x80, TRACE_ITER_STACKTRACE = 0x100, + TRACE_ITER_SCHED_TREE = 0x200, }; #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 12658b3..5555b90 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -36,7 +36,8 @@ ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) if (likely(disabled == 1)) { tracing_sched_switch_trace(tr, data, prev, next, flags); - ftrace_all_fair_tasks(__rq, tr, data); + if (trace_flags & TRACE_ITER_SCHED_TREE) + ftrace_all_fair_tasks(__rq, tr, data); } atomic_dec(&data->disabled); @@ -62,7 +63,8 @@ wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr) if (likely(disabled == 1)) { tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); - ftrace_all_fair_tasks(__rq, tr, data); + if (trace_flags & TRACE_ITER_SCHED_TREE) + ftrace_all_fair_tasks(__rq, tr, data); } atomic_dec(&data->disabled); -- cgit v0.10.2 From c7078de1aaf562a31b20984409c38cc1b40fa8a3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: add tracing_cpumask Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 06380dc..18f0ab8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -70,6 +70,23 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; +/* + * Only trace on a CPU if the bitmask is set: + */ +static cpumask_t tracing_cpumask __read_mostly = CPU_MASK_ALL; + +/* + * The tracer itself will not take this lock, but still we want + * to provide a consistent cpumask to user-space: + */ +static DEFINE_MUTEX(tracing_cpumask_update_lock); + +/* + * Temporary storage for the character representation of the + * CPU bitmask: + */ +static char mask_str[NR_CPUS]; + void trace_wake_up(void) { /* @@ -1754,9 +1771,49 @@ static struct file_operations tracing_lt_fops = { }; static struct file_operations show_traces_fops = { - .open = show_traces_open, - .read = seq_read, - .release = seq_release, + .open = show_traces_open, + .read = seq_read, + .release = seq_release, +}; + +static ssize_t +tracing_cpumask_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + int err; + + count = min(count, (size_t)NR_CPUS); + + mutex_lock(&tracing_cpumask_update_lock); + cpumask_scnprintf(mask_str, NR_CPUS, tracing_cpumask); + err = copy_to_user(ubuf, mask_str, count); + if (err) + count = -EFAULT; + mutex_unlock(&tracing_cpumask_update_lock); + + return count; +} + +static ssize_t +tracing_cpumask_write(struct file *filp, const char __user *ubuf, + size_t count, loff_t *ppos) +{ + int err; + + mutex_lock(&tracing_cpumask_update_lock); + err = cpumask_parse_user(ubuf, count, tracing_cpumask); + mutex_unlock(&tracing_cpumask_update_lock); + + if (err) + return err; + + return count; +} + +static struct file_operations tracing_cpumask_fops = { + .open = tracing_open_generic, + .read = tracing_cpumask_read, + .write = tracing_cpumask_write, }; static ssize_t @@ -1837,9 +1894,9 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, } static struct file_operations tracing_iter_fops = { - .open = tracing_open_generic, - .read = tracing_iter_ctrl_read, - .write = tracing_iter_ctrl_write, + .open = tracing_open_generic, + .read = tracing_iter_ctrl_read, + .write = tracing_iter_ctrl_write, }; static const char readme_msg[] = @@ -1870,8 +1927,8 @@ tracing_readme_read(struct file *filp, char __user *ubuf, } static struct file_operations tracing_readme_fops = { - .open = tracing_open_generic, - .read = tracing_readme_read, + .open = tracing_open_generic, + .read = tracing_readme_read, }; static ssize_t @@ -2334,6 +2391,11 @@ static __init void tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); + entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, + NULL, &tracing_cpumask_fops); + if (!entry) + pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); + entry = debugfs_create_file("latency_trace", 0444, d_tracer, &global_trace, &tracing_lt_fops); if (!entry) -- cgit v0.10.2 From 36dfe9252bd4c9b55e8454363fb7e444c92c5030 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: make use of tracing_cpumask Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 18f0ab8..3ae17e2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -70,23 +70,6 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; -/* - * Only trace on a CPU if the bitmask is set: - */ -static cpumask_t tracing_cpumask __read_mostly = CPU_MASK_ALL; - -/* - * The tracer itself will not take this lock, but still we want - * to provide a consistent cpumask to user-space: - */ -static DEFINE_MUTEX(tracing_cpumask_update_lock); - -/* - * Temporary storage for the character representation of the - * CPU bitmask: - */ -static char mask_str[NR_CPUS]; - void trace_wake_up(void) { /* @@ -1776,19 +1759,46 @@ static struct file_operations show_traces_fops = { .release = seq_release, }; +/* + * Only trace on a CPU if the bitmask is set: + */ +static cpumask_t tracing_cpumask = CPU_MASK_ALL; + +/* + * When tracing/tracing_cpu_mask is modified then this holds + * the new bitmask we are about to install: + */ +static cpumask_t tracing_cpumask_new; + +/* + * The tracer itself will not take this lock, but still we want + * to provide a consistent cpumask to user-space: + */ +static DEFINE_MUTEX(tracing_cpumask_update_lock); + +/* + * Temporary storage for the character representation of the + * CPU bitmask (and one more byte for the newline): + */ +static char mask_str[NR_CPUS + 1]; + static ssize_t tracing_cpumask_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - int err; - - count = min(count, (size_t)NR_CPUS); + int len; mutex_lock(&tracing_cpumask_update_lock); - cpumask_scnprintf(mask_str, NR_CPUS, tracing_cpumask); - err = copy_to_user(ubuf, mask_str, count); - if (err) - count = -EFAULT; + + len = cpumask_scnprintf(mask_str, count, tracing_cpumask); + if (count - len < 2) { + count = -EINVAL; + goto out_err; + } + len += sprintf(mask_str + len, "\n"); + count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); + +out_err: mutex_unlock(&tracing_cpumask_update_lock); return count; @@ -1798,16 +1808,40 @@ static ssize_t tracing_cpumask_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *ppos) { - int err; + int err, cpu; mutex_lock(&tracing_cpumask_update_lock); - err = cpumask_parse_user(ubuf, count, tracing_cpumask); - mutex_unlock(&tracing_cpumask_update_lock); - + err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); if (err) - return err; + goto err_unlock; + + spin_lock_irq(&ftrace_max_lock); + for_each_possible_cpu(cpu) { + /* + * Increase/decrease the disabled counter if we are + * about to flip a bit in the cpumask: + */ + if (cpu_isset(cpu, tracing_cpumask) && + !cpu_isset(cpu, tracing_cpumask_new)) { + atomic_inc(&global_trace.data[cpu]->disabled); + } + if (!cpu_isset(cpu, tracing_cpumask) && + cpu_isset(cpu, tracing_cpumask_new)) { + atomic_dec(&global_trace.data[cpu]->disabled); + } + } + spin_unlock_irq(&ftrace_max_lock); + + tracing_cpumask = tracing_cpumask_new; + + mutex_unlock(&tracing_cpumask_update_lock); return count; + +err_unlock: + mutex_unlock(&tracing_cpumask_update_lock); + + return err; } static struct file_operations tracing_cpumask_fops = { @@ -1846,8 +1880,7 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, r += sprintf(buf + r, "\n"); WARN_ON(r >= len + 2); - r = simple_read_from_buffer(ubuf, cnt, ppos, - buf, r); + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); kfree(buf); -- cgit v0.10.2 From d9af56fbd8b37181bffaad060f74aa2e17382874 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: fix cmdline tracing Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 5555b90..5a217e8 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -29,6 +29,8 @@ ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) if (!tracer_enabled) return; + tracing_record_cmdline(prev); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -56,6 +58,8 @@ wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr) if (!tracer_enabled) return; + tracing_record_cmdline(curr); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -75,8 +79,6 @@ void ftrace_ctx_switch(void *__rq, struct task_struct *prev, struct task_struct *next) { - tracing_record_cmdline(prev); - /* * If tracer_switch_func only points to the local * switch func, it still needs the ptr passed to it. @@ -93,8 +95,6 @@ void ftrace_wake_up_task(void *__rq, struct task_struct *wakee, struct task_struct *curr) { - tracing_record_cmdline(curr); - wakeup_func(__rq, wakee, curr); /* -- cgit v0.10.2 From 442e544ce52d4415a024920b84fb95c5f9aa0855 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: iter ctrl fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3ae17e2..688b4cf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1920,6 +1920,11 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, break; } } + /* + * If no option could be set, return an error: + */ + if (!trace_options[i]) + return -EINVAL; filp->f_pos += cnt; -- cgit v0.10.2 From f29c73fe3404f8799ed57aaf48859e0b55fc071f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: include cpu in stacktrace Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 688b4cf..3a40324 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1227,10 +1227,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); } else { - if (entry->type != TRACE_STACK) { - lat_print_generic(s, entry, cpu); - lat_print_timestamp(s, abs_usecs, rel_usecs); - } + lat_print_generic(s, entry, cpu); + lat_print_timestamp(s, abs_usecs, rel_usecs); } switch (entry->type) { case TRACE_FN: @@ -1293,17 +1291,15 @@ static int print_trace_fmt(struct trace_iterator *iter) usec_rem = do_div(t, 1000000ULL); secs = (unsigned long)t; - if (entry->type != TRACE_STACK) { - ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); - if (!ret) - return 0; - ret = trace_seq_printf(s, "[%02d] ", iter->cpu); - if (!ret) - return 0; - ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); - if (!ret) - return 0; - } + ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + if (!ret) + return 0; + ret = trace_seq_printf(s, "[%02d] ", iter->cpu); + if (!ret) + return 0; + ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); + if (!ret) + return 0; switch (entry->type) { case TRACE_FN: -- cgit v0.10.2 From 36fc25a9f48deacd8aa08cd2d1c186a4e412604f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: sched tree fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/sched.c b/kernel/sched.c index 9ca4a2e..3bc7c53 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2414,6 +2414,23 @@ static int sched_balance_self(int cpu, int flag) #ifdef CONFIG_CONTEXT_SWITCH_TRACER +void ftrace_task(struct task_struct *p, void *__tr, void *__data) +{ +#if 0 + /* + * trace timeline tree + */ + __trace_special(__tr, __data, + p->pid, p->se.vruntime, p->se.sum_exec_runtime); +#else + /* + * trace balance metrics + */ + __trace_special(__tr, __data, + p->pid, p->se.avg_overlap, 0); +#endif +} + void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { struct task_struct *p; @@ -2421,32 +2438,22 @@ void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) struct rb_node *curr; struct rq *rq = __rq; - curr = first_fair(&rq->cfs); - if (!curr) - return; - if (rq->cfs.curr) { p = task_of(rq->cfs.curr); - __trace_special(__tr, __data, - p->pid, p->se.vruntime, p->se.sum_exec_runtime); + ftrace_task(p, __tr, __data); } if (rq->cfs.next) { p = task_of(rq->cfs.next); - __trace_special(__tr, __data, - p->pid, p->se.vruntime, p->se.sum_exec_runtime); + ftrace_task(p, __tr, __data); } - while (curr) { + for (curr = first_fair(&rq->cfs); curr; curr = rb_next(curr)) { se = rb_entry(curr, struct sched_entity, run_node); if (!entity_is_task(se)) continue; p = task_of(se); - - __trace_special(__tr, __data, - p->pid, p->se.vruntime, p->se.sum_exec_runtime); - - curr = rb_next(curr); + ftrace_task(p, __tr, __data); } } -- cgit v0.10.2 From 88a4216c3ec4281fc7e6725cc3a3ccd01fb1aa14 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: sched special Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/sched.h b/include/linux/sched.h index 5b186be..360ca99 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2138,6 +2138,8 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2155,6 +2157,10 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e24ecd3..dc1856f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1061,6 +1061,8 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, if (!(this_sd->flags & SD_WAKE_AFFINE)) return 0; + ftrace_special(__LINE__, curr->se.avg_overlap, sync); + ftrace_special(__LINE__, p->se.avg_overlap, -1); /* * If the currently running task will sleep within * a reasonable amount of time then attract this newly @@ -1238,6 +1240,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) if (unlikely(se == pse)) return; + ftrace_special(__LINE__, p->pid, se->last_wakeup); cfs_rq_of(pse)->next = pse; /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3a40324..b87a264 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1251,7 +1251,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) comm); break; case TRACE_SPECIAL: - trace_seq_printf(s, " %ld %ld %ld\n", + trace_seq_printf(s, "# %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); @@ -1335,7 +1335,7 @@ static int print_trace_fmt(struct trace_iterator *iter) return 0; break; case TRACE_SPECIAL: - ret = trace_seq_printf(s, " %ld %ld %ld\n", + ret = trace_seq_printf(s, "# %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); @@ -1400,7 +1400,7 @@ static int print_raw_fmt(struct trace_iterator *iter) break; case TRACE_SPECIAL: case TRACE_STACK: - ret = trace_seq_printf(s, " %ld %ld %ld\n", + ret = trace_seq_printf(s, "# %ld %ld %ld\n", entry->special.arg1, entry->special.arg2, entry->special.arg3); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 5a217e8..bddf676 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -103,6 +103,30 @@ ftrace_wake_up_task(void *__rq, struct task_struct *wakee, wakeup_sched_wakeup(wakee, curr); } +void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + struct trace_array *tr = ctx_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (!tracer_enabled) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + __trace_special(tr, data, arg1, arg2, arg3); + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + static void sched_switch_reset(struct trace_array *tr) { int cpu; -- cgit v0.10.2 From bac524d3f3dfeffa3a9d44f2c64035b88bcaacb4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: trace next state Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b87a264..b63fe90 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -736,6 +736,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.prev_state = prev->state; entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; + entry->ctx.next_state = next->state; __trace_stack(tr, data, flags, 4); spin_unlock_irqrestore(&data->lock, irq_flags); } @@ -759,6 +760,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.prev_state = curr->state; entry->ctx.next_pid = wakee->pid; entry->ctx.next_prio = wakee->prio; + entry->ctx.next_state = wakee->state; __trace_stack(tr, data, flags, 5); spin_unlock_irqrestore(&data->lock, irq_flags); @@ -1207,7 +1209,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) unsigned long abs_usecs; unsigned long rel_usecs; char *comm; - int S; + int S, T; int i; if (!next_entry) @@ -1241,14 +1243,17 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; + T = entry->ctx.next_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.next_state] : 'X'; + comm = trace_find_cmdline(entry->ctx.next_pid); - trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d %s\n", + trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n", entry->ctx.prev_pid, entry->ctx.prev_prio, S, entry->type == TRACE_CTX ? "==>" : " +", entry->ctx.next_pid, entry->ctx.next_prio, - comm); + T, comm); break; case TRACE_SPECIAL: trace_seq_printf(s, "# %ld %ld %ld\n", @@ -1280,7 +1285,7 @@ static int print_trace_fmt(struct trace_iterator *iter) unsigned long secs; char *comm; int ret; - int S; + int S, T; int i; entry = iter->ent; @@ -1324,13 +1329,16 @@ static int print_trace_fmt(struct trace_iterator *iter) case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; - ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d\n", + T = entry->ctx.next_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.next_state] : 'X'; + ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n", entry->ctx.prev_pid, entry->ctx.prev_prio, S, entry->type == TRACE_CTX ? "==>" : " +", entry->ctx.next_pid, - entry->ctx.next_prio); + entry->ctx.next_prio, + T); if (!ret) return 0; break; @@ -1367,7 +1375,7 @@ static int print_raw_fmt(struct trace_iterator *iter) struct trace_seq *s = &iter->seq; struct trace_entry *entry; int ret; - int S; + int S, T; entry = iter->ent; @@ -1387,14 +1395,17 @@ static int print_raw_fmt(struct trace_iterator *iter) case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; + T = entry->ctx.next_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.next_state] : 'X'; if (entry->type == TRACE_WAKE) S = '+'; - ret = trace_seq_printf(s, "%d %d %c %d %d\n", + ret = trace_seq_printf(s, "%d %d %c %d %d %c\n", entry->ctx.prev_pid, entry->ctx.prev_prio, S, entry->ctx.next_pid, - entry->ctx.next_prio); + entry->ctx.next_prio, + T); if (!ret) return 0; break; @@ -1428,7 +1439,7 @@ static int print_hex_fmt(struct trace_iterator *iter) struct trace_seq *s = &iter->seq; unsigned char newline = '\n'; struct trace_entry *entry; - int S; + int S, T; entry = iter->ent; @@ -1445,6 +1456,8 @@ static int print_hex_fmt(struct trace_iterator *iter) case TRACE_WAKE: S = entry->ctx.prev_state < sizeof(state_to_char) ? state_to_char[entry->ctx.prev_state] : 'X'; + T = entry->ctx.next_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.next_state] : 'X'; if (entry->type == TRACE_WAKE) S = '+'; SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid); @@ -1453,6 +1466,7 @@ static int print_hex_fmt(struct trace_iterator *iter) SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid); SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio); SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); + SEQ_PUT_HEX_FIELD_RET(s, T); break; case TRACE_SPECIAL: case TRACE_STACK: @@ -1488,6 +1502,7 @@ static int print_bin_fmt(struct trace_iterator *iter) SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state); SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid); SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); + SEQ_PUT_FIELD_RET(s, entry->ctx.next_state); break; case TRACE_SPECIAL: case TRACE_STACK: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a520157..96951a8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -23,6 +23,7 @@ struct ctx_switch_entry { unsigned char prev_state; unsigned int next_pid; unsigned char next_prio; + unsigned char next_state; }; /* -- cgit v0.10.2 From 5429db2d26a59903c81a4f6c6dae7eb9daaea5fc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: fix wakeup callback Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/sched.c b/kernel/sched.c index 3bc7c53..1ec3fb2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2533,7 +2533,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) out_activate: #endif /* CONFIG_SMP */ - ftrace_wake_up_task(rq, p, rq->curr); schedstat_inc(p, se.nr_wakeups); if (sync) schedstat_inc(p, se.nr_wakeups_sync); @@ -2548,6 +2547,7 @@ out_activate: success = 1; out_running: + ftrace_wake_up_task(rq, p, rq->curr); check_preempt_curr(rq, p); p->state = TASK_RUNNING; -- cgit v0.10.2 From 694379e9ed4f2f6babe111bf001c69e2e263338b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:54 +0200 Subject: ftrace: make it more available in the Kconfig Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index eb1988e..ebc158e 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -14,7 +14,7 @@ config TRACING config FTRACE bool "Kernel Function Tracer" - depends on DEBUG_KERNEL && HAVE_FTRACE + depends on HAVE_FTRACE select FRAME_POINTER select TRACING select CONTEXT_SWITCH_TRACER @@ -72,7 +72,6 @@ config PREEMPT_TRACER config SCHED_TRACER bool "Scheduling Latency Tracer" - depends on DEBUG_KERNEL select TRACING select CONTEXT_SWITCH_TRACER select TRACER_MAX_TRACE @@ -82,7 +81,6 @@ config SCHED_TRACER config CONTEXT_SWITCH_TRACER bool "Trace process context switches" - depends on DEBUG_KERNEL select TRACING select MARKERS help -- cgit v0.10.2 From 8f96da02c14d722ad9a3713cd7273ce28c9036ad Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:54 +0200 Subject: ftrace: remove wakeup from function trace trace_function is called by mcount and calling wake_up from that can have unpredictable results. This patch removes the wakeup from trace_function. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b63fe90..736dcfb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -659,8 +659,6 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; spin_unlock_irqrestore(&data->lock, irq_flags); - - trace_wake_up(); } void -- cgit v0.10.2 From 4fe8c3048cd8280a54256bca9cac2007bd546c33 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:54 +0200 Subject: ftrace: printk and trace irqsoff and wakeups printk called from wakeup critical timings and irqs off can cause deadlocks since printk might do a wakeup itself. If the call to printk happens with the runqueue lock held, it can deadlock. This patch protects the printk from being called in trace irqs off with a test to see if the runqueue for the current CPU is locked. If it is locked, the printk is skipped. The wakeup always holds the runqueue lock, so the printk is simply removed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 7a4dc01..d0c1748 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -165,18 +165,20 @@ check_critical_timing(struct trace_array *tr, update_max_tr_single(tr, current, cpu); - if (tracing_thresh) { - printk(KERN_INFO "(%16s-%-5d|#%d):" - " %lu us critical section violates %lu us threshold.\n", - current->comm, current->pid, - raw_smp_processor_id(), - latency, nsecs_to_usecs(tracing_thresh)); - } else { - printk(KERN_INFO "(%16s-%-5d|#%d):" - " new %lu us maximum-latency critical section.\n", - current->comm, current->pid, - raw_smp_processor_id(), - latency); + if (!runqueue_is_locked()) { + if (tracing_thresh) { + printk(KERN_INFO "(%16s-%-5d|#%d): %lu us critical" + " section violates %lu us threshold.\n", + current->comm, current->pid, + raw_smp_processor_id(), + latency, nsecs_to_usecs(tracing_thresh)); + } else { + printk(KERN_INFO "(%16s-%-5d|#%d): new %lu us" + " maximum-latency critical section.\n", + current->comm, current->pid, + raw_smp_processor_id(), + latency); + } } max_sequence++; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 2a01242..5948011 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -106,19 +106,6 @@ wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) update_max_tr(tr, wakeup_task, wakeup_cpu); - if (tracing_thresh) { - printk(KERN_INFO "(%16s-%-5d|#%d):" - " %lu us wakeup latency violates %lu us threshold.\n", - wakeup_task->comm, wakeup_task->pid, - raw_smp_processor_id(), - latency, nsecs_to_usecs(tracing_thresh)); - } else { - printk(KERN_INFO "(%16s-%-5d|#%d):" - " new %lu us maximum wakeup latency.\n", - wakeup_task->comm, wakeup_task->pid, - cpu, latency); - } - out_unlock: __wakeup_reset(tr); spin_unlock_irqrestore(&wakeup_lock, flags); -- cgit v0.10.2 From 06fa75ab566c50e01bfd7b055bde85cf9b1bc98a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:54 +0200 Subject: ftrace: add TRACE_STACK and TRACE_SPECIAL to selftest validation The selftest validation code checks for valid entries in the trace buffer. TRACE_STACK and TRACE_SPECIAL have been added to the code but not to the validator. This patch adds the two to prevent them from flagging a failure in the selftest. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 39dd452..92f4acb 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -9,6 +9,8 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_FN: case TRACE_CTX: case TRACE_WAKE: + case TRACE_STACK: + case TRACE_SPECIAL: return 1; } return 0; @@ -180,7 +182,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, /* we should only have one item */ if (!ret && count != 1) { - printk(KERN_CONT ".. filter failed .."); + printk(KERN_CONT ".. filter failed count=%ld ..", count); ret = -1; goto out; } -- cgit v0.10.2 From d05cdb25d80f06f77aa6bddb53cd1390d4d91a0b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:54 +0200 Subject: ftrace: fix dynamic ftrace selftest With the adding of the configuration changes in the Makefile to prevent tracing of functions in the ftrace code, all tracing of all the ftrace code has been removed. Unfortunately, one of the selftests, relied on a function to be traced. With the new change, the function was no longer traced and the test failed. This patch separates out the test function into its own file so that we can add the "-pg" flag to the compilation of that function and the adding of the mcount call to that function. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c25a6cd..d9efbbf 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -4,6 +4,10 @@ ifdef CONFIG_FTRACE ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) + +# selftest needs instrumentation +CFLAGS_trace_selftest_dynamic.o = -pg +obj-y += trace_selftest_dynamic.o endif obj-$(CONFIG_FTRACE) += libftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 96951a8..98cbfd0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -244,6 +244,8 @@ extern int unregister_tracer_switch(struct tracer_switch_ops *ops); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; +#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func +extern int DYN_FTRACE_TEST_NAME(void); #endif #ifdef CONFIG_FTRACE_STARTUP_TEST diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 92f4acb..83e55a2 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -107,14 +107,8 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) #ifdef CONFIG_DYNAMIC_FTRACE -#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func #define __STR(x) #x #define STR(x) __STR(x) -static int DYN_FTRACE_TEST_NAME(void) -{ - /* used to call mcount */ - return 0; -} /* Test dynamic code modification and ftrace filters */ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c new file mode 100644 index 0000000..54dd77c --- /dev/null +++ b/kernel/trace/trace_selftest_dynamic.c @@ -0,0 +1,7 @@ +#include "trace.h" + +int DYN_FTRACE_TEST_NAME(void) +{ + /* used to call mcount */ + return 0; +} -- cgit v0.10.2 From 4d9493c90f8e6e1b164aede3814010a290161abb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:54 +0200 Subject: ftrace: remove add-hoc code Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/sched.c b/kernel/sched.c index 1ec3fb2..ad95cca 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2412,53 +2412,6 @@ static int sched_balance_self(int cpu, int flag) #endif /* CONFIG_SMP */ -#ifdef CONFIG_CONTEXT_SWITCH_TRACER - -void ftrace_task(struct task_struct *p, void *__tr, void *__data) -{ -#if 0 - /* - * trace timeline tree - */ - __trace_special(__tr, __data, - p->pid, p->se.vruntime, p->se.sum_exec_runtime); -#else - /* - * trace balance metrics - */ - __trace_special(__tr, __data, - p->pid, p->se.avg_overlap, 0); -#endif -} - -void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) -{ - struct task_struct *p; - struct sched_entity *se; - struct rb_node *curr; - struct rq *rq = __rq; - - if (rq->cfs.curr) { - p = task_of(rq->cfs.curr); - ftrace_task(p, __tr, __data); - } - if (rq->cfs.next) { - p = task_of(rq->cfs.next); - ftrace_task(p, __tr, __data); - } - - for (curr = first_fair(&rq->cfs); curr; curr = rb_next(curr)) { - se = rb_entry(curr, struct sched_entity, run_node); - if (!entity_is_task(se)) - continue; - - p = task_of(se); - ftrace_task(p, __tr, __data); - } -} - -#endif - /*** * try_to_wake_up - wake up a thread * @p: the to-be-woken-up thread diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index dc1856f..e24ecd3 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1061,8 +1061,6 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, if (!(this_sd->flags & SD_WAKE_AFFINE)) return 0; - ftrace_special(__LINE__, curr->se.avg_overlap, sync); - ftrace_special(__LINE__, p->se.avg_overlap, -1); /* * If the currently running task will sleep within * a reasonable amount of time then attract this newly @@ -1240,7 +1238,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) if (unlikely(se == pse)) return; - ftrace_special(__LINE__, p->pid, se->last_wakeup); cfs_rq_of(pse)->next = pse; /* diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index bddf676..5671db0 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -36,11 +36,8 @@ ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); - if (likely(disabled == 1)) { + if (likely(disabled == 1)) tracing_sched_switch_trace(tr, data, prev, next, flags); - if (trace_flags & TRACE_ITER_SCHED_TREE) - ftrace_all_fair_tasks(__rq, tr, data); - } atomic_dec(&data->disabled); local_irq_restore(flags); @@ -65,11 +62,8 @@ wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr) data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); - if (likely(disabled == 1)) { + if (likely(disabled == 1)) tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); - if (trace_flags & TRACE_ITER_SCHED_TREE) - ftrace_all_fair_tasks(__rq, tr, data); - } atomic_dec(&data->disabled); local_irq_restore(flags); -- cgit v0.10.2 From c5f888cae49dfe3e86d9d1e0dab2b63ceb057be3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:55 +0200 Subject: ftrace: irqsoff use raw_smp_processor_id This patch changes the use of __get_cpu_var to explicitly calling raw_smp_processor_id and using the per_cpu() macro. On some debug configurations, the use of __get_cpu_var may cause ftrace to trigger and this can cause problems with the irqsoff tracing. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index d0c1748..761f3ec 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -204,14 +204,14 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) if (likely(!tracer_enabled)) return; - if (__get_cpu_var(tracing_cpu)) + cpu = raw_smp_processor_id(); + + if (per_cpu(tracing_cpu, cpu)) return; - cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(!data) || unlikely(!head_page(data)) || - atomic_read(&data->disabled)) + if (unlikely(!data) || atomic_read(&data->disabled)) return; atomic_inc(&data->disabled); @@ -225,7 +225,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) trace_function(tr, data, ip, parent_ip, flags); - __get_cpu_var(tracing_cpu) = 1; + per_cpu(tracing_cpu, cpu) = 1; atomic_dec(&data->disabled); } @@ -238,16 +238,16 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) struct trace_array_cpu *data; unsigned long flags; + cpu = raw_smp_processor_id(); /* Always clear the tracing cpu on stopping the trace */ - if (unlikely(__get_cpu_var(tracing_cpu))) - __get_cpu_var(tracing_cpu) = 0; + if (unlikely(per_cpu(tracing_cpu, cpu))) + per_cpu(tracing_cpu, cpu) = 0; else return; if (!tracer_enabled) return; - cpu = raw_smp_processor_id(); data = tr->data[cpu]; if (unlikely(!data) || unlikely(!head_page(data)) || @@ -255,6 +255,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) return; atomic_inc(&data->disabled); + local_save_flags(flags); trace_function(tr, data, ip, parent_ip, flags); check_critical_timing(tr, data, parent_ip ? : ip, cpu); @@ -376,7 +377,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr) static void __irqsoff_tracer_init(struct trace_array *tr) { irqsoff_trace = tr; - /* make sure that the tracer is visibel */ + /* make sure that the tracer is visible */ smp_wmb(); if (tr->ctrl) -- cgit v0.10.2 From 92205c2343527a863d660360599a4bf8cede77b0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:55 +0200 Subject: ftrace: user raw_spin_lock in tracing Lock debugging enabled cause huge performance problems for tracing. Having the lock verification happening for every function that is called because mcount calls spin_lock can cripple the system. This patch converts the spin_locks used by ftrace into raw_spin_locks. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 736dcfb..3009aaf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -133,7 +133,8 @@ static const char *trace_options[] = { NULL }; -static DEFINE_SPINLOCK(ftrace_max_lock); +static raw_spinlock_t ftrace_max_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; /* * Copy the new maximum trace into the separate maximum-trace @@ -335,7 +336,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) int i; WARN_ON_ONCE(!irqs_disabled()); - spin_lock(&ftrace_max_lock); + __raw_spin_lock(&ftrace_max_lock); /* clear out all the previous traces */ for_each_possible_cpu(i) { data = tr->data[i]; @@ -344,7 +345,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) } __update_max_tr(tr, tsk, cpu); - spin_unlock(&ftrace_max_lock); + __raw_spin_unlock(&ftrace_max_lock); } /** @@ -360,7 +361,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) int i; WARN_ON_ONCE(!irqs_disabled()); - spin_lock(&ftrace_max_lock); + __raw_spin_lock(&ftrace_max_lock); for_each_possible_cpu(i) tracing_reset(max_tr.data[i]); @@ -368,7 +369,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_reset(data); __update_max_tr(tr, tsk, cpu); - spin_unlock(&ftrace_max_lock); + __raw_spin_unlock(&ftrace_max_lock); } int register_tracer(struct tracer *type) @@ -652,13 +653,15 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_FN; entry->fn.ip = ip; entry->fn.parent_ip = parent_ip; - spin_unlock_irqrestore(&data->lock, irq_flags); + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); } void @@ -678,14 +681,16 @@ __trace_special(void *__tr, void *__data, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, 0); entry->type = TRACE_SPECIAL; entry->special.arg1 = arg1; entry->special.arg2 = arg2; entry->special.arg3 = arg3; - spin_unlock_irqrestore(&data->lock, irq_flags); + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); trace_wake_up(); } @@ -725,7 +730,8 @@ tracing_sched_switch_trace(struct trace_array *tr, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_CTX; @@ -736,7 +742,8 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.next_prio = next->prio; entry->ctx.next_state = next->state; __trace_stack(tr, data, flags, 4); - spin_unlock_irqrestore(&data->lock, irq_flags); + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); } void @@ -749,7 +756,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, flags); entry->type = TRACE_WAKE; @@ -760,7 +768,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.next_prio = wakee->prio; entry->ctx.next_state = wakee->state; __trace_stack(tr, data, flags, 5); - spin_unlock_irqrestore(&data->lock, irq_flags); + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); trace_wake_up(); } @@ -1824,7 +1833,8 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (err) goto err_unlock; - spin_lock_irq(&ftrace_max_lock); + raw_local_irq_disable(); + __raw_spin_lock(&ftrace_max_lock); for_each_possible_cpu(cpu) { /* * Increase/decrease the disabled counter if we are @@ -1839,7 +1849,8 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, atomic_dec(&global_trace.data[cpu]->disabled); } } - spin_unlock_irq(&ftrace_max_lock); + __raw_spin_unlock(&ftrace_max_lock); + raw_local_irq_enable(); tracing_cpumask = tracing_cpumask_new; @@ -2299,7 +2310,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, for_each_cpu_mask(cpu, mask) { data = iter->tr->data[cpu]; - spin_lock(&data->lock); + __raw_spin_lock(&data->lock); } while (find_next_entry_inc(iter) != NULL) { @@ -2320,7 +2331,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, for_each_cpu_mask(cpu, mask) { data = iter->tr->data[cpu]; - spin_unlock(&data->lock); + __raw_spin_unlock(&data->lock); } for_each_cpu_mask(cpu, mask) { @@ -2538,8 +2549,7 @@ static int trace_alloc_page(void) /* Now that we successfully allocate a page per CPU, add them */ for_each_possible_cpu(i) { data = global_trace.data[i]; - spin_lock_init(&data->lock); - lockdep_set_class(&data->lock, &data->lock_key); + data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -2547,8 +2557,7 @@ static int trace_alloc_page(void) #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; - spin_lock_init(&data->lock); - lockdep_set_class(&data->lock, &data->lock_key); + data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 98cbfd0..25cba28 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -76,7 +76,7 @@ struct trace_entry { struct trace_array_cpu { struct list_head trace_pages; atomic_t disabled; - spinlock_t lock; + raw_spinlock_t lock; struct lock_class_key lock_key; /* these fields get copied into max-trace: */ -- cgit v0.10.2 From 9d0a420b737f72d84fabebf29634d800cbf54538 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:55 +0200 Subject: ftrace: remove function tracing from spinlock debug The debug functions in spin_lock debugging pollute the output of the function tracer. This patch adds the debug files in the lib director to those that should not be compiled with mcount tracing. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/lib/Makefile b/lib/Makefile index 6ca9e6e..d97ad11 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -10,8 +10,9 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ ifdef CONFIG_FTRACE # Do not profile string.o, since it may be used in early boot or vdso +# Also do not profile any debug utilities ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(if $(subst string,,$(basename $(notdir $@))), \ +KBUILD_CFLAGS = $(if $(filter-out %debug debug% string%,$(basename $(notdir $@))), \ $(ORIG_CFLAGS), \ $(subst -pg,,$(ORIG_CFLAGS))) endif -- cgit v0.10.2 From 1d09daa55d2e9bab7e7d30f0d05e5a7bc60b2a4a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:55 +0200 Subject: ftrace: use Makefile to remove tracing from lockdep This patch removes the "notrace" annotation from lockdep and adds the debugging files in the kernel director to those that should not be compiled with "-pg" mcount tracing. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/Makefile b/kernel/Makefile index 7e344e7..d2f80ea 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -11,6 +11,14 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o +ifdef CONFIG_FTRACE +# Do not profile debug utilities +ORIG_CFLAGS := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(if $(filter-out lockdep% %debug,$(basename $(notdir $@))), \ + $(ORIG_CFLAGS), \ + $(subst -pg,,$(ORIG_CFLAGS))) +endif + obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ diff --git a/kernel/lockdep.c b/kernel/lockdep.c index ac46847..90a440c 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -271,14 +271,14 @@ static struct list_head chainhash_table[CHAINHASH_SIZE]; ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \ (key2)) -notrace void lockdep_off(void) +void lockdep_off(void) { current->lockdep_recursion++; } EXPORT_SYMBOL(lockdep_off); -notrace void lockdep_on(void) +void lockdep_on(void) { current->lockdep_recursion--; } @@ -1041,7 +1041,7 @@ find_usage_forwards(struct lock_class *source, unsigned int depth) * Return 1 otherwise and keep unchanged. * Return 0 on error. */ -static noinline notrace int +static noinline int find_usage_backwards(struct lock_class *source, unsigned int depth) { struct lock_list *entry; @@ -1591,7 +1591,7 @@ static inline int validate_chain(struct task_struct *curr, * We are building curr_chain_key incrementally, so double-check * it from scratch, to make sure that it's done correctly: */ -static notrace void check_chain_key(struct task_struct *curr) +static void check_chain_key(struct task_struct *curr) { #ifdef CONFIG_DEBUG_LOCKDEP struct held_lock *hlock, *prev_hlock = NULL; @@ -1967,7 +1967,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this, /* * Mark all held locks with a usage bit: */ -static notrace int +static int mark_held_locks(struct task_struct *curr, int hardirq) { enum lock_usage_bit usage_bit; @@ -2014,7 +2014,7 @@ void early_boot_irqs_on(void) /* * Hardirqs will be enabled: */ -void notrace trace_hardirqs_on_caller(unsigned long a0) +void trace_hardirqs_on_caller(unsigned long a0) { struct task_struct *curr = current; unsigned long ip; @@ -2060,7 +2060,7 @@ void notrace trace_hardirqs_on_caller(unsigned long a0) } EXPORT_SYMBOL(trace_hardirqs_on_caller); -void notrace trace_hardirqs_on(void) +void trace_hardirqs_on(void) { trace_hardirqs_on_caller(CALLER_ADDR0); } @@ -2069,7 +2069,7 @@ EXPORT_SYMBOL(trace_hardirqs_on); /* * Hardirqs were disabled: */ -void notrace trace_hardirqs_off_caller(unsigned long a0) +void trace_hardirqs_off_caller(unsigned long a0) { struct task_struct *curr = current; @@ -2094,7 +2094,7 @@ void notrace trace_hardirqs_off_caller(unsigned long a0) } EXPORT_SYMBOL(trace_hardirqs_off_caller); -void notrace trace_hardirqs_off(void) +void trace_hardirqs_off(void) { trace_hardirqs_off_caller(CALLER_ADDR0); } @@ -2260,7 +2260,7 @@ static inline int separate_irq_context(struct task_struct *curr, /* * Mark a lock with a usage bit, and validate the state transition: */ -static notrace int mark_lock(struct task_struct *curr, struct held_lock *this, +static int mark_lock(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit new_bit) { unsigned int new_mask = 1 << new_bit, ret = 1; @@ -2663,7 +2663,7 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) /* * Check whether we follow the irq-flags state precisely: */ -static notrace void check_flags(unsigned long flags) +static void check_flags(unsigned long flags) { #if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS) if (!debug_locks) @@ -2700,7 +2700,7 @@ static notrace void check_flags(unsigned long flags) * We are not always called with irqs disabled - do that here, * and also avoid lockdep recursion: */ -notrace void lock_acquire(struct lockdep_map *lock, unsigned int subclass, +void lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, unsigned long ip) { unsigned long flags; @@ -2723,7 +2723,7 @@ notrace void lock_acquire(struct lockdep_map *lock, unsigned int subclass, EXPORT_SYMBOL_GPL(lock_acquire); -notrace void lock_release(struct lockdep_map *lock, int nested, +void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) { unsigned long flags; -- cgit v0.10.2 From c1d2327b36f2261ffa8ff7227321ba900c7eee7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:55 +0200 Subject: ftrace: restrict tracing to HAVE_FTRACE architectures Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ebc158e..f300571 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -32,6 +32,7 @@ config IRQSOFF_TRACER default n depends on TRACE_IRQFLAGS_SUPPORT depends on GENERIC_TIME + depends on HAVE_FTRACE select TRACE_IRQFLAGS select TRACING select TRACER_MAX_TRACE @@ -54,6 +55,7 @@ config PREEMPT_TRACER default n depends on GENERIC_TIME depends on PREEMPT + depends on HAVE_FTRACE select TRACING select TRACER_MAX_TRACE help @@ -72,6 +74,7 @@ config PREEMPT_TRACER config SCHED_TRACER bool "Scheduling Latency Tracer" + depends on HAVE_FTRACE select TRACING select CONTEXT_SWITCH_TRACER select TRACER_MAX_TRACE @@ -81,6 +84,7 @@ config SCHED_TRACER config CONTEXT_SWITCH_TRACER bool "Trace process context switches" + depends on HAVE_FTRACE select TRACING select MARKERS help -- cgit v0.10.2 From 07a267cdd2fd7d1de9455b1e36a1635ace7276c7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:55 +0200 Subject: ftrace: add UNINTERRUPTIBLE state for kftraced on disable When dynamic ftrace fails and sets itself disabled, the ftraced daemon will go back to sleep everytime it wakes up. The setting of the ftraced state to UNINTERRUPTIBLE is skipped in this process, and the daemon takes up 100% of the CPU. This patch makes sure the ftraced daemon sets itself to UNINTERRUPTIBLE in that loop. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 281d97a..40f64f7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -630,10 +630,10 @@ static int ftraced(void *ignore) { unsigned long usecs; - set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + /* check once a second */ schedule_timeout(HZ); @@ -667,8 +667,6 @@ static int ftraced(void *ignore) wake_up_interruptible(&ftraced_waiters); ftrace_shutdown_replenish(); - - set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); return 0; -- cgit v0.10.2 From d15f57f23eaba975309a153b23699cd0c0236974 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:56 +0200 Subject: ftrace: fix mutex unlock in trace output If the trace output changes on reading the trace files, there is a chance that the start function will return NULL. If the start function of a sequence returns NULL the stop equivalent is not called. In this case, all locks that are taken must be released even if they are released in the stop function. This patch fixes a case that a mutex was not released on return of NULL in the start sequence function. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3009aaf..ea11f4e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -964,8 +964,10 @@ static void *s_start(struct seq_file *m, loff_t *pos) mutex_lock(&trace_types_lock); - if (!current_trace || current_trace != iter->trace) + if (!current_trace || current_trace != iter->trace) { + mutex_unlock(&trace_types_lock); return NULL; + } atomic_inc(&trace_record_cmdline_disabled); -- cgit v0.10.2 From 30afdcb1de0a37a2086145a82ca3febebe47d019 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:56 +0200 Subject: ftrace: selftest protect againt max flip There is a slight race condition in the selftest where the max update of the wakeup and irqs/preemption off tests can be doing a max update as the buffers are being tested. If this happens the system can crash with a GPF. This patch adds the max update spinlock around the checking of the buffers to prevent such a race. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 83e55a2..395274e 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -82,10 +82,12 @@ trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data) */ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) { - unsigned long cnt = 0; - int cpu; - int ret = 0; + unsigned long flags, cnt = 0; + int cpu, ret = 0; + /* Don't allow flipping of max traces now */ + raw_local_irq_save(flags); + __raw_spin_lock(&ftrace_max_lock); for_each_possible_cpu(cpu) { if (!head_page(tr->data[cpu])) continue; @@ -96,6 +98,8 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) if (ret) break; } + __raw_spin_unlock(&ftrace_max_lock); + raw_local_irq_restore(flags); if (count) *count = cnt; -- cgit v0.10.2 From a56be3fe2f65f9f776e727bfd382e35db75911d6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:56 +0200 Subject: ftrace: fix the fault label in updating code The fault label to jump to on fault of updating the code was misplaced preventing the fault from being recorded. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9f44623..498608c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -93,8 +93,8 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, " movb %b4, 4(%2)\n" "2:\n" ".section .fixup, \"ax\"\n" - " movl $1, %0\n" - "3: jmp 2b\n" + "3: movl $1, %0\n" + " jmp 2b\n" ".previous\n" _ASM_EXTABLE(1b, 3b) : "=r"(faulted), "=a"(replaced) -- cgit v0.10.2 From 8f0f996e80b980fba07d11961d96a5fefb60976a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:56 +0200 Subject: ftrace: dont write protect kernel text Dynamic ftrace cant work when the kernel has its text write protected. This patch keeps the kernel from being write protected when dynamic ftrace is in place. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10..f96eca2 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -710,6 +710,8 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long size = PFN_ALIGN(_etext) - start; +#ifndef CONFIG_DYNAMIC_FTRACE + /* Dynamic tracing modifies the kernel text section */ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); printk(KERN_INFO "Write protecting the kernel text: %luk\n", size >> 10); @@ -722,6 +724,8 @@ void mark_rodata_ro(void) printk(KERN_INFO "Testing CPA: write protecting again\n"); set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); #endif +#endif /* CONFIG_DYNAMIC_FTRACE */ + start += size; size = (unsigned long)__end_rodata - start; set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 32ba13b..41824e7 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -766,6 +766,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data); void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); + unsigned long rodata_start = + ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; + +#ifdef CONFIG_DYNAMIC_FTRACE + /* Dynamic tracing modifies the kernel text section */ + start = rodata_start; +#endif printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); @@ -775,8 +782,7 @@ void mark_rodata_ro(void) * The rodata section (but not the kernel text!) should also be * not-executable. */ - start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; - set_memory_nx(start, (end - start) >> PAGE_SHIFT); + set_memory_nx(rodata_start, (end - start) >> PAGE_SHIFT); rodata_test(); -- cgit v0.10.2 From 86069782d62e731b4835a0cf8eb7d1d0e17cf306 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: x86: add a list for custom page fault handlers. Provides kernel modules a way to register custom page fault handlers. On every page fault this will call a list of registered functions. The functions may handle the fault and force do_page_fault() to return immediately. This functionality is similar to the now removed page fault notifiers. Custom page fault handlers are used by debugging and reverse engineering tools. Mmiotrace is one such tool and a patch to add it into the tree will follow. The custom page fault handlers are called earlier in do_page_fault() than the page fault notifiers were. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ac1e31b..9431a83 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -168,6 +168,14 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. +config PAGE_FAULT_HANDLERS + bool "Custom page fault handlers" + depends on DEBUG_KERNEL + help + Allow the use of custom page fault handlers. A kernel module may + register a function that is called on every page fault. Custom + handlers are used by some debugging and reverse engineering tools. + # # IO delay types: # diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fd7e179..343f5c1 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -49,6 +49,60 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) +#ifdef CONFIG_PAGE_FAULT_HANDLERS +static HLIST_HEAD(pf_handlers); /* protected by RCU */ +static DEFINE_SPINLOCK(pf_handlers_writer); + +void register_page_fault_handler(struct pf_handler *new_pfh) +{ + unsigned long flags; + spin_lock_irqsave(&pf_handlers_writer, flags); + hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers); + spin_unlock_irqrestore(&pf_handlers_writer, flags); +} +EXPORT_SYMBOL_GPL(register_page_fault_handler); + +/** + * unregister_page_fault_handler: + * The caller must ensure @old_pfh is not in use anymore before freeing it. + * This function does not guarantee it. The list of handlers is protected by + * RCU, so you can do this by e.g. calling synchronize_rcu(). + */ +void unregister_page_fault_handler(struct pf_handler *old_pfh) +{ + unsigned long flags; + spin_lock_irqsave(&pf_handlers_writer, flags); + hlist_del_rcu(&old_pfh->hlist); + spin_unlock_irqrestore(&pf_handlers_writer, flags); +} +EXPORT_SYMBOL_GPL(unregister_page_fault_handler); +#endif + +/* returns non-zero if do_page_fault() should return */ +static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ +#ifdef CONFIG_PAGE_FAULT_HANDLERS + int ret = 0; + struct pf_handler *cur; + struct hlist_node *ncur; + + if (hlist_empty(&pf_handlers)) + return 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) { + ret = cur->handler(regs, error_code, address); + if (ret) + break; + } + rcu_read_unlock(); + return ret; +#else + return 0; +#endif +} + static inline int notify_page_fault(struct pt_regs *regs) { #ifdef CONFIG_KPROBES @@ -601,6 +655,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (notify_page_fault(regs)) return; + if (handle_custom_pf(regs, error_code, address)) + return; /* * We fault-in kernel-space virtual memory on-demand. The diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index 96651bb..a80f2d6 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -35,4 +35,13 @@ extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); +struct pf_handler { + struct hlist_node hlist; + int (*handler)(struct pt_regs *regs, unsigned long error_code, + unsigned long address); +}; + +extern void register_page_fault_handler(struct pf_handler *new_pfh); +extern void unregister_page_fault_handler(struct pf_handler *old_pfh); + #endif -- cgit v0.10.2 From 72829bc3d63cdc592d8f7dd86ad3b3fe8900fb74 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 23 May 2008 21:37:28 +0200 Subject: ftrace: move enums to ftrace.h and make helper function global picked from the mmiotracer patches to distangle the patch queues. Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ea11f4e..0eef050 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -37,7 +37,7 @@ unsigned long __read_mostly tracing_thresh; static int tracing_disabled = 1; -static long +long ns2usecs(cycle_t nsec) { nsec += 500; @@ -96,18 +96,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) return nsecs / 1000; } -enum trace_type { - __TRACE_FIRST_TYPE = 0, - - TRACE_FN, - TRACE_CTX, - TRACE_WAKE, - TRACE_STACK, - TRACE_SPECIAL, - - __TRACE_LAST_TYPE -}; - enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, TRACE_FLAG_NEED_RESCHED = 0x02, @@ -190,7 +178,7 @@ void *head_page(struct trace_array_cpu *data) return page_address(page); } -static int +int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { int len = (PAGE_SIZE - 1) - s->len; @@ -205,7 +193,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) va_end(ap); /* If we can't write it all, don't bother writing anything */ - if (ret > len) + if (ret >= len) return 0; s->len += ret; @@ -638,7 +626,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags) pc = preempt_count(); entry->preempt_count = pc & 0xff; - entry->pid = tsk->pid; + entry->pid = (tsk) ? tsk->pid : 0; entry->t = ftrace_now(raw_smp_processor_id()); entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | @@ -1541,6 +1529,9 @@ static int trace_empty(struct trace_iterator *iter) static int print_trace_line(struct trace_iterator *iter) { + if (iter->trace && iter->trace->print_line) + return iter->trace->print_line(iter); + if (trace_flags & TRACE_ITER_BIN) return print_bin_fmt(iter); @@ -2162,6 +2153,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) return -ENOMEM; iter->tr = &global_trace; + iter->trace = current_trace; filp->private_data = iter; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 25cba28..b0ca747 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -6,6 +6,18 @@ #include #include +enum trace_type { + __TRACE_FIRST_TYPE = 0, + + TRACE_FN, + TRACE_CTX, + TRACE_WAKE, + TRACE_STACK, + TRACE_SPECIAL, + + __TRACE_LAST_TYPE +}; + /* * Function trace entry - function address and parent function addres: */ @@ -130,6 +142,7 @@ struct tracer { int (*selftest)(struct tracer *trace, struct trace_array *tr); #endif + int (*print_line)(struct trace_iterator *iter); struct tracer *next; int print_max; }; @@ -276,6 +289,8 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); +extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); +extern long ns2usecs(cycle_t nsec); extern unsigned long trace_flags; -- cgit v0.10.2 From d17d969160c18b631a19c2b34d260691402650f8 Mon Sep 17 00:00:00 2001 From: Ankita Garg Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: fix conversion of task state to char in latency tracer The conversion of task states to a character in the sched_switch tracer (part of latency tracer infrastructure), seems to be incorrect. We currently do it by indexing into the state_to_char array using the state value. The state values do not map directly into the array index and are thus incorrect. The following patch addresses this issue. This is also what is being done even in the show_task() routine in kernel/sched.c The patch has been compile and run tested. Signed-off-by: Ankita Garg Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0eef050..9197782 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1208,6 +1208,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) char *comm; int S, T; int i; + unsigned state; if (!next_entry) next_entry = entry; @@ -1238,11 +1239,11 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) break; case TRACE_CTX: case TRACE_WAKE: - S = entry->ctx.prev_state < sizeof(state_to_char) ? - state_to_char[entry->ctx.prev_state] : 'X'; T = entry->ctx.next_state < sizeof(state_to_char) ? state_to_char[entry->ctx.next_state] : 'X'; + state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0; + S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X'; comm = trace_find_cmdline(entry->ctx.next_pid); trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n", entry->ctx.prev_pid, -- cgit v0.10.2 From 8487c23765b6e0444ec6b5f1530766d63fe68e35 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: allow trace_pipe to block on all reads We expect things like "cat" to block on reads to trace_pipe. That's what trace_pipe is for. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9197782..fd4ecc2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2231,8 +2231,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, start = 0; while (trace_empty(iter)) { - if (!(trace_flags & TRACE_ITER_BLOCK)) - return -EWOULDBLOCK; /* * This is a make-shift waitqueue. The reason we don't use * an actual wait queue is because: -- cgit v0.10.2 From b5685aede3b7b65e72ddc73b951aa1f70798a614 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: restore iterator trace in pipe read The trace iterator is reset in the read. We still need to restore the tracer that the trace_pipe was opened with. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fd4ecc2..d141fc9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2202,6 +2202,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, { struct trace_iterator *iter = filp->private_data; struct trace_array_cpu *data; + struct trace_array *tr = iter->tr; + struct tracer *tracer = iter->trace; static cpumask_t mask; static int start; unsigned long flags; @@ -2274,7 +2276,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cnt = PAGE_SIZE - 1; memset(iter, 0, sizeof(*iter)); - iter->tr = &global_trace; + iter->tr = tr; + iter->trace = tracer; iter->pos = -1; /* -- cgit v0.10.2 From 845279972f1736c3463c9cebd1bad92a0a347176 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: return EOF in trace_pipe on change of tracer Break out of while loop with EOF when the current_trace changes. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d141fc9..2af9404 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2253,6 +2253,9 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, if (signal_pending(current)) return -EINTR; + if (iter->trace != current_trace) + return 0; + /* * We block until we read something and tracing is disabled. * We still block if tracing is disabled, but we have never -- cgit v0.10.2 From 2dc8f09571a61d9cb3dc47bba6608689dfe15d16 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: trace_pipe implement NONBLOCK This patch implements "NONBLOCK" for trace_pipe. If the trace_pipe is opened with O_NONBLOCK, then the trace_pipe read will not block when buffer is empty. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2af9404..3b4eaf3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2233,6 +2233,10 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, start = 0; while (trace_empty(iter)) { + + if ((filp->f_flags & O_NONBLOCK)) + return -EAGAIN; + /* * This is a make-shift waitqueue. The reason we don't use * an actual wait queue is because: -- cgit v0.10.2 From 05bd68c514579e007b46e4fa0461b78416a3f4c2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:59 +0200 Subject: ftrace: user proper API for setting RT prios in selftest The wakeup selftest used an internal API for setting the test task priority. This patch fixes it to use the proper API for performing such a task. Thanks goes to Randy Dunlap for pointing out this build failure. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 395274e..a5f6001 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -410,11 +410,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * #ifdef CONFIG_SCHED_TRACER static int trace_wakeup_test_thread(void *data) { - struct completion *x = data; - /* Make this a RT thread, doesn't need to be too high */ + struct sched_param param = { .sched_priority = 5 }; + struct completion *x = data; - rt_mutex_setprio(current, MAX_RT_PRIO - 5); + sched_setscheduler(current, SCHED_FIFO, ¶m); /* Make it know we have a new prio */ complete(x); -- cgit v0.10.2 From a98a3c3fde3ae7614f19758a043691b6f59dac53 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:59 +0200 Subject: ftrace: trace_entries to dynamically change trace buffer size This patch adds /debug/tracing/trace_entries that allows users to see as well as modify the number of trace entries the buffers hold. The number of entries only increments in ENTRIES_PER_PAGE which is calculated by the size of an entry with the number of entries that can fit in a page. The user does not need to use an exact size, but the entries will be rounded to one of the increments. Trying to set the entries to 0 will return with -EINVAL. To avoid race conditions, the modification of the buffer size can only be done when tracing is completely disabled (current_tracer == none). A info message will be printed if a user tries to modify the buffer size when not set to none. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3b4eaf3..4723e01 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -35,6 +35,15 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +/* dummy trace to disable tracing */ +static struct tracer no_tracer __read_mostly = +{ + .name = "none", +}; + +static int trace_alloc_page(void); +static int trace_free_page(void); + static int tracing_disabled = 1; long @@ -2364,6 +2373,70 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, return read; } +static ssize_t +tracing_entries_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + char buf[64]; + int r; + + r = sprintf(buf, "%lu\n", tr->entries); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +tracing_entries_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long val; + char buf[64]; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 10); + + /* must have at least 1 entry */ + if (!val) + return -EINVAL; + + mutex_lock(&trace_types_lock); + + if (current_trace != &no_tracer) { + cnt = -EBUSY; + pr_info("ftrace: set current_tracer to none" + " before modifying buffer size\n"); + goto out; + } + + if (val > global_trace.entries) { + while (global_trace.entries < val) { + if (trace_alloc_page()) { + cnt = -ENOMEM; + goto out; + } + } + } else { + /* include the number of entries in val (inc of page entries) */ + while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1)) + trace_free_page(); + } + + filp->f_pos += cnt; + + out: + max_tr.entries = global_trace.entries; + mutex_unlock(&trace_types_lock); + + return cnt; +} + static struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -2389,6 +2462,12 @@ static struct file_operations tracing_pipe_fops = { .release = tracing_release_pipe, }; +static struct file_operations tracing_entries_fops = { + .open = tracing_open_generic, + .read = tracing_entries_read, + .write = tracing_entries_write, +}; + #ifdef CONFIG_DYNAMIC_FTRACE static ssize_t @@ -2500,6 +2579,12 @@ static __init void tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'tracing_threash' entry\n"); + entry = debugfs_create_file("trace_entries", 0644, d_tracer, + &global_trace, &tracing_entries_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'tracing_threash' entry\n"); + #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, @@ -2510,12 +2595,6 @@ static __init void tracer_init_debugfs(void) #endif } -/* dummy trace to disable tracing */ -static struct tracer no_tracer __read_mostly = -{ - .name = "none", -}; - static int trace_alloc_page(void) { struct trace_array_cpu *data; @@ -2552,7 +2631,6 @@ static int trace_alloc_page(void) /* Now that we successfully allocate a page per CPU, add them */ for_each_possible_cpu(i) { data = global_trace.data[i]; - data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -2560,7 +2638,6 @@ static int trace_alloc_page(void) #ifdef CONFIG_TRACER_MAX_TRACE data = max_tr.data[i]; - data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); list_add_tail(&page->lru, &data->trace_pages); @@ -2579,6 +2656,55 @@ static int trace_alloc_page(void) return -ENOMEM; } +static int trace_free_page(void) +{ + struct trace_array_cpu *data; + struct page *page; + struct list_head *p; + int i; + int ret = 0; + + /* free one page from each buffer */ + for_each_possible_cpu(i) { + data = global_trace.data[i]; + p = data->trace_pages.next; + if (p == &data->trace_pages) { + /* should never happen */ + WARN_ON(1); + tracing_disabled = 1; + ret = -1; + break; + } + page = list_entry(p, struct page, lru); + ClearPageLRU(page); + list_del(&page->lru); + __free_page(page); + + tracing_reset(data); + +#ifdef CONFIG_TRACER_MAX_TRACE + data = max_tr.data[i]; + p = data->trace_pages.next; + if (p == &data->trace_pages) { + /* should never happen */ + WARN_ON(1); + tracing_disabled = 1; + ret = -1; + break; + } + page = list_entry(p, struct page, lru); + ClearPageLRU(page); + list_del(&page->lru); + __free_page(page); + + tracing_reset(data); +#endif + } + global_trace.entries -= ENTRIES_PER_PAGE; + + return ret; +} + __init static int tracer_alloc_buffers(void) { struct trace_array_cpu *data; @@ -2609,6 +2735,9 @@ __init static int tracer_alloc_buffers(void) /* use the LRU flag to differentiate the two buffers */ ClearPageLRU(page); + data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + /* Only allocate if we are actually using the max trace */ #ifdef CONFIG_TRACER_MAX_TRACE array = (void *)__get_free_page(GFP_KERNEL); -- cgit v0.10.2 From bb065afb8ebd07a03155502dba29ebf0f6fe67e8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:00 +0200 Subject: lockdep: update lockdep_recursion on graph_lock With the introduction of ftrace, it is possible to recurse into the lockdep functions via the mcount call. To prevent possible lockups, updating the lockdep_recursion counter on grabbing the internal lockdep_lock should prevent deadlocks. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 90a440c..65548ef 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -82,6 +82,8 @@ static int graph_lock(void) __raw_spin_unlock(&lockdep_lock); return 0; } + /* prevent any recursions within lockdep from causing deadlocks */ + current->lockdep_recursion++; return 1; } @@ -90,6 +92,7 @@ static inline int graph_unlock(void) if (debug_locks && !__raw_spin_is_locked(&lockdep_lock)) return DEBUG_LOCKS_WARN_ON(1); + current->lockdep_recursion--; __raw_spin_unlock(&lockdep_lock); return 0; } -- cgit v0.10.2 From 93dcc6ea096c51cc30ad0f6647ed05fead2439bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: simplify hexprint simplify hex to ascii conversion. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4723e01..627e399 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -248,19 +248,18 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) } #define HEX_CHARS 17 +static const char hex2asc[] = "0123456789abcdef"; static int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) { unsigned char hex[HEX_CHARS]; - unsigned char *data; + unsigned char *data = mem; unsigned char byte; int i, j; BUG_ON(len >= HEX_CHARS); - data = mem; - #ifdef __BIG_ENDIAN for (i = 0, j = 0; i < len; i++) { #else @@ -268,22 +267,10 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) #endif byte = data[i]; - hex[j] = byte & 0x0f; - if (hex[j] >= 10) - hex[j] += 'a' - 10; - else - hex[j] += '0'; - j++; - - hex[j] = byte >> 4; - if (hex[j] >= 10) - hex[j] += 'a' - 10; - else - hex[j] += '0'; - j++; + hex[j++] = hex2asc[byte & 0x0f]; + hex[j++] = hex2asc[byte >> 4]; } - hex[j] = ' '; - j++; + hex[j++] = ' '; return trace_seq_putmem(s, hex, j); } -- cgit v0.10.2 From afc2abc0ae4265730a0fc48618012193a09a1d10 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: cleanups no code changed. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 627e399..d6b6057 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1155,12 +1155,12 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; - if (hardirq && softirq) + if (hardirq && softirq) { trace_seq_putc(s, 'H'); - else { - if (hardirq) + } else { + if (hardirq) { trace_seq_putc(s, 'h'); - else { + } else { if (softirq) trace_seq_putc(s, 's'); else @@ -2177,8 +2177,7 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) * Always select as readable when in blocking mode */ return POLLIN | POLLRDNORM; - } - else { + } else { if (!trace_empty(iter)) return POLLIN | POLLRDNORM; poll_wait(filp, &trace_wait, poll_table); -- cgit v0.10.2 From cffae437cdfbc8a5370d38cefbff1dfe34dad6ca Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: simple clean ups Andrew Morton mentioned some clean ups that should be done to ftrace. This patch does some of the simple clean ups. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d6b6057..0a36736 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -36,8 +36,7 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; /* dummy trace to disable tracing */ -static struct tracer no_tracer __read_mostly = -{ +static struct tracer no_tracer __read_mostly = { .name = "none", }; @@ -1906,8 +1905,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, int neg = 0; int i; - if (cnt > 63) - cnt = 63; + if (cnt >= sizeof(buf)) + return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; @@ -1999,8 +1998,8 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, long val; char buf[64]; - if (cnt > 63) - cnt = 63; + if (cnt >= sizeof(buf)) + return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; @@ -2099,10 +2098,10 @@ tracing_max_lat_read(struct file *filp, char __user *ubuf, char buf[64]; int r; - r = snprintf(buf, 64, "%ld\n", + r = snprintf(buf, sizeof(buf), "%ld\n", *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr)); - if (r > 64) - r = 64; + if (r > sizeof(buf)) + r = sizeof(buf); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2114,8 +2113,8 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, long val; char buf[64]; - if (cnt > 63) - cnt = 63; + if (cnt >= sizeof(buf)) + return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; @@ -2378,8 +2377,8 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, unsigned long val; char buf[64]; - if (cnt > 63) - cnt = 63; + if (cnt >= sizeof(buf)) + return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; -- cgit v0.10.2 From c6caeeb142cd3a03c46107aac45c8effc02bbadb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: replace simple_strtoul with strict_strtoul Andrew Morton suggested using strict_strtoul over simple_strtoul. This patch replaces them in ftrace. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0a36736..290e9da 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -92,9 +92,16 @@ void trace_wake_up(void) static int __init set_nr_entries(char *str) { + unsigned long nr_entries; + int ret; + if (!str) return 0; - trace_nr_entries = simple_strtoul(str, &str, 0); + ret = strict_strtoul(str, 0, &nr_entries); + /* nr_entries can not be zero */ + if (ret < 0 || nr_entries == 0) + return 0; + trace_nr_entries = nr_entries; return 1; } __setup("trace_entries=", set_nr_entries); @@ -1995,8 +2002,9 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_array *tr = filp->private_data; - long val; char buf[64]; + long val; + int ret; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2006,7 +2014,9 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; - val = simple_strtoul(buf, NULL, 10); + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; val = !!val; @@ -2110,8 +2120,9 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { long *ptr = filp->private_data; - long val; char buf[64]; + long val; + int ret; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2121,7 +2132,9 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; - val = simple_strtoul(buf, NULL, 10); + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; *ptr = val * 1000; @@ -2376,6 +2389,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, { unsigned long val; char buf[64]; + int ret; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2385,7 +2399,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; - val = simple_strtoul(buf, NULL, 10); + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; /* must have at least 1 entry */ if (!val) -- cgit v0.10.2 From ab46428c6969d50ecf6f6e97b7a84abba6274368 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: modulize the number of CPU buffers Currently ftrace allocates a trace buffer for every possible CPU. Work is being done to change it to only online CPUs and add hooks to hotplug CPUS. This patch lays out the infrastructure for such a change. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 290e9da..5da391c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -35,6 +35,12 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; +static unsigned long __read_mostly tracing_nr_buffers; +static cpumask_t __read_mostly tracing_buffer_mask; + +#define for_each_tracing_cpu(cpu) \ + for_each_cpu_mask(cpu, tracing_buffer_mask) + /* dummy trace to disable tracing */ static struct tracer no_tracer __read_mostly = { .name = "none", @@ -328,7 +334,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) WARN_ON_ONCE(!irqs_disabled()); __raw_spin_lock(&ftrace_max_lock); /* clear out all the previous traces */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = tr->data[i]; flip_trace(max_tr.data[i], data); tracing_reset(data); @@ -352,7 +358,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) WARN_ON_ONCE(!irqs_disabled()); __raw_spin_lock(&ftrace_max_lock); - for_each_possible_cpu(i) + for_each_tracing_cpu(i) tracing_reset(max_tr.data[i]); flip_trace(max_tr.data[cpu], data); @@ -398,7 +404,7 @@ int register_tracer(struct tracer *type) * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer. */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = tr->data[i]; if (!head_page(data)) continue; @@ -417,7 +423,7 @@ int register_tracer(struct tracer *type) goto out; } /* Only reset on passing, to avoid touching corrupted buffers */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = tr->data[i]; if (!head_page(data)) continue; @@ -847,7 +853,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu) int next_cpu = -1; int cpu; - for_each_possible_cpu(cpu) { + for_each_tracing_cpu(cpu) { if (!head_page(tr->data[cpu])) continue; ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); @@ -972,7 +978,7 @@ static void *s_start(struct seq_file *m, loff_t *pos) iter->prev_ent = NULL; iter->prev_cpu = -1; - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { iter->next_idx[i] = 0; iter->next_page[i] = NULL; } @@ -1089,7 +1095,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) if (type) name = type->name; - for_each_possible_cpu(cpu) { + for_each_tracing_cpu(cpu) { if (head_page(tr->data[cpu])) { total += tr->data[cpu]->trace_idx; if (tr->data[cpu]->trace_idx > tr->entries) @@ -1519,7 +1525,7 @@ static int trace_empty(struct trace_iterator *iter) struct trace_array_cpu *data; int cpu; - for_each_possible_cpu(cpu) { + for_each_tracing_cpu(cpu) { data = iter->tr->data[cpu]; if (head_page(data) && data->trace_idx && @@ -1831,7 +1837,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, raw_local_irq_disable(); __raw_spin_lock(&ftrace_max_lock); - for_each_possible_cpu(cpu) { + for_each_tracing_cpu(cpu) { /* * Increase/decrease the disabled counter if we are * about to flip a bit in the cpumask: @@ -2308,7 +2314,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, ftrace_enabled = 0; #endif smp_wmb(); - for_each_possible_cpu(cpu) { + for_each_tracing_cpu(cpu) { data = iter->tr->data[cpu]; if (!head_page(data) || !data->trace_idx) @@ -2605,7 +2611,7 @@ static int trace_alloc_page(void) int i; /* first allocate a page for each CPU */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { array = (void *)__get_free_page(GFP_KERNEL); if (array == NULL) { printk(KERN_ERR "tracer: failed to allocate page" @@ -2630,7 +2636,7 @@ static int trace_alloc_page(void) } /* Now that we successfully allocate a page per CPU, add them */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = global_trace.data[i]; page = list_entry(pages.next, struct page, lru); list_del_init(&page->lru); @@ -2666,7 +2672,7 @@ static int trace_free_page(void) int ret = 0; /* free one page from each buffer */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = global_trace.data[i]; p = data->trace_pages.next; if (p == &data->trace_pages) { @@ -2717,8 +2723,12 @@ __init static int tracer_alloc_buffers(void) global_trace.ctrl = tracer_enabled; + /* TODO: make the number of buffers hot pluggable with CPUS */ + tracing_nr_buffers = num_possible_cpus(); + tracing_buffer_mask = cpu_possible_map; + /* Allocate the first page for all buffers */ - for_each_possible_cpu(i) { + for_each_tracing_cpu(i) { data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); max_tr.data[i] = &per_cpu(max_data, i); -- cgit v0.10.2 From 4fcdae83cebda24b519a89d3dd976081fff1ca80 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: comment code This is first installment of adding documentation to the ftrace. Expect many more patches of this kind in the near future. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5da391c..a102b11 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -64,26 +64,79 @@ cycle_t ftrace_now(int cpu) return cpu_clock(cpu); } +/* + * The global_trace is the descriptor that holds the tracing + * buffers for the live tracing. For each CPU, it contains + * a link list of pages that will store trace entries. The + * page descriptor of the pages in the memory is used to hold + * the link list by linking the lru item in the page descriptor + * to each of the pages in the buffer per CPU. + * + * For each active CPU there is a data field that holds the + * pages for the buffer for that CPU. Each CPU has the same number + * of pages allocated for its buffer. + */ static struct trace_array global_trace; static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); +/* + * The max_tr is used to snapshot the global_trace when a maximum + * latency is reached. Some tracers will use this to store a maximum + * trace while it continues examining live traces. + * + * The buffers for the max_tr are set up the same as the global_trace. + * When a snapshot is taken, the link list of the max_tr is swapped + * with the link list of the global_trace and the buffers are reset for + * the global_trace so the tracing can continue. + */ static struct trace_array max_tr; static DEFINE_PER_CPU(struct trace_array_cpu, max_data); +/* tracer_enabled is used to toggle activation of a tracer */ static int tracer_enabled = 1; + +/* + * trace_nr_entries is the number of entries that is allocated + * for a buffer. Note, the number of entries is always rounded + * to ENTRIES_PER_PAGE. + */ static unsigned long trace_nr_entries = 65536UL; +/* trace_types holds a link list of available tracers. */ static struct tracer *trace_types __read_mostly; + +/* current_trace points to the tracer that is currently active */ static struct tracer *current_trace __read_mostly; + +/* + * max_tracer_type_len is used to simplify the allocating of + * buffers to read userspace tracer names. We keep track of + * the longest tracer name registered. + */ static int max_tracer_type_len; +/* + * trace_types_lock is used to protect the trace_types list. + * This lock is also used to keep user access serialized. + * Accesses from userspace will grab this lock while userspace + * activities happen inside the kernel. + */ static DEFINE_MUTEX(trace_types_lock); + +/* trace_wait is a waitqueue for tasks blocked on trace_poll */ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); +/* trace_flags holds iter_ctrl options */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; +/** + * trace_wake_up - wake up tasks waiting for trace input + * + * Simply wakes up any task that is blocked on the trace_wait + * queue. These is used with trace_poll for tasks polling the trace. + */ void trace_wake_up(void) { /* @@ -117,6 +170,14 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) return nsecs / 1000; } +/* + * trace_flag_type is an enumeration that holds different + * states when a trace occurs. These are: + * IRQS_OFF - interrupts were disabled + * NEED_RESCED - reschedule is requested + * HARDIRQ - inside an interrupt handler + * SOFTIRQ - inside a softirq handler + */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, TRACE_FLAG_NEED_RESCHED = 0x02, @@ -124,10 +185,14 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x08, }; +/* + * TRACE_ITER_SYM_MASK masks the options in trace_flags that + * control the output of kernel symbols. + */ #define TRACE_ITER_SYM_MASK \ (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR) -/* These must match the bit postions above */ +/* These must match the bit postions in trace_iterator_flags */ static const char *trace_options[] = { "print-parent", "sym-offset", @@ -142,6 +207,15 @@ static const char *trace_options[] = { NULL }; +/* + * ftrace_max_lock is used to protect the swapping of buffers + * when taking a max snapshot. The buffers themselves are + * protected by per_cpu spinlocks. But the action of the swap + * needs its own lock. + * + * This is defined as a raw_spinlock_t in order to help + * with performance when lockdep debugging is enabled. + */ static raw_spinlock_t ftrace_max_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; @@ -172,6 +246,13 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(current); } +/** + * check_pages - integrity check of trace buffers + * + * As a safty measure we check to make sure the data pages have not + * been corrupted. TODO: configure to disable this because it adds + * a bit of overhead. + */ void check_pages(struct trace_array_cpu *data) { struct page *page, *tmp; @@ -185,6 +266,13 @@ void check_pages(struct trace_array_cpu *data) } } +/** + * head_page - page address of the first page in per_cpu buffer. + * + * head_page returns the page address of the first page in + * a per_cpu buffer. This also preforms various consistency + * checks to make sure the buffer has not been corrupted. + */ void *head_page(struct trace_array_cpu *data) { struct page *page; @@ -199,6 +287,17 @@ void *head_page(struct trace_array_cpu *data) return page_address(page); } +/** + * trace_seq_printf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { @@ -222,6 +321,16 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) return len; } +/** + * trace_seq_puts - trace sequence printing of simple string + * @s: trace sequence descriptor + * @str: simple string to record + * + * The tracer may use either the sequence operations or its own + * copy to user routines. This function records a simple string + * into a special buffer (@s) for later retrieval by a sequencer + * or other mechanism. + */ static int trace_seq_puts(struct trace_seq *s, const char *str) { @@ -304,6 +413,13 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s) trace_seq_reset(s); } +/* + * flip the trace buffers between two trace descriptors. + * This usually is the buffers between the global_trace and + * the max_tr to record a snapshot of a current trace. + * + * The ftrace_max_lock must be held. + */ static void flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) { @@ -325,6 +441,15 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2) check_pages(tr2); } +/** + * update_max_tr - snapshot all trace buffers from global_trace to max_tr + * @tr: tracer + * @tsk: the task with the latency + * @cpu: The cpu that initiated the trace. + * + * Flip the buffers between the @tr and the max_tr and record information + * about which task was the cause of this latency. + */ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { @@ -349,6 +474,8 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) * @tr - tracer * @tsk - task with the latency * @cpu - the cpu of the buffer to copy. + * + * Flip the trace of a single CPU buffer between the @tr and the max_tr. */ void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) @@ -368,6 +495,12 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) __raw_spin_unlock(&ftrace_max_lock); } +/** + * register_tracer - register a tracer with the ftrace system. + * @type - the plugin for the tracer + * + * Register a new plugin tracer. + */ int register_tracer(struct tracer *type) { struct tracer *t; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b0ca747..21c29ee 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -294,6 +294,13 @@ extern long ns2usecs(cycle_t nsec); extern unsigned long trace_flags; +/* + * trace_iterator_flags is an enumeration that defines bit + * positions into trace_flags that controls the output. + * + * NOTE: These bits must match the trace_options array in + * trace.c. + */ enum trace_iterator_flags { TRACE_ITER_PRINT_PARENT = 0x01, TRACE_ITER_SYM_OFFSET = 0x02, -- cgit v0.10.2 From 25b0b44a1c732ccfc58095cdd8438955a0a19fff Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:00 +0200 Subject: ftrace: fix comm on function trace output In cleaning up of the sched_switch code, the function trace recording of task comms was removed. This patch adds back the recording of comms for function trace. The output of ftrace now has the task comm instead of <...>. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a102b11..1281969 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -620,7 +620,12 @@ static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; static int cmdline_idx; static DEFINE_SPINLOCK(trace_cmdline_lock); -atomic_t trace_record_cmdline_disabled; + +/* trace in all context switches */ +atomic_t trace_record_cmdline_enabled __read_mostly; + +/* temporary disable recording */ +atomic_t trace_record_cmdline_disabled __read_mostly; static void trace_init_cmdlines(void) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 21c29ee..8991c5e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -216,6 +216,8 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_max_latency; extern unsigned long tracing_thresh; +extern atomic_t trace_record_cmdline_enabled; + void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 4165d34..0a08465 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -29,12 +29,14 @@ static void function_reset(struct trace_array *tr) static void start_function_trace(struct trace_array *tr) { function_reset(tr); + atomic_inc(&trace_record_cmdline_enabled); tracing_start_function_trace(); } static void stop_function_trace(struct trace_array *tr) { tracing_stop_function_trace(); + atomic_dec(&trace_record_cmdline_enabled); } static void function_trace_init(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 5671db0..a337647 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -29,8 +29,6 @@ ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) if (!tracer_enabled) return; - tracing_record_cmdline(prev); - local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -73,6 +71,9 @@ void ftrace_ctx_switch(void *__rq, struct task_struct *prev, struct task_struct *next) { + if (unlikely(atomic_read(&trace_record_cmdline_enabled))) + tracing_record_cmdline(prev); + /* * If tracer_switch_func only points to the local * switch func, it still needs the ptr passed to it. @@ -134,11 +135,13 @@ static void sched_switch_reset(struct trace_array *tr) static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); + atomic_inc(&trace_record_cmdline_enabled); tracer_enabled = 1; } static void stop_sched_trace(struct trace_array *tr) { + atomic_dec(&trace_record_cmdline_enabled); tracer_enabled = 0; } -- cgit v0.10.2 From 72b59d67f80983f7bb587b086fb4cb1bc95263a4 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:01 +0200 Subject: x86_64: fix kernel rodata NX setting Without CONFIG_DYNAMIC_FTRACE, mark_rodata_ro() would mark a wrong number of pages as no-execute. The bug was introduced in the patch "ftrace: dont write protect kernel text". The symptom was machine reboot after a CPU hotplug. Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 41824e7..295be1d 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -782,7 +782,7 @@ void mark_rodata_ro(void) * The rodata section (but not the kernel text!) should also be * not-executable. */ - set_memory_nx(rodata_start, (end - start) >> PAGE_SHIFT); + set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); rodata_test(); -- cgit v0.10.2 From 53d0aa773053ab18287781e25d52c5faf9e0e09e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:01 +0200 Subject: ftrace: add logic to record overruns This patch sets up the infrastructure to record overruns of the tracing buffer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1281969..b9126ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -609,6 +609,7 @@ void unregister_tracer(struct tracer *type) void tracing_reset(struct trace_array_cpu *data) { data->trace_idx = 0; + data->overrun = 0; data->trace_head = data->trace_tail = head_page(data); data->trace_head_idx = 0; data->trace_tail_idx = 0; @@ -750,6 +751,7 @@ tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data) if (data->trace_head == data->trace_tail && idx_next == data->trace_tail_idx) { /* overrun */ + data->overrun++; data->trace_tail_idx++; if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { data->trace_tail = @@ -2353,8 +2355,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, { struct trace_iterator *iter = filp->private_data; struct trace_array_cpu *data; - struct trace_array *tr = iter->tr; - struct tracer *tracer = iter->trace; static cpumask_t mask; static int start; unsigned long flags; @@ -2433,10 +2433,11 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, if (cnt >= PAGE_SIZE) cnt = PAGE_SIZE - 1; - memset(iter, 0, sizeof(*iter)); - iter->tr = tr; - iter->trace = tracer; + /* reset all but tr, trace, and overruns */ iter->pos = -1; + memset(&iter->seq, 0, + sizeof(struct trace_iterator) - + offsetof(struct trace_iterator, seq)); /* * We need to stop all tracing on all CPUS to read the @@ -2465,6 +2466,11 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, for_each_cpu_mask(cpu, mask) { data = iter->tr->data[cpu]; __raw_spin_lock(&data->lock); + + if (data->overrun > iter->last_overrun[cpu]) + iter->overrun[cpu] += + data->overrun - iter->last_overrun[cpu]; + iter->last_overrun[cpu] = data->overrun; } while (find_next_entry_inc(iter) != NULL) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8991c5e..c1ec134 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -97,6 +97,7 @@ struct trace_array_cpu { void *trace_head; /* producer */ void *trace_tail; /* consumer */ unsigned long trace_idx; + unsigned long overrun; unsigned long saved_latency; unsigned long critical_start; unsigned long critical_end; @@ -157,10 +158,13 @@ struct trace_seq { * results to users and which routines might sleep, etc: */ struct trace_iterator { - struct trace_seq seq; struct trace_array *tr; struct tracer *trace; + long last_overrun[NR_CPUS]; + long overrun[NR_CPUS]; + /* The below is zeroed out in pipe_read */ + struct trace_seq seq; struct trace_entry *ent; int cpu; -- cgit v0.10.2 From 107bad8bef5ab2c3a3bff7648c18c9dc3abdc13b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:01 +0200 Subject: ftrace: add trace pipe header pluggin This patch adds a method for open_pipe and open_read to the pluggins so that they can add a header to the trace pipe call. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b9126ef..32f9106 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2307,11 +2307,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (!iter) return -ENOMEM; + mutex_lock(&trace_types_lock); iter->tr = &global_trace; iter->trace = current_trace; - filp->private_data = iter; + if (iter->trace->pipe_open) + iter->trace->pipe_open(iter); + mutex_unlock(&trace_types_lock); + return 0; } @@ -2380,13 +2384,24 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, return cnt; } + mutex_lock(&trace_types_lock); + if (iter->trace->read) { + ret = iter->trace->read(iter, filp, ubuf, cnt, ppos); + if (ret) { + read = ret; + goto out; + } + } + trace_seq_reset(&iter->seq); start = 0; while (trace_empty(iter)) { - if ((filp->f_flags & O_NONBLOCK)) - return -EAGAIN; + if ((filp->f_flags & O_NONBLOCK)) { + read = -EAGAIN; + goto out; + } /* * This is a make-shift waitqueue. The reason we don't use @@ -2400,16 +2415,22 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, set_current_state(TASK_INTERRUPTIBLE); iter->tr->waiter = current; + mutex_unlock(&trace_types_lock); + /* sleep for one second, and try again. */ schedule_timeout(HZ); + mutex_lock(&trace_types_lock); + iter->tr->waiter = NULL; - if (signal_pending(current)) - return -EINTR; + if (signal_pending(current)) { + read = -EINTR; + goto out; + } if (iter->trace != current_trace) - return 0; + goto out; /* * We block until we read something and tracing is disabled. @@ -2428,7 +2449,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, /* stop when tracing is finished */ if (trace_empty(iter)) - return 0; + goto out; if (cnt >= PAGE_SIZE) cnt = PAGE_SIZE - 1; @@ -2518,6 +2539,9 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, if (ret) read = -EFAULT; +out: + mutex_unlock(&trace_types_lock); + return read; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c1ec134..ee53d70 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -135,9 +135,13 @@ struct tracer { void (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); + void (*pipe_open)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); void (*start)(struct trace_iterator *iter); void (*stop)(struct trace_iterator *iter); + ssize_t (*read)(struct trace_iterator *iter, + struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); void (*ctrl_update)(struct trace_array *tr); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, @@ -160,6 +164,7 @@ struct trace_seq { struct trace_iterator { struct trace_array *tr; struct tracer *trace; + void *private; long last_overrun[NR_CPUS]; long overrun[NR_CPUS]; -- cgit v0.10.2 From 2f1dafe50cc4e58a239fd81bd47f87f32042a1ee Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:01 +0200 Subject: x86: fix SMP alternatives: use mutex instead of spinlock, text_poke is sleepable text_poke is sleepable. The original fix by Mathieu Desnoyers . Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index de240ba..2763cb3 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -279,7 +279,7 @@ struct smp_alt_module { struct list_head next; }; static LIST_HEAD(smp_alt_modules); -static DEFINE_SPINLOCK(smp_alt); +static DEFINE_MUTEX(smp_alt); static int smp_mode = 1; /* protected by smp_alt */ void alternatives_smp_module_add(struct module *mod, char *name, @@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name, __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); - spin_lock(&smp_alt); + mutex_lock(&smp_alt); list_add_tail(&smp->next, &smp_alt_modules); if (boot_cpu_has(X86_FEATURE_UP)) alternatives_smp_unlock(smp->locks, smp->locks_end, smp->text, smp->text_end); - spin_unlock(&smp_alt); + mutex_unlock(&smp_alt); } void alternatives_smp_module_del(struct module *mod) @@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod) if (smp_alt_once || noreplace_smp) return; - spin_lock(&smp_alt); + mutex_lock(&smp_alt); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; list_del(&item->next); - spin_unlock(&smp_alt); + mutex_unlock(&smp_alt); DPRINTK("%s: %s\n", __func__, item->name); kfree(item); return; } - spin_unlock(&smp_alt); + mutex_unlock(&smp_alt); } void alternatives_smp_switch(int smp) @@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp) return; BUG_ON(!smp && (num_online_cpus() > 1)); - spin_lock(&smp_alt); + mutex_lock(&smp_alt); /* * Avoid unnecessary switches because it forces JIT based VMs to @@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp) mod->text, mod->text_end); } smp_mode = smp; - spin_unlock(&smp_alt); + mutex_unlock(&smp_alt); } #endif -- cgit v0.10.2 From 4823ed7eadf35e4b57ce581327e21d39585f1f32 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:01 +0200 Subject: ftrace: fix setting of pos in read_pipe In resetting the iterator in read_pipe, the reset of pos was postitioned in the wrong location with respect to the memset operation. The current code sets pos, incorrectly, to zero. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 32f9106..49e1663 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2455,10 +2455,10 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, cnt = PAGE_SIZE - 1; /* reset all but tr, trace, and overruns */ - iter->pos = -1; memset(&iter->seq, 0, sizeof(struct trace_iterator) - offsetof(struct trace_iterator, seq)); + iter->pos = -1; /* * We need to stop all tracing on all CPUS to read the -- cgit v0.10.2 From 9fe068e92f6290e89e19adc521441661a1229f00 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:02 +0200 Subject: ftrace: trace faster Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 49e1663..ca0d6ff 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2417,8 +2417,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, mutex_unlock(&trace_types_lock); - /* sleep for one second, and try again. */ - schedule_timeout(HZ); + /* sleep for 100 msecs, and try again. */ + schedule_timeout(HZ/10); mutex_lock(&trace_types_lock); -- cgit v0.10.2 From a4feb8348b62fe76a63cdb5569f5c920f5283c06 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:02 +0200 Subject: ftrace: special stacktrace Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ca0d6ff..c232d82 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -808,29 +808,6 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } -void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ - struct trace_array_cpu *data = __data; - struct trace_array *tr = __tr; - struct trace_entry *entry; - unsigned long irq_flags; - - raw_local_irq_save(irq_flags); - __raw_spin_lock(&data->lock); - entry = tracing_get_trace_entry(tr, data); - tracing_generic_entry_update(entry, 0); - entry->type = TRACE_SPECIAL; - entry->special.arg1 = arg1; - entry->special.arg2 = arg2; - entry->special.arg3 = arg3; - __raw_spin_unlock(&data->lock); - raw_local_irq_restore(irq_flags); - - trace_wake_up(); -} - void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, @@ -857,6 +834,30 @@ void __trace_stack(struct trace_array *tr, } void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + struct trace_array_cpu *data = __data; + struct trace_array *tr = __tr; + struct trace_entry *entry; + unsigned long irq_flags; + + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_SPECIAL; + entry->special.arg1 = arg1; + entry->special.arg2 = arg2; + entry->special.arg3 = arg3; + __trace_stack(tr, data, irq_flags, 4); + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); + + trace_wake_up(); +} + +void tracing_sched_switch_trace(struct trace_array *tr, struct trace_array_cpu *data, struct task_struct *prev, -- cgit v0.10.2 From 2bb6f8d6389cbfadd657e7dc069f6986abf35e4f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:02 +0200 Subject: ftrace: use raw_smp_processor_id for mcount functions Due to debug hooks in the kernel that can change the way smp_processor_id works, use raw_smp_processor_id in mcount called functions (namely ftrace_record_ip). Currently we annotate most debug functions from calling mcount, but we should not rely on that to prevent kernel lockups. This patch uses the raw_smp_processor_id to prevent a recusive crash that can happen if a debug hook in smp_processor_id calls mcount. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 40f64f7..af5ad89 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -267,6 +267,7 @@ ftrace_record_ip(unsigned long ip) unsigned long key; int resched; int atomic; + int cpu; if (!ftrace_enabled || ftrace_disabled) return; @@ -274,9 +275,15 @@ ftrace_record_ip(unsigned long ip) resched = need_resched(); preempt_disable_notrace(); - /* We simply need to protect against recursion */ - __get_cpu_var(ftrace_shutdown_disable_cpu)++; - if (__get_cpu_var(ftrace_shutdown_disable_cpu) != 1) + /* + * We simply need to protect against recursion. + * Use the the raw version of smp_processor_id and not + * __get_cpu_var which can call debug hooks that can + * cause a recursive crash here. + */ + cpu = raw_smp_processor_id(); + per_cpu(ftrace_shutdown_disable_cpu, cpu)++; + if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1) goto out; if (unlikely(ftrace_record_suspend)) @@ -317,7 +324,7 @@ ftrace_record_ip(unsigned long ip) out_unlock: spin_unlock_irqrestore(&ftrace_shutdown_lock, flags); out: - __get_cpu_var(ftrace_shutdown_disable_cpu)--; + per_cpu(ftrace_shutdown_disable_cpu, cpu)--; /* prevent recursion with scheduler */ if (resched) -- cgit v0.10.2 From 6c6c27969a4c6024e6c8838829546c02aaddca18 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:02 +0200 Subject: ftrace: add readpos to struct trace_seq; add trace_seq_to_user() Refactor code from tracing_read_pipe() and create trace_seq_to_user(). Moved trace_seq_reset() call before iter->trace->read() call so that when all leftover data is returned, trace_seq is reset automatically. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c232d82..82ced40 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -400,6 +400,26 @@ static void trace_seq_reset(struct trace_seq *s) { s->len = 0; + s->readpos = 0; +} + +ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) +{ + int len; + int ret; + + if (s->len <= s->readpos) + return -EBUSY; + + len = s->len - s->readpos; + if (cnt > len) + cnt = len; + ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); + if (ret) + return -EFAULT; + + s->readpos += len; + return cnt; } static void @@ -2361,46 +2381,32 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, struct trace_iterator *iter = filp->private_data; struct trace_array_cpu *data; static cpumask_t mask; - static int start; unsigned long flags; #ifdef CONFIG_FTRACE int ftrace_save; #endif - int read = 0; int cpu; - int len; - int ret; + ssize_t sret; /* return any leftover data */ - if (iter->seq.len > start) { - len = iter->seq.len - start; - if (cnt > len) - cnt = len; - ret = copy_to_user(ubuf, iter->seq.buffer + start, cnt); - if (ret) - cnt = -EFAULT; - - start += len; + sret = trace_seq_to_user(&iter->seq, ubuf, cnt); + if (sret != -EBUSY) + return sret; + sret = 0; - return cnt; - } + trace_seq_reset(&iter->seq); mutex_lock(&trace_types_lock); if (iter->trace->read) { - ret = iter->trace->read(iter, filp, ubuf, cnt, ppos); - if (ret) { - read = ret; + sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); + if (sret) goto out; - } } - trace_seq_reset(&iter->seq); - start = 0; - while (trace_empty(iter)) { if ((filp->f_flags & O_NONBLOCK)) { - read = -EAGAIN; + sret = -EAGAIN; goto out; } @@ -2426,7 +2432,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, iter->tr->waiter = NULL; if (signal_pending(current)) { - read = -EINTR; + sret = -EINTR; goto out; } @@ -2496,6 +2502,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, } while (find_next_entry_inc(iter) != NULL) { + int ret; int len = iter->seq.len; ret = print_trace_line(iter); @@ -2526,24 +2533,16 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, local_irq_restore(flags); /* Now copy what we have to the user */ - read = iter->seq.len; - if (read > cnt) - read = cnt; - - ret = copy_to_user(ubuf, iter->seq.buffer, read); - - if (read < iter->seq.len) - start = read; - else + sret = trace_seq_to_user(&iter->seq, ubuf, cnt); + if (iter->seq.readpos >= iter->seq.len) trace_seq_reset(&iter->seq); - - if (ret) - read = -EFAULT; + if (sret == -EBUSY) + sret = 0; out: mutex_unlock(&trace_types_lock); - return read; + return sret; } static ssize_t diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ee53d70..8845033 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -155,6 +155,7 @@ struct tracer { struct trace_seq { unsigned char buffer[PAGE_SIZE]; unsigned int len; + unsigned int readpos; }; /* @@ -301,6 +302,8 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); +extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt); extern long ns2usecs(cycle_t nsec); extern unsigned long trace_flags; -- cgit v0.10.2 From 3eefae994d9224fb7771a3ddb683868363c23510 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:04 +0200 Subject: ftrace: limit trace entries Currently there is no protection from the root user to use up all of memory for trace buffers. If the root user allocates too many entries, the OOM killer might start kill off all tasks. This patch adds an algorith to check the following condition: pages_requested > (freeable_memory + current_trace_buffer_pages) / 4 If the above is met then the allocation fails. The above prevents more than 1/4th of freeable memory from being used by trace buffers. To determine the freeable_memory, I made determine_dirtyable_memory in mm/page-writeback.c global. Special thanks goes to Peter Zijlstra for suggesting the above calculation. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f462439..bd91987 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; +extern unsigned long determine_dirtyable_memory(void); + extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82ced40..2824cf48 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -51,6 +52,8 @@ static int trace_free_page(void); static int tracing_disabled = 1; +static unsigned long tracing_pages_allocated; + long ns2usecs(cycle_t nsec) { @@ -2591,12 +2594,41 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, } if (val > global_trace.entries) { + long pages_requested; + unsigned long freeable_pages; + + /* make sure we have enough memory before mapping */ + pages_requested = + (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE; + + /* account for each buffer (and max_tr) */ + pages_requested *= tracing_nr_buffers * 2; + + /* Check for overflow */ + if (pages_requested < 0) { + cnt = -ENOMEM; + goto out; + } + + freeable_pages = determine_dirtyable_memory(); + + /* we only allow to request 1/4 of useable memory */ + if (pages_requested > + ((freeable_pages + tracing_pages_allocated) / 4)) { + cnt = -ENOMEM; + goto out; + } + while (global_trace.entries < val) { if (trace_alloc_page()) { cnt = -ENOMEM; goto out; } + /* double check that we don't go over the known pages */ + if (tracing_pages_allocated > pages_requested) + break; } + } else { /* include the number of entries in val (inc of page entries) */ while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1)) @@ -2776,6 +2808,7 @@ static int trace_alloc_page(void) struct page *page, *tmp; LIST_HEAD(pages); void *array; + unsigned pages_allocated = 0; int i; /* first allocate a page for each CPU */ @@ -2787,6 +2820,7 @@ static int trace_alloc_page(void) goto free_pages; } + pages_allocated++; page = virt_to_page(array); list_add(&page->lru, &pages); @@ -2798,6 +2832,7 @@ static int trace_alloc_page(void) "for trace buffer!\n"); goto free_pages; } + pages_allocated++; page = virt_to_page(array); list_add(&page->lru, &pages); #endif @@ -2819,6 +2854,7 @@ static int trace_alloc_page(void) SetPageLRU(page); #endif } + tracing_pages_allocated += pages_allocated; global_trace.entries += ENTRIES_PER_PAGE; return 0; @@ -2853,6 +2889,8 @@ static int trace_free_page(void) page = list_entry(p, struct page, lru); ClearPageLRU(page); list_del(&page->lru); + tracing_pages_allocated--; + tracing_pages_allocated--; __free_page(page); tracing_reset(data); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 789b6ad..b38f700 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -126,8 +126,6 @@ static void background_writeout(unsigned long _min_pages); static struct prop_descriptor vm_completions; static struct prop_descriptor vm_dirties; -static unsigned long determine_dirtyable_memory(void); - /* * couple the period to the dirty_ratio: * @@ -347,7 +345,13 @@ static unsigned long highmem_dirtyable_memory(unsigned long total) #endif } -static unsigned long determine_dirtyable_memory(void) +/** + * determine_dirtyable_memory - amount of memory that may be used + * + * Returns the numebr of pages that can currently be freed and used + * by the kernel for direct mappings. + */ +unsigned long determine_dirtyable_memory(void) { unsigned long x; -- cgit v0.10.2 From dc102a8fae2d0d6bf5223fc549247f2e23959ae6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:09 +0200 Subject: Markers - remove extra format argument Denys Vlasenko : > Not in this patch, but I noticed: > > #define __trace_mark(name, call_private, format, args...) \ > do { \ > static const char __mstrtab_##name[] \ > __attribute__((section("__markers_strings"))) \ > = #name "\0" format; \ > static struct marker __mark_##name \ > __attribute__((section("__markers"), aligned(8))) = \ > { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ > 0, 0, marker_probe_cb, \ > { __mark_empty_function, NULL}, NULL }; \ > __mark_check_format(format, ## args); \ > if (unlikely(__mark_##name.state)) { \ > (*__mark_##name.call) \ > (&__mark_##name, call_private, \ > format, ## args); \ > } \ > } while (0) > > In this call: > > (*__mark_##name.call) \ > (&__mark_##name, call_private, \ > format, ## args); \ > > you make gcc allocate duplicate format string. You can use > &__mstrtab_##name[sizeof(#name)] instead since it holds the same string, > or drop ", format," above and "const char *fmt" from here: > > void (*call)(const struct marker *mdata, /* Probe wrapper */ > void *call_private, const char *fmt, ...); > > since mdata->format is the same and all callees which need it can take it there. Very good point. I actually thought about dropping it, since it would remove an unnecessary argument from the stack. And actually, since I now have the marker_probe_cb sitting between the marker site and the callbacks, there is no API change required. Thanks :) Mathieu Signed-off-by: Mathieu Desnoyers CC: Denys Vlasenko Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/marker.h b/include/linux/marker.h index 430f6ad..338533a 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -44,8 +44,8 @@ struct marker { */ char state; /* Marker state. */ char ptype; /* probe type : 0 : single, 1 : multi */ - void (*call)(const struct marker *mdata, /* Probe wrapper */ - void *call_private, const char *fmt, ...); + /* Probe wrapper */ + void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; } __attribute__((aligned(8))); @@ -72,8 +72,7 @@ struct marker { __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ - (&__mark_##name, call_private, \ - format, ## args); \ + (&__mark_##name, call_private, ## args);\ } \ } while (0) @@ -117,9 +116,9 @@ static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); /* * Connect a probe to a marker. diff --git a/kernel/marker.c b/kernel/marker.c index b5a9fe1..1abfb92 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -55,8 +55,8 @@ static DEFINE_MUTEX(markers_mutex); struct marker_entry { struct hlist_node hlist; char *format; - void (*call)(const struct marker *mdata, /* Probe wrapper */ - void *call_private, const char *fmt, ...); + /* Probe wrapper */ + void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; int refcount; /* Number of times armed. 0 if disarmed. */ @@ -91,15 +91,13 @@ EXPORT_SYMBOL_GPL(__mark_empty_function); * marker_probe_cb Callback that prepares the variable argument list for probes. * @mdata: pointer of type struct marker * @call_private: caller site private data - * @fmt: format string * @...: Variable argument list. * * Since we do not use "typical" pointer based RCU in the 1 argument case, we * need to put a full smp_rmb() in this branch. This is why we do not use * rcu_dereference() for the pointer read. */ -void marker_probe_cb(const struct marker *mdata, void *call_private, - const char *fmt, ...) +void marker_probe_cb(const struct marker *mdata, void *call_private, ...) { va_list args; char ptype; @@ -120,8 +118,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, /* Must read the ptr before private data. They are not data * dependant, so we put an explicit smp_rmb() here. */ smp_rmb(); - va_start(args, fmt); - func(mdata->single.probe_private, call_private, fmt, &args); + va_start(args, call_private); + func(mdata->single.probe_private, call_private, mdata->format, + &args); va_end(args); } else { struct marker_probe_closure *multi; @@ -136,9 +135,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, smp_read_barrier_depends(); multi = mdata->multi; for (i = 0; multi[i].func; i++) { - va_start(args, fmt); - multi[i].func(multi[i].probe_private, call_private, fmt, - &args); + va_start(args, call_private); + multi[i].func(multi[i].probe_private, call_private, + mdata->format, &args); va_end(args); } } @@ -150,13 +149,11 @@ EXPORT_SYMBOL_GPL(marker_probe_cb); * marker_probe_cb Callback that does not prepare the variable argument list. * @mdata: pointer of type struct marker * @call_private: caller site private data - * @fmt: format string * @...: Variable argument list. * * Should be connected to markers "MARK_NOARGS". */ -void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, const char *fmt, ...) +void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) { va_list args; /* not initialized */ char ptype; @@ -172,7 +169,8 @@ void marker_probe_cb_noarg(const struct marker *mdata, /* Must read the ptr before private data. They are not data * dependant, so we put an explicit smp_rmb() here. */ smp_rmb(); - func(mdata->single.probe_private, call_private, fmt, &args); + func(mdata->single.probe_private, call_private, mdata->format, + &args); } else { struct marker_probe_closure *multi; int i; @@ -186,8 +184,8 @@ void marker_probe_cb_noarg(const struct marker *mdata, smp_read_barrier_depends(); multi = mdata->multi; for (i = 0; multi[i].func; i++) - multi[i].func(multi[i].probe_private, call_private, fmt, - &args); + multi[i].func(multi[i].probe_private, call_private, + mdata->format, &args); } preempt_enable(); } -- cgit v0.10.2 From 0aa977f592f17004f9d1d545f2e1bb9ea71896c3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Markers - define non optimized marker To support the forthcoming "immediate values" marker optimization, we must have a way to declare markers in few code paths that does not use instruction modification based enable. This will be the case of printk(), some traps and eventually lockdep instrumentation. Changelog : - Fix reversed boolean logic of "generic". Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/marker.h b/include/linux/marker.h index 338533a..1290653 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -58,8 +58,12 @@ struct marker { * Make sure the alignment of the structure in the __markers section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * + * The "generic" argument controls which marker enabling mechanism must be used. + * If generic is true, a variable read is used. + * If generic is false, immediate values are used. */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...) \ do { \ static const char __mstrtab_##name[] \ __attribute__((section("__markers_strings"))) \ @@ -79,7 +83,7 @@ struct marker { extern void marker_update_probe_range(struct marker *begin, struct marker *end); #else /* !CONFIG_MARKERS */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) static inline void marker_update_probe_range(struct marker *begin, struct marker *end) @@ -87,15 +91,30 @@ static inline void marker_update_probe_range(struct marker *begin, #endif /* CONFIG_MARKERS */ /** - * trace_mark - Marker + * trace_mark - Marker using code patching * @name: marker name, not quoted. * @format: format string * @args...: variable argument list * - * Places a marker. + * Places a marker using optimized code patching technique (imv_read()) + * to be enabled when immediate values are present. */ #define trace_mark(name, format, args...) \ - __trace_mark(name, NULL, format, ## args) + __trace_mark(0, name, NULL, format, ## args) + +/** + * _trace_mark - Marker using variable read + * @name: marker name, not quoted. + * @format: format string + * @args...: variable argument list + * + * Places a marker using a standard memory read (_imv_read()) to be + * enabled. Should be used for markers in code paths where instruction + * modification based enabling is not welcome. (__init and __exit functions, + * lockdep, some traps, printk). + */ +#define _trace_mark(name, format, args...) \ + __trace_mark(1, name, NULL, format, ## args) /** * MARK_NOARGS - Format string for a marker with no argument. -- cgit v0.10.2 From 5b82a1b08a00b2adca3d9dd9777efff40b7aaaa1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Port ftrace to markers Porting ftrace to the marker infrastructure. Don't need to chain to the wakeup tracer from the sched tracer, because markers support multiple probes connected. Signed-off-by: Mathieu Desnoyers CC: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/sched.h b/include/linux/sched.h index 360ca99..c0b1c69 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2131,38 +2131,6 @@ __trace_special(void *__tr, void *__data, } #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER -extern void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); -extern void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr); -extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); -#else -static inline void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) -{ -} -static inline void -sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) -{ -} -static inline void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr) -{ -} -static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) -{ -} -static inline void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} -#endif - extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); diff --git a/kernel/sched.c b/kernel/sched.c index ad95cca..e2e985e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2500,7 +2500,9 @@ out_activate: success = 1; out_running: - ftrace_wake_up_task(rq, p, rq->curr); + trace_mark(kernel_sched_wakeup, + "pid %d state %ld ## rq %p task %p rq->curr %p", + p->pid, p->state, rq, p, rq->curr); check_preempt_curr(rq, p); p->state = TASK_RUNNING; @@ -2631,7 +2633,9 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - ftrace_wake_up_task(rq, p, rq->curr); + trace_mark(kernel_sched_wakeup_new, + "pid %d state %ld ## rq %p task %p rq->curr %p", + p->pid, p->state, rq, p, rq->curr); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) @@ -2804,7 +2808,11 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); - ftrace_ctx_switch(rq, prev, next); + trace_mark(kernel_sched_schedule, + "prev_pid %d next_pid %d prev_state %ld " + "## rq %p prev %p next %p", + prev->pid, next->pid, prev->state, + rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8845033..f5de060 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -234,25 +234,10 @@ void update_max_tr_single(struct trace_array *tr, extern cycle_t ftrace_now(int cpu); -#ifdef CONFIG_SCHED_TRACER -extern void -wakeup_sched_switch(struct task_struct *prev, struct task_struct *next); -extern void -wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr); -#else -static inline void -wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) -{ -} -static inline void -wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) -{ -} -#endif - #ifdef CONFIG_CONTEXT_SWITCH_TRACER typedef void (*tracer_switch_func_t)(void *private, + void *__rq, struct task_struct *prev, struct task_struct *next); @@ -262,9 +247,6 @@ struct tracer_switch_ops { struct tracer_switch_ops *next; }; -extern int register_tracer_switch(struct tracer_switch_ops *ops); -extern int unregister_tracer_switch(struct tracer_switch_ops *ops); - #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index a337647..d25ffa5 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -16,11 +16,14 @@ static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; +static atomic_t sched_ref; static void -ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) +sched_switch_func(void *private, void *__rq, struct task_struct *prev, + struct task_struct *next) { - struct trace_array *tr = ctx_trace; + struct trace_array **ptr = private; + struct trace_array *tr = *ptr; struct trace_array_cpu *data; unsigned long flags; long disabled; @@ -41,10 +44,40 @@ ctx_switch_func(void *__rq, struct task_struct *prev, struct task_struct *next) local_irq_restore(flags); } +static notrace void +sched_switch_callback(void *probe_data, void *call_data, + const char *format, va_list *args) +{ + struct task_struct *prev; + struct task_struct *next; + struct rq *__rq; + + if (!atomic_read(&sched_ref)) + return; + + /* skip prev_pid %d next_pid %d prev_state %ld */ + (void)va_arg(*args, int); + (void)va_arg(*args, int); + (void)va_arg(*args, long); + __rq = va_arg(*args, typeof(__rq)); + prev = va_arg(*args, typeof(prev)); + next = va_arg(*args, typeof(next)); + + tracing_record_cmdline(prev); + + /* + * If tracer_switch_func only points to the local + * switch func, it still needs the ptr passed to it. + */ + sched_switch_func(probe_data, __rq, prev, next); +} + static void -wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr) +wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct + task_struct *curr) { - struct trace_array *tr = ctx_trace; + struct trace_array **ptr = private; + struct trace_array *tr = *ptr; struct trace_array_cpu *data; unsigned long flags; long disabled; @@ -67,35 +100,29 @@ wakeup_func(void *__rq, struct task_struct *wakee, struct task_struct *curr) local_irq_restore(flags); } -void -ftrace_ctx_switch(void *__rq, struct task_struct *prev, - struct task_struct *next) +static notrace void +wake_up_callback(void *probe_data, void *call_data, + const char *format, va_list *args) { - if (unlikely(atomic_read(&trace_record_cmdline_enabled))) - tracing_record_cmdline(prev); + struct task_struct *curr; + struct task_struct *task; + struct rq *__rq; - /* - * If tracer_switch_func only points to the local - * switch func, it still needs the ptr passed to it. - */ - ctx_switch_func(__rq, prev, next); + if (likely(!tracer_enabled)) + return; - /* - * Chain to the wakeup tracer (this is a NOP if disabled): - */ - wakeup_sched_switch(prev, next); -} + /* Skip pid %d state %ld */ + (void)va_arg(*args, int); + (void)va_arg(*args, long); + /* now get the meat: "rq %p task %p rq->curr %p" */ + __rq = va_arg(*args, typeof(__rq)); + task = va_arg(*args, typeof(task)); + curr = va_arg(*args, typeof(curr)); -void -ftrace_wake_up_task(void *__rq, struct task_struct *wakee, - struct task_struct *curr) -{ - wakeup_func(__rq, wakee, curr); + tracing_record_cmdline(task); + tracing_record_cmdline(curr); - /* - * Chain to the wakeup tracer (this is a NOP if disabled): - */ - wakeup_sched_wakeup(wakee, curr); + wakeup_func(probe_data, __rq, task, curr); } void @@ -132,15 +159,95 @@ static void sched_switch_reset(struct trace_array *tr) tracing_reset(tr->data[cpu]); } +static int tracing_sched_register(void) +{ + int ret; + + ret = marker_probe_register("kernel_sched_wakeup", + "pid %d state %ld ## rq %p task %p rq->curr %p", + wake_up_callback, + &ctx_trace); + if (ret) { + pr_info("wakeup trace: Couldn't add marker" + " probe to kernel_sched_wakeup\n"); + return ret; + } + + ret = marker_probe_register("kernel_sched_wakeup_new", + "pid %d state %ld ## rq %p task %p rq->curr %p", + wake_up_callback, + &ctx_trace); + if (ret) { + pr_info("wakeup trace: Couldn't add marker" + " probe to kernel_sched_wakeup_new\n"); + goto fail_deprobe; + } + + ret = marker_probe_register("kernel_sched_schedule", + "prev_pid %d next_pid %d prev_state %ld " + "## rq %p prev %p next %p", + sched_switch_callback, + &ctx_trace); + if (ret) { + pr_info("sched trace: Couldn't add marker" + " probe to kernel_sched_schedule\n"); + goto fail_deprobe_wake_new; + } + + return ret; +fail_deprobe_wake_new: + marker_probe_unregister("kernel_sched_wakeup_new", + wake_up_callback, + &ctx_trace); +fail_deprobe: + marker_probe_unregister("kernel_sched_wakeup", + wake_up_callback, + &ctx_trace); + return ret; +} + +static void tracing_sched_unregister(void) +{ + marker_probe_unregister("kernel_sched_schedule", + sched_switch_callback, + &ctx_trace); + marker_probe_unregister("kernel_sched_wakeup_new", + wake_up_callback, + &ctx_trace); + marker_probe_unregister("kernel_sched_wakeup", + wake_up_callback, + &ctx_trace); +} + +void tracing_start_sched_switch(void) +{ + long ref; + + ref = atomic_inc_return(&sched_ref); + if (ref == 1) + tracing_sched_register(); +} + +void tracing_stop_sched_switch(void) +{ + long ref; + + ref = atomic_dec_and_test(&sched_ref); + if (ref) + tracing_sched_unregister(); +} + static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); atomic_inc(&trace_record_cmdline_enabled); tracer_enabled = 1; + tracing_start_sched_switch(); } static void stop_sched_trace(struct trace_array *tr) { + tracing_stop_sched_switch(); atomic_dec(&trace_record_cmdline_enabled); tracer_enabled = 0; } @@ -181,6 +288,14 @@ static struct tracer sched_switch_trace __read_mostly = __init static int init_sched_switch_trace(void) { + int ret = 0; + + if (atomic_read(&sched_ref)) + ret = tracing_sched_register(); + if (ret) { + pr_info("error registering scheduler trace\n"); + return ret; + } return register_tracer(&sched_switch_trace); } device_initcall(init_sched_switch_trace); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 5948011..5d2fb48 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "trace.h" @@ -44,11 +45,13 @@ static int report_latency(cycle_t delta) return 1; } -void -wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) +static void notrace +wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, + struct task_struct *next) { unsigned long latency = 0, t0 = 0, t1 = 0; - struct trace_array *tr = wakeup_trace; + struct trace_array **ptr = private; + struct trace_array *tr = *ptr; struct trace_array_cpu *data; cycle_t T0, T1, delta; unsigned long flags; @@ -113,6 +116,31 @@ out: atomic_dec(&tr->data[cpu]->disabled); } +static notrace void +sched_switch_callback(void *probe_data, void *call_data, + const char *format, va_list *args) +{ + struct task_struct *prev; + struct task_struct *next; + struct rq *__rq; + + /* skip prev_pid %d next_pid %d prev_state %ld */ + (void)va_arg(*args, int); + (void)va_arg(*args, int); + (void)va_arg(*args, long); + __rq = va_arg(*args, typeof(__rq)); + prev = va_arg(*args, typeof(prev)); + next = va_arg(*args, typeof(next)); + + tracing_record_cmdline(prev); + + /* + * If tracer_switch_func only points to the local + * switch func, it still needs the ptr passed to it. + */ + wakeup_sched_switch(probe_data, __rq, prev, next); +} + static void __wakeup_reset(struct trace_array *tr) { struct trace_array_cpu *data; @@ -188,19 +216,68 @@ out: atomic_dec(&tr->data[cpu]->disabled); } -void wakeup_sched_wakeup(struct task_struct *wakee, struct task_struct *curr) +static notrace void +wake_up_callback(void *probe_data, void *call_data, + const char *format, va_list *args) { + struct trace_array **ptr = probe_data; + struct trace_array *tr = *ptr; + struct task_struct *curr; + struct task_struct *task; + struct rq *__rq; + if (likely(!tracer_enabled)) return; + /* Skip pid %d state %ld */ + (void)va_arg(*args, int); + (void)va_arg(*args, long); + /* now get the meat: "rq %p task %p rq->curr %p" */ + __rq = va_arg(*args, typeof(__rq)); + task = va_arg(*args, typeof(task)); + curr = va_arg(*args, typeof(curr)); + + tracing_record_cmdline(task); tracing_record_cmdline(curr); - tracing_record_cmdline(wakee); - wakeup_check_start(wakeup_trace, wakee, curr); + wakeup_check_start(tr, task, curr); } static void start_wakeup_tracer(struct trace_array *tr) { + int ret; + + ret = marker_probe_register("kernel_sched_wakeup", + "pid %d state %ld ## rq %p task %p rq->curr %p", + wake_up_callback, + &wakeup_trace); + if (ret) { + pr_info("wakeup trace: Couldn't add marker" + " probe to kernel_sched_wakeup\n"); + return; + } + + ret = marker_probe_register("kernel_sched_wakeup_new", + "pid %d state %ld ## rq %p task %p rq->curr %p", + wake_up_callback, + &wakeup_trace); + if (ret) { + pr_info("wakeup trace: Couldn't add marker" + " probe to kernel_sched_wakeup_new\n"); + goto fail_deprobe; + } + + ret = marker_probe_register("kernel_sched_schedule", + "prev_pid %d next_pid %d prev_state %ld " + "## rq %p prev %p next %p", + sched_switch_callback, + &wakeup_trace); + if (ret) { + pr_info("sched trace: Couldn't add marker" + " probe to kernel_sched_schedule\n"); + goto fail_deprobe_wake_new; + } + wakeup_reset(tr); /* @@ -215,11 +292,28 @@ static void start_wakeup_tracer(struct trace_array *tr) tracer_enabled = 1; return; +fail_deprobe_wake_new: + marker_probe_unregister("kernel_sched_wakeup_new", + wake_up_callback, + &wakeup_trace); +fail_deprobe: + marker_probe_unregister("kernel_sched_wakeup", + wake_up_callback, + &wakeup_trace); } static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; + marker_probe_unregister("kernel_sched_schedule", + sched_switch_callback, + &wakeup_trace); + marker_probe_unregister("kernel_sched_wakeup_new", + wake_up_callback, + &wakeup_trace); + marker_probe_unregister("kernel_sched_wakeup", + wake_up_callback, + &wakeup_trace); } static void wakeup_tracer_init(struct trace_array *tr) -- cgit v0.10.2 From 74f4e369fc5b52433ad824cef32d3bf1304549be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:15 +0200 Subject: ftrace: stacktrace fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0d3714e..017ab44 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -120,4 +120,12 @@ static inline void tracer_disable(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 5c2942e..1a064ad 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -31,6 +31,7 @@ #include #include #include +#include static noinline void __down(struct semaphore *sem); static noinline int __down_interruptible(struct semaphore *sem); @@ -53,6 +54,7 @@ void down(struct semaphore *sem) { unsigned long flags; + ftrace_special(sem->count, 0, __LINE__); spin_lock_irqsave(&sem->lock, flags); if (likely(sem->count > 0)) sem->count--; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2824cf48..3271916 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -901,7 +901,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->ctx.next_pid = next->pid; entry->ctx.next_prio = next->prio; entry->ctx.next_state = next->state; - __trace_stack(tr, data, flags, 4); + __trace_stack(tr, data, flags, 5); __raw_spin_unlock(&data->lock); raw_local_irq_restore(irq_flags); } @@ -927,7 +927,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->ctx.next_pid = wakee->pid; entry->ctx.next_prio = wakee->prio; entry->ctx.next_state = wakee->state; - __trace_stack(tr, data, flags, 5); + __trace_stack(tr, data, flags, 6); __raw_spin_unlock(&data->lock); raw_local_irq_restore(irq_flags); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f5de060..c460e85 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -51,7 +51,7 @@ struct special_entry { * Stack-trace entry: */ -#define FTRACE_STACK_ENTRIES 5 +#define FTRACE_STACK_ENTRIES 8 struct stack_entry { unsigned long caller[FTRACE_STACK_ENTRIES]; -- cgit v0.10.2 From aa5e5ceaf52a882a29d9b86531a20733f5116066 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 13 May 2008 22:06:56 -0700 Subject: ftrace: remove packed attribute on ftrace_page. It causes unaligned access traps on platforms like sparc (ftrace_page may be marked packed, but once we return a dyn_ftrace sub-object from this array to another piece of code, the "packed" part of the typing information doesn't propagate). But also, it didn't serve any purpose either. Even if packed, on 64-bit or 32-bit, it didn't give us any more dyn_ftrace entries per-page. Signed-off-by: David S. Miller Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index af5ad89..07b2a14 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -177,9 +177,9 @@ static DEFINE_MUTEX(ftrace_filter_lock); struct ftrace_page { struct ftrace_page *next; - int index; + unsigned long index; struct dyn_ftrace records[]; -} __attribute__((packed)); +}; #define ENTRIES_PER_PAGE \ ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) -- cgit v0.10.2 From d05f5f9906740474eb768823004ffcd775b12ca6 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 13 May 2008 22:06:59 -0700 Subject: sparc64: add ftrace support. Signed-off-by: David S. Miller Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index eb36f3b..a480df6 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -11,6 +11,7 @@ config SPARC config SPARC64 bool default y + select HAVE_FTRACE select HAVE_IDE select HAVE_LMB select HAVE_ARCH_KGDB diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug index 6a4d28a..d6d32d1 100644 --- a/arch/sparc64/Kconfig.debug +++ b/arch/sparc64/Kconfig.debug @@ -33,7 +33,7 @@ config DEBUG_PAGEALLOC config MCOUNT bool - depends on STACK_DEBUG + depends on STACK_DEBUG || FTRACE default y config FRAME_POINTER diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index ec4f5eb..418b578 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -14,6 +14,7 @@ obj-y := process.o setup.o cpu.o idprom.o \ power.o sbus.o sparc64_ksyms.o chmc.o \ visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_PCI) += ebus.o pci_common.o \ pci_psycho.o pci_sabre.o pci_schizo.o \ diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c new file mode 100644 index 0000000..f449e6d --- /dev/null +++ b/arch/sparc64/kernel/ftrace.c @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include +#include + +static const u32 ftrace_nop = 0x01000000; + +notrace int ftrace_ip_converted(unsigned long ip) +{ + u32 insn = *(u32 *) ip; + + return (insn == ftrace_nop); +} + +notrace unsigned char *ftrace_nop_replace(void) +{ + return (char *)&ftrace_nop; +} + +notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + static u32 call; + s32 off; + + off = ((s32)addr - (s32)ip); + call = 0x40000000 | ((u32)off >> 2); + + return (unsigned char *) &call; +} + +notrace int +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code) +{ + u32 old = *(u32 *)old_code; + u32 new = *(u32 *)new_code; + u32 replaced; + int faulted; + + __asm__ __volatile__( + "1: cas [%[ip]], %[old], %[new]\n" + " flush %[ip]\n" + " mov 0, %[faulted]\n" + "2:\n" + " .section .fixup,#alloc,#execinstr\n" + " .align 4\n" + "3: sethi %%hi(2b), %[faulted]\n" + " jmpl %[faulted] + %%lo(2b), %%g0\n" + " mov 1, %[faulted]\n" + " .previous\n" + " .section __ex_table,\"a\"\n" + " .align 4\n" + " .word 1b, 3b\n" + " .previous\n" + : "=r" (replaced), [faulted] "=r" (faulted) + : [new] "0" (new), [old] "r" (old), [ip] "r" (ip) + : "memory"); + + if (replaced != old && replaced != new) + faulted = 2; + + return faulted; +} + +notrace int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + unsigned char old[4], *new; + + memcpy(old, &ftrace_call, 4); + new = ftrace_call_replace(ip, (unsigned long)func); + return ftrace_modify_code(ip, old, new); +} + +notrace int ftrace_mcount_set(unsigned long *data) +{ + unsigned long ip = (long)(&mcount_call); + unsigned long *addr = data; + unsigned char old[4], *new; + + /* + * Replace the mcount stub with a pointer to the + * ip recorder function. + */ + memcpy(old, &mcount_call, 4); + new = ftrace_call_replace(ip, *addr); + *addr = ftrace_modify_code(ip, old, new); + + return 0; +} + + +int __init ftrace_dyn_arch_init(void *data) +{ + ftrace_mcount_set(data); + return 0; +} diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S index 9e4534b..7735a7a 100644 --- a/arch/sparc64/lib/mcount.S +++ b/arch/sparc64/lib/mcount.S @@ -28,10 +28,13 @@ ovstack: .skip OVSTACKSIZE #endif .text - .align 32 - .globl mcount, _mcount -mcount: + .align 32 + .globl _mcount + .type _mcount,#function + .globl mcount + .type mcount,#function _mcount: +mcount: #ifdef CONFIG_STACK_DEBUG /* * Check whether %sp is dangerously low. @@ -55,6 +58,53 @@ _mcount: or %g3, %lo(panicstring), %o0 call prom_halt nop +1: +#endif +#ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE + mov %o7, %o0 + .globl mcount_call +mcount_call: + call ftrace_stub + mov %o0, %o7 +#else + sethi %hi(ftrace_trace_function), %g1 + sethi %hi(ftrace_stub), %g2 + ldx [%g1 + %lo(ftrace_trace_function)], %g1 + or %g2, %lo(ftrace_stub), %g2 + cmp %g1, %g2 + be,pn %icc, 1f + mov %i7, %o1 + jmpl %g1, %g0 + mov %o7, %o0 + /* not reached */ +1: #endif -1: retl +#endif + retl nop + .size _mcount,.-_mcount + .size mcount,.-mcount + +#ifdef CONFIG_FTRACE + .globl ftrace_stub + .type ftrace_stub,#function +ftrace_stub: + retl + nop + .size ftrace_stub,.-ftrace_stub +#ifdef CONFIG_DYNAMIC_FTRACE + .globl ftrace_caller + .type ftrace_caller,#function +ftrace_caller: + mov %i7, %o1 + mov %o7, %o0 + .globl ftrace_call +ftrace_call: + call ftrace_stub + mov %o0, %o7 + retl + nop + .size ftrace_caller,.-ftrace_caller +#endif +#endif -- cgit v0.10.2 From 37135677e653537ffc6e7def679443272a1c03c3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 14 May 2008 08:10:31 +0200 Subject: ftrace: fix mcount export bug David S. Miller noticed the following bug: the -pg instrumentation function callback is named differently on each platform. On x86 it is mcount, on sparc it is _mcount. So the export does not make sense in kernel/trace/ftrace.c - move it to x86. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index deb4378..29999db 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -1,7 +1,14 @@ +#include #include + #include -#include #include +#include + +#ifdef CONFIG_FTRACE +/* mcount is defined in assembly */ +EXPORT_SYMBOL(mcount); +#endif /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index f6c05d0..122885b 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -1,15 +1,22 @@ /* Exports for assembly files. All C exports should go in the respective C files. */ +#include #include -#include #include +#include + #include -#include #include +#include #include +#ifdef CONFIG_FTRACE +/* mcount is defined in assembly */ +EXPORT_SYMBOL(mcount); +#endif + EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(__get_user_1); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 07b2a14..a3e47f4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -50,9 +50,6 @@ static struct ftrace_ops ftrace_list_end __read_mostly = static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -/* mcount is defined per arch in assembly */ -EXPORT_SYMBOL(mcount); - void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { struct ftrace_ops *op = ftrace_list; -- cgit v0.10.2 From 2d8b820b2e81954754277723379ae9ed5de316fa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Feb 2008 16:55:50 +0100 Subject: ftrace: cleanups factor out code and clean it up. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a3e47f4..89bd9a6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -18,13 +18,13 @@ #include #include #include -#include #include -#include +#include #include +#include #include -#include #include +#include #include #include "trace.h" -- cgit v0.10.2 From d49dbf33f0bf8748ee3662b973eb57e60525d622 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 May 2008 10:41:53 +0200 Subject: ftrace: fix include file dependency Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 017ab44..911d5d8 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -4,6 +4,7 @@ #ifdef CONFIG_FTRACE #include +#include extern int ftrace_enabled; extern int -- cgit v0.10.2 From 489f139614596cbc956a06f5e4bb41288e276fe3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 Feb 2008 13:38:05 +0100 Subject: ftrace: fix build bug Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 911d5d8..922e23d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -106,16 +106,16 @@ static inline void tracer_disable(void) #endif #ifdef CONFIG_IRQSOFF_TRACER - extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1); - extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1); + extern void time_hardirqs_on(unsigned long a0, unsigned long a1); + extern void time_hardirqs_off(unsigned long a0, unsigned long a1); #else # define time_hardirqs_on(a0, a1) do { } while (0) # define time_hardirqs_off(a0, a1) do { } while (0) #endif #ifdef CONFIG_PREEMPT_TRACER - extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1); - extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1); + extern void trace_preempt_on(unsigned long a0, unsigned long a1); + extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else # define trace_preempt_on(a0, a1) do { } while (0) # define trace_preempt_off(a0, a1) do { } while (0) -- cgit v0.10.2 From e0eca07badc023a675a61906020b397da20f07c3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 May 2008 23:49:43 -0400 Subject: ftrace, POWERPC: add irqs_disabled_flags to ppc PPC doesn't have the irqs_disabled_flags needed by ftrace. This patch adds it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-powerpc/hw_irq.h b/include/asm-powerpc/hw_irq.h index ad8c9f7..f75a5fc 100644 --- a/include/asm-powerpc/hw_irq.h +++ b/include/asm-powerpc/hw_irq.h @@ -59,6 +59,11 @@ extern void iseries_handle_interrupts(void); get_paca()->hard_enabled = 0; \ } while(0) +static inline int irqs_disabled_flags(unsigned long flags) +{ + return flags == 0; +} + #else #if defined(CONFIG_BOOKE) @@ -113,6 +118,11 @@ static inline void local_irq_save_ptr(unsigned long *flags) #define hard_irq_enable() local_irq_enable() #define hard_irq_disable() local_irq_disable() +static inline int irqs_disabled_flags(unsigned long flags) +{ + return (flags & MSR_EE) == 0; +} + #endif /* CONFIG_PPC64 */ /* -- cgit v0.10.2 From 4e491d14f2506b218d678935c25a7027b79178b1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 May 2008 23:49:44 -0400 Subject: ftrace: support for PowerPC This patch adds full support for ftrace for PowerPC (both 64 and 32 bit). This includes dynamic tracing and function filtering. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3934e26..62d034a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -105,11 +105,12 @@ config ARCH_NO_VIRT_TO_BUS config PPC bool default y + select HAVE_FTRACE select HAVE_IDE - select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_LMB + select HAVE_OPROFILE config EARLY_PRINTK bool diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 2346d27..f3f5e26 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -12,6 +12,18 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif +ifdef CONFIG_FTRACE +# Do not trace early boot code +CFLAGS_REMOVE_cputable.o = -pg +CFLAGS_REMOVE_prom_init.o = -pg + +ifdef CONFIG_DYNAMIC_FTRACE +# dynamic ftrace setup. +CFLAGS_REMOVE_ftrace.o = -pg +endif + +endif + obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ init_task.o process.o systbl.o idle.o \ @@ -78,6 +90,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o + obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o ifneq ($(CONFIG_PPC_INDIRECT_IO),y) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0c8614d..0e62218 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1035,3 +1035,133 @@ machine_check_in_rtas: /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_RTAS */ + +#ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL(mcount) +_GLOBAL(_mcount) + stwu r1,-48(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 + stw r7, 28(r1) + mfcr r5 + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + stw r3, 44(r1) + stw r5, 8(r1) + .globl mcount_call +mcount_call: + bl ftrace_stub + nop + lwz r6, 8(r1) + lwz r0, 44(r1) + lwz r3, 12(r1) + mtctr r0 + lwz r4, 16(r1) + mtcr r6 + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r0, 52(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + mtlr r0 + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1, r1, 48 + bctr + +_GLOBAL(ftrace_caller) + /* Based off of objdump optput from glibc */ + stwu r1,-48(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 + lwz r4, 52(r1) + mfcr r5 + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + stw r3, 44(r1) + stw r5, 8(r1) +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + lwz r6, 8(r1) + lwz r0, 44(r1) + lwz r3, 12(r1) + mtctr r0 + lwz r4, 16(r1) + mtcr r6 + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r0, 52(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + mtlr r0 + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1, r1, 48 + bctr +#else +_GLOBAL(mcount) +_GLOBAL(_mcount) + stwu r1,-48(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 + lwz r4, 52(r1) + mfcr r5 + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + stw r3, 44(r1) + stw r5, 8(r1) + + LOAD_REG_ADDR(r5, ftrace_trace_function) +#if 0 + mtctr r3 + mr r1, r5 + bctrl +#endif + lwz r5,0(r5) +#if 1 + mtctr r5 + bctrl +#else + bl ftrace_stub +#endif + nop + + lwz r6, 8(r1) + lwz r0, 44(r1) + lwz r3, 12(r1) + mtctr r0 + lwz r4, 16(r1) + mtcr r6 + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r0, 52(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + mtlr r0 + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1, r1, 48 + bctr +#endif + +_GLOBAL(ftrace_stub) + blr + +#endif /* CONFIG_MCOUNT */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index c0db5b7..2c4d9e0 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -870,3 +870,65 @@ _GLOBAL(enter_prom) ld r0,16(r1) mtlr r0 blr + +#ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL(mcount) +_GLOBAL(_mcount) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + stdu r1, -112(r1) + std r3, 128(r1) + .globl mcount_call +mcount_call: + bl ftrace_stub + nop + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 + blr + +_GLOBAL(ftrace_caller) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 +_GLOBAL(ftrace_stub) + blr +#else +_GLOBAL(mcount) + blr + +_GLOBAL(_mcount) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) + + + LOAD_REG_ADDR(r5,ftrace_trace_function) + ld r5,0(r5) + ld r5,0(r5) + mtctr r5 + bctrl + + nop + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 +_GLOBAL(ftrace_stub) + blr + +#endif +#endif diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c new file mode 100644 index 0000000..5a4993f --- /dev/null +++ b/arch/powerpc/kernel/ftrace.c @@ -0,0 +1,165 @@ +/* + * Code for replacing ftrace calls with jumps. + * + * Copyright (C) 2007-2008 Steven Rostedt + * + * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box. + * + */ + +#include +#include +#include +#include +#include +#include + +#include + +#define CALL_BACK 4 + +static unsigned int ftrace_nop = 0x60000000; + +#ifdef CONFIG_PPC32 +# define GET_ADDR(addr) addr +#else +/* PowerPC64's functions are data that points to the functions */ +# define GET_ADDR(addr) *(unsigned long *)addr +#endif + +notrace int ftrace_ip_converted(unsigned long ip) +{ + unsigned int save; + + ip -= CALL_BACK; + save = *(unsigned int *)ip; + + return save == ftrace_nop; +} + +static unsigned int notrace ftrace_calc_offset(long ip, long addr) +{ + return (int)((addr + CALL_BACK) - ip); +} + +notrace unsigned char *ftrace_nop_replace(void) +{ + return (char *)&ftrace_nop; +} + +notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + static unsigned int op; + + addr = GET_ADDR(addr); + + /* Set to "bl addr" */ + op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffe); + + /* + * No locking needed, this must be called via kstop_machine + * which in essence is like running on a uniprocessor machine. + */ + return (unsigned char *)&op; +} + +#ifdef CONFIG_PPC64 +# define _ASM_ALIGN " .align 3 " +# define _ASM_PTR " .llong " +#else +# define _ASM_ALIGN " .align 2 " +# define _ASM_PTR " .long " +#endif + +notrace int +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code) +{ + unsigned replaced; + unsigned old = *(unsigned *)old_code; + unsigned new = *(unsigned *)new_code; + int faulted = 0; + + /* move the IP back to the start of the call */ + ip -= CALL_BACK; + + /* + * Note: Due to modules and __init, code can + * disappear and change, we need to protect against faulting + * as well as code changing. + * + * No real locking needed, this code is run through + * kstop_machine. + */ + asm volatile ( + "1: lwz %1, 0(%2)\n" + " cmpw %1, %5\n" + " bne 2f\n" + " stwu %3, 0(%2)\n" + "2:\n" + ".section .fixup, \"ax\"\n" + "3: li %0, 1\n" + " b 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + _ASM_ALIGN "\n" + _ASM_PTR "1b, 3b\n" + ".previous" + : "=r"(faulted), "=r"(replaced) + : "r"(ip), "r"(new), + "0"(faulted), "r"(old) + : "memory"); + + if (replaced != old && replaced != new) + faulted = 2; + + if (!faulted) + flush_icache_range(ip, ip + 8); + + return faulted; +} + +notrace int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + unsigned char old[4], *new; + int ret; + + ip += CALL_BACK; + + memcpy(old, &ftrace_call, 4); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + + return ret; +} + +notrace int ftrace_mcount_set(unsigned long *data) +{ + unsigned long ip = (long)(&mcount_call); + unsigned long *addr = data; + unsigned char old[4], *new; + + /* ip is at the location, but modify code will subtact this */ + ip += CALL_BACK; + + /* + * Replace the mcount stub with a pointer to the + * ip recorder function. + */ + memcpy(old, &mcount_call, 4); + new = ftrace_call_replace(ip, *addr); + *addr = ftrace_modify_code(ip, old, new); + + return 0; +} + +int __init ftrace_dyn_arch_init(void *data) +{ + /* This is running in kstop_machine */ + + ftrace_mcount_set(data); + + return 0; +} + diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index e31aca92..1882bf4 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -120,7 +120,8 @@ EXPORT_SYMBOL(_outsl_ns); #define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0) -void _memset_io(volatile void __iomem *addr, int c, unsigned long n) +notrace void +_memset_io(volatile void __iomem *addr, int c, unsigned long n) { void *p = (void __force *)addr; u32 lc = c; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2f73f70..6e01eb0 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -98,7 +98,7 @@ EXPORT_SYMBOL(irq_desc); int distribute_irqs = 1; -static inline unsigned long get_hard_enabled(void) +static inline notrace unsigned long get_hard_enabled(void) { unsigned long enabled; @@ -108,13 +108,13 @@ static inline unsigned long get_hard_enabled(void) return enabled; } -static inline void set_soft_enabled(unsigned long enable) +static inline notrace void set_soft_enabled(unsigned long enable) { __asm__ __volatile__("stb %0,%1(13)" : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -void raw_local_irq_restore(unsigned long en) +notrace void raw_local_irq_restore(unsigned long en) { /* * get_paca()->soft_enabled = en; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 5112a4a..22f8e2b 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -47,6 +47,11 @@ #include #endif +#ifdef CONFIG_FTRACE +extern void _mcount(void); +EXPORT_SYMBOL(_mcount); +#endif + extern void bootx_init(unsigned long r4, unsigned long phys); int boot_cpuid; @@ -81,7 +86,7 @@ int ucache_bsize; * from the address that it was linked at, so we must use RELOC/PTRRELOC * to access static data (including strings). -- paulus */ -unsigned long __init early_init(unsigned long dt_ptr) +notrace unsigned long __init early_init(unsigned long dt_ptr) { unsigned long offset = reloc_offset(); struct cpu_spec *spec; @@ -111,7 +116,7 @@ unsigned long __init early_init(unsigned long dt_ptr) * This is called very early on the boot process, after a minimal * MMU environment has been set up but before MMU_init is called. */ -void __init machine_init(unsigned long dt_ptr, unsigned long phys) +notrace void __init machine_init(unsigned long dt_ptr, unsigned long phys) { /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); @@ -133,7 +138,7 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys) #ifdef CONFIG_BOOKE_WDT /* Checks wdt=x and wdt_period=xx command-line option */ -int __init early_parse_wdt(char *p) +notrace int __init early_parse_wdt(char *p) { if (p && strncmp(p, "0", 1) != 0) booke_wdt_enabled = 1; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 098fd96..277bf18 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -85,6 +85,11 @@ struct ppc64_caches ppc64_caches = { }; EXPORT_SYMBOL_GPL(ppc64_caches); +#ifdef CONFIG_FTRACE +extern void _mcount(void); +EXPORT_SYMBOL(_mcount); +#endif + /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index 4d72c8f..8977417 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -1,5 +1,10 @@ CFLAGS_bootx_init.o += -fPIC +ifdef CONFIG_FTRACE +# Do not trace early boot code +CFLAGS_REMOVE_bootx_init.o = -pg +endif + obj-y += pic.o setup.o time.o feature.o pci.o \ sleep.o low_i2c.o cache.o pfunc_core.o \ pfunc_base.o diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index a5f6001..3877dd9 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -123,6 +123,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, int ret; int save_ftrace_enabled = ftrace_enabled; int save_tracer_enabled = tracer_enabled; + char *func_name; /* The ftrace test PASSED */ printk(KERN_CONT "PASSED\n"); @@ -142,9 +143,15 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, return ret; } + /* + * Some archs *cough*PowerPC*cough* add charachters to the + * start of the function names. We simply put a '*' to + * accomodate them. + */ + func_name = "*" STR(DYN_FTRACE_TEST_NAME); + /* filter only on our function */ - ftrace_set_filter(STR(DYN_FTRACE_TEST_NAME), - sizeof(STR(DYN_FTRACE_TEST_NAME)), 1); + ftrace_set_filter(func_name, strlen(func_name), 1); /* enable tracing */ tr->ctrl = 1; -- cgit v0.10.2 From 656ee82cc855027b2e994ad218519b09fa652cc1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 May 2008 21:30:29 -0400 Subject: kbuild: create new CFLAGS_REMOVE_(basename).o option We currently have a way to add special CFLAGS to code, but we do not have a way to remove them if needed. With the case of ftrace, some files should simply not be profiled. Adding the -pg flag to these files is simply a waste, and adding "notrace" to each and every function is ugly. Currently we put in "Makefile turd" [1] to stop the compiler from adding -pg to certain files. This was clumsy and awkward. This patch now adds the revese of CFLAGS_(basename).o with CFLAGS_REMOVE_(basename).o. This allows developers to prevent certain CFLAGS from being used to compile files. For example, we can now do CFLAGS_REMOVE_string.o = -pg to remove the -pg option from the string.o file in the lib directory. Note: a space delimited list of options may be added to the REMOVE macro. [1] - what David Miller called the workaronud to remove -pg Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 8e44023..ea48b82 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -96,7 +96,8 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))" modname_flags = $(if $(filter 1,$(words $(modname))),\ -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") -_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o) +orig_c_flags = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o) +_c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags)) _a_flags = $(KBUILD_AFLAGS) $(asflags-y) $(AFLAGS_$(basetarget).o) _cpp_flags = $(KBUILD_CPPFLAGS) $(cppflags-y) $(CPPFLAGS_$(@F)) -- cgit v0.10.2 From 6ec562328fda585be2d7f472cfac99d3b44d362a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 May 2008 21:30:30 -0400 Subject: ftrace: use the new kbuild CFLAGS_REMOVE for kernel directory This patch removes the Makefile turd and uses the nice CFLAGS_REMOVE macro in the kernel directory. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/Makefile b/kernel/Makefile index d2f80ea..ca2433e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -11,12 +11,16 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o +CFLAGS_REMOVE_sched.o = -pg -mno-spe + ifdef CONFIG_FTRACE -# Do not profile debug utilities -ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(if $(filter-out lockdep% %debug,$(basename $(notdir $@))), \ - $(ORIG_CFLAGS), \ - $(subst -pg,,$(ORIG_CFLAGS))) +# Do not trace debug files and internal ftrace files +CFLAGS_REMOVE_lockdep.o = -pg +CFLAGS_REMOVE_lockdep_proc.o = -pg +CFLAGS_REMOVE_mutex-debug.o = -pg +CFLAGS_REMOVE_rtmutex-debug.o = -pg +CFLAGS_REMOVE_cgroup-debug.o = -pg +CFLAGS_REMOVE_sched_clock.o = -pg endif obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o -- cgit v0.10.2 From 654e4787689faffdd2137fe91f59fd3ef3363ad2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 May 2008 21:30:31 -0400 Subject: ftrace: use the new kbuild CFLAGS_REMOVE for lib directory This patch removes the Makefile turd and uses the nice CFLAGS_REMOVE macro in the lib directory. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/lib/Makefile b/lib/Makefile index d97ad11..4b836a5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -10,11 +10,11 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ ifdef CONFIG_FTRACE # Do not profile string.o, since it may be used in early boot or vdso +CFLAGS_REMOVE_string.o = -pg # Also do not profile any debug utilities -ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(if $(filter-out %debug debug% string%,$(basename $(notdir $@))), \ - $(ORIG_CFLAGS), \ - $(subst -pg,,$(ORIG_CFLAGS))) +CFLAGS_REMOVE_spinlock_debug.o = -pg +CFLAGS_REMOVE_list_debug.o = -pg +CFLAGS_REMOVE_debugobjects.o = -pg endif lib-$(CONFIG_MMU) += ioremap.o -- cgit v0.10.2 From 7fa09f24b477ad41b821713eba757b3aa7a2864a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 May 2008 21:30:32 -0400 Subject: ftrace: use the new kbuild CFLAGS_REMOVE for x86/kernel directory This patch removes the Makefile turd and uses the nice CFLAGS_REMOVE macro in the x86/kernel directory. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e142091..739d49a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -6,6 +6,13 @@ extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) +ifdef CONFIG_FTRACE +# Do not profile debug utilities +CFLAGS_REMOVE_tsc_64.o = -pg +CFLAGS_REMOVE_tsc_32.o = -pg +CFLAGS_REMOVE_rtc.o = -pg +endif + # # vsyscalls (which work on the user stack) should have # no stack-protector checks: -- cgit v0.10.2 From 677aa9f77e8de3791b481a0cec6c8b84d1eec626 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 17 May 2008 00:01:36 -0400 Subject: ftrace: add have dynamic ftrace config for archs Now that ftrace is being ported to other architectures, it has become apparent that DYNAMIC_FTRACE is dependent on whether or not that architecture implements dynamic ftrace. FTRACE itself may be ported to an architecture without porting dynamic ftrace. This patch adds HAVE_DYNAMIC_FTRACE to allow architectures to port ftrace without having to also port the dynamic aspect as well. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 62d034a..a5e9912 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -105,6 +105,7 @@ config ARCH_NO_VIRT_TO_BUS config PPC bool default y + select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_IDE select HAVE_KPROBES diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index a480df6..fca9246 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -11,6 +11,7 @@ config SPARC config SPARC64 bool default y + select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_IDE select HAVE_LMB diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c742dfe..fc86c54 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,6 +23,7 @@ config X86 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index f300571..5c2295b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -4,6 +4,9 @@ config HAVE_FTRACE bool +config HAVE_DYNAMIC_FTRACE + bool + config TRACER_MAX_TRACE bool @@ -94,6 +97,7 @@ config CONTEXT_SWITCH_TRACER config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" depends on FTRACE + depends on HAVE_DYNAMIC_FTRACE default y help This option will modify all the calls to ftrace dynamically -- cgit v0.10.2 From f06c38103ea9dbca27c3f4d77f444ddefb5477cd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:47 +0200 Subject: ftrace: add sysprof plugin very first baby version. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5c2295b..e101c9a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -75,6 +75,14 @@ config PREEMPT_TRACER enabled. This option and the irqs-off timing option can be used together or separately.) +config SYSPROF_TRACER + bool "Sysprof Tracer" + depends on DEBUG_KERNEL + select TRACING + help + This tracer provides the trace needed by the 'Sysprof' userspace + tool. + config SCHED_TRACER bool "Scheduling Latency Tracer" depends on HAVE_FTRACE diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index d9efbbf..7aec123 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_FTRACE) += libftrace.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o +obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o obj-$(CONFIG_FTRACE) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c new file mode 100644 index 0000000..6c139bc --- /dev/null +++ b/kernel/trace/trace_sysprof.c @@ -0,0 +1,80 @@ +/* + * trace stack traces + * + * Copyright (C) 2007 Steven Rostedt + * Copyright (C) 2008 Ingo Molnar + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +static struct trace_array *ctx_trace; +static int __read_mostly tracer_enabled; + +static notrace void stack_reset(struct trace_array *tr) +{ + int cpu; + + tr->time_start = ftrace_now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr->data[cpu]); +} + +static notrace void start_stack_trace(struct trace_array *tr) +{ + stack_reset(tr); + tracer_enabled = 1; +} + +static notrace void stop_stack_trace(struct trace_array *tr) +{ + tracer_enabled = 0; +} + +static notrace void stack_trace_init(struct trace_array *tr) +{ + ctx_trace = tr; + + if (tr->ctrl) + start_stack_trace(tr); +} + +static notrace void stack_trace_reset(struct trace_array *tr) +{ + if (tr->ctrl) + stop_stack_trace(tr); +} + +static void stack_trace_ctrl_update(struct trace_array *tr) +{ + /* When starting a new trace, reset the buffers */ + if (tr->ctrl) + start_stack_trace(tr); + else + stop_stack_trace(tr); +} + +static struct tracer stack_trace __read_mostly = +{ + .name = "sysprof", + .init = stack_trace_init, + .reset = stack_trace_reset, + .ctrl_update = stack_trace_ctrl_update, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_stack, +#endif +}; + +__init static int init_stack_trace(void) +{ + return register_tracer(&stack_trace); +} +device_initcall(init_stack_trace); -- cgit v0.10.2 From 0075fa80305f3231a2d5df97b00d7f55a48ea27e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:47 +0200 Subject: ftrace: extend sysprof plugin add per CPU hrtimers. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 6c139bc..ba55b87 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -5,19 +5,76 @@ * Copyright (C) 2008 Ingo Molnar * */ -#include -#include -#include #include +#include +#include #include -#include #include +#include +#include #include "trace.h" static struct trace_array *ctx_trace; static int __read_mostly tracer_enabled; +static const unsigned long sample_period = 1000000; + +/* + * Per CPU hrtimers that do the profiling: + */ +static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer); + +static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) +{ + /* trace here */ + panic_timeout++; + + hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); + + return HRTIMER_RESTART; +} + +static void start_stack_timer(int cpu) +{ + struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); + + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = stack_trace_timer_fn; + hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + + hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); +} + +static void start_stack_timers(void) +{ + cpumask_t saved_mask = current->cpus_allowed; + int cpu; + + for_each_online_cpu(cpu) { + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + start_stack_timer(cpu); + printk("started timer on cpu%d\n", cpu); + } + set_cpus_allowed_ptr(current, &saved_mask); +} + +static void stop_stack_timer(int cpu) +{ + struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); + + hrtimer_cancel(hrtimer); + printk("cancelled timer on cpu%d\n", cpu); +} + +static void stop_stack_timers(void) +{ + int cpu; + + for_each_online_cpu(cpu) + stop_stack_timer(cpu); +} + static notrace void stack_reset(struct trace_array *tr) { int cpu; @@ -31,11 +88,13 @@ static notrace void stack_reset(struct trace_array *tr) static notrace void start_stack_trace(struct trace_array *tr) { stack_reset(tr); + start_stack_timers(); tracer_enabled = 1; } static notrace void stop_stack_trace(struct trace_array *tr) { + stop_stack_timers(); tracer_enabled = 0; } -- cgit v0.10.2 From 56a08bdcff20f0022bd9160c1093e56f763499aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:47 +0200 Subject: ftrace: extend sysprof plugin some more Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index ba55b87..b1137c1 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -3,7 +3,7 @@ * * Copyright (C) 2007 Steven Rostedt * Copyright (C) 2008 Ingo Molnar - * + * Copyright (C) 2004, 2005, Soeren Sandmann */ #include #include @@ -11,13 +11,17 @@ #include #include #include +#include #include #include "trace.h" -static struct trace_array *ctx_trace; +static struct trace_array *sysprof_trace; static int __read_mostly tracer_enabled; +/* + * 10 msecs for now: + */ static const unsigned long sample_period = 1000000; /* @@ -25,10 +29,78 @@ static const unsigned long sample_period = 1000000; */ static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer); +struct stack_frame { + const void __user *next_fp; + unsigned long return_address; +}; + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) + return 0; + + if (__copy_from_user_inatomic(frame, frame_pointer, sizeof(*frame))) + return 0; + + return 1; +} + +#define SYSPROF_MAX_ADDRESSES 512 + +static void timer_notify(struct pt_regs *regs, int cpu) +{ + const void __user *frame_pointer; + struct trace_array_cpu *data; + struct stack_frame frame; + struct trace_array *tr; + int is_user; + int i; + + if (!regs) + return; + + tr = sysprof_trace; + data = tr->data[cpu]; + is_user = user_mode(regs); + + if (!current || current->pid == 0) + return; + + if (is_user && current->state != TASK_RUNNING) + return; + + if (!is_user) { + /* kernel */ + ftrace(tr, data, current->pid, 1, 0); + return; + + } + + trace_special(tr, data, 0, current->pid, regs->ip); + + frame_pointer = (void __user *)regs->bp; + + for (i = 0; i < SYSPROF_MAX_ADDRESSES; i++) { + if (!copy_stack_frame(frame_pointer, &frame)) + break; + if ((unsigned long)frame_pointer < regs->sp) + break; + + trace_special(tr, data, 1, frame.return_address, + (unsigned long)frame_pointer); + frame_pointer = frame.next_fp; + } + + trace_special(tr, data, 2, current->pid, i); + + if (i == SYSPROF_MAX_ADDRESSES) + trace_special(tr, data, -1, -1, -1); +} + static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) { /* trace here */ - panic_timeout++; + timer_notify(get_irq_regs(), smp_processor_id()); hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); @@ -100,7 +172,7 @@ static notrace void stop_stack_trace(struct trace_array *tr) static notrace void stack_trace_init(struct trace_array *tr) { - ctx_trace = tr; + sysprof_trace = tr; if (tr->ctrl) start_stack_trace(tr); -- cgit v0.10.2 From a6dd24f8d00cbccb560b19a723e6fb9bdfb20799 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:47 +0200 Subject: ftrace: sysprof-plugin, add self-tests Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c460e85..b2198bc 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -280,6 +280,10 @@ extern int trace_selftest_startup_wakeup(struct tracer *trace, extern int trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr); #endif +#ifdef CONFIG_SYSPROF_TRACER +extern int trace_selftest_startup_sysprof(struct tracer *trace, + struct trace_array *tr); +#endif #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 3877dd9..033a6fb 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -537,3 +537,31 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr return ret; } #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ + +#ifdef CONFIG_SYSPROF_TRACER +int +trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) +{ + unsigned long count; + int ret; + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* Sleep for a 1/10 of a second */ + msleep(100); + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + } + + return ret; +} +#endif /* CONFIG_SYSPROF_TRACER */ diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index b1137c1..b78f12f 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -126,7 +126,7 @@ static void start_stack_timers(void) for_each_online_cpu(cpu) { set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); start_stack_timer(cpu); - printk("started timer on cpu%d\n", cpu); + printk(KERN_INFO "started sysprof timer on cpu%d\n", cpu); } set_cpus_allowed_ptr(current, &saved_mask); } @@ -136,7 +136,7 @@ static void stop_stack_timer(int cpu) struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); hrtimer_cancel(hrtimer); - printk("cancelled timer on cpu%d\n", cpu); + printk(KERN_INFO "cancelled sysprof timer on cpu%d\n", cpu); } static void stop_stack_timers(void) @@ -200,7 +200,7 @@ static struct tracer stack_trace __read_mostly = .reset = stack_trace_reset, .ctrl_update = stack_trace_ctrl_update, #ifdef CONFIG_FTRACE_SELFTEST - .selftest = trace_selftest_startup_stack, + .selftest = trace_selftest_startup_sysprof, #endif }; -- cgit v0.10.2 From 842af315e8b0adad58fc642eaa5e6f53525e0534 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:47 +0200 Subject: ftrace: sysprof plugin improvement add sample maximum depth. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index b78f12f..7f6fccc 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -23,6 +23,7 @@ static int __read_mostly tracer_enabled; * 10 msecs for now: */ static const unsigned long sample_period = 1000000; +static const unsigned int sample_max_depth = 512; /* * Per CPU hrtimers that do the profiling: @@ -45,8 +46,6 @@ static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) return 1; } -#define SYSPROF_MAX_ADDRESSES 512 - static void timer_notify(struct pt_regs *regs, int cpu) { const void __user *frame_pointer; @@ -80,7 +79,7 @@ static void timer_notify(struct pt_regs *regs, int cpu) frame_pointer = (void __user *)regs->bp; - for (i = 0; i < SYSPROF_MAX_ADDRESSES; i++) { + for (i = 0; i < sample_max_depth; i++) { if (!copy_stack_frame(frame_pointer, &frame)) break; if ((unsigned long)frame_pointer < regs->sp) @@ -93,7 +92,7 @@ static void timer_notify(struct pt_regs *regs, int cpu) trace_special(tr, data, 2, current->pid, i); - if (i == SYSPROF_MAX_ADDRESSES) + if (i == sample_max_depth) trace_special(tr, data, -1, -1, -1); } @@ -126,7 +125,6 @@ static void start_stack_timers(void) for_each_online_cpu(cpu) { set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); start_stack_timer(cpu); - printk(KERN_INFO "started sysprof timer on cpu%d\n", cpu); } set_cpus_allowed_ptr(current, &saved_mask); } @@ -136,7 +134,6 @@ static void stop_stack_timer(int cpu) struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); hrtimer_cancel(hrtimer); - printk(KERN_INFO "cancelled sysprof timer on cpu%d\n", cpu); } static void stop_stack_timers(void) -- cgit v0.10.2 From ef4ab15ff34fd9c65e92bee70f58e7179da881c5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: make sysprof dependent on x86 for now that's the only tested platform for now. If there's interest we can make it generic easily. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e101c9a..9b49526 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -77,7 +77,7 @@ config PREEMPT_TRACER config SYSPROF_TRACER bool "Sysprof Tracer" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && X86 select TRACING help This tracer provides the trace needed by the 'Sysprof' userspace diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 033a6fb..5588ecc 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -557,11 +557,6 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) ret = trace_test_buffer(tr, &count); trace->reset(tr); - if (!ret && !count) { - printk(KERN_CONT ".. no entries found .."); - ret = -1; - } - return ret; } #endif /* CONFIG_SYSPROF_TRACER */ -- cgit v0.10.2 From 9f6b4e3f4a24f2590f1c96f117fc45fbea9b0fa4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: sysprof fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 7f6fccc..f9a09fe 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -37,21 +37,26 @@ struct stack_frame { static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) { + int ret; + if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) return 0; - if (__copy_from_user_inatomic(frame, frame_pointer, sizeof(*frame))) - return 0; + ret = 1; + pagefault_disable(); + if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) + ret = 0; + pagefault_enable(); - return 1; + return ret; } static void timer_notify(struct pt_regs *regs, int cpu) { - const void __user *frame_pointer; struct trace_array_cpu *data; struct stack_frame frame; struct trace_array *tr; + const void __user *fp; int is_user; int i; @@ -77,21 +82,26 @@ static void timer_notify(struct pt_regs *regs, int cpu) trace_special(tr, data, 0, current->pid, regs->ip); - frame_pointer = (void __user *)regs->bp; + fp = (void __user *)regs->bp; for (i = 0; i < sample_max_depth; i++) { - if (!copy_stack_frame(frame_pointer, &frame)) + frame.next_fp = 0; + frame.return_address = 0; + if (!copy_stack_frame(fp, &frame)) break; - if ((unsigned long)frame_pointer < regs->sp) + if ((unsigned long)fp < regs->sp) break; trace_special(tr, data, 1, frame.return_address, - (unsigned long)frame_pointer); - frame_pointer = frame.next_fp; + (unsigned long)fp); + fp = frame.next_fp; } trace_special(tr, data, 2, current->pid, i); + /* + * Special trace entry if we overflow the max depth: + */ if (i == sample_max_depth) trace_special(tr, data, -1, -1, -1); } -- cgit v0.10.2 From d618b3e6e50970a6248ac857653fdd49bcd3c045 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: sysprof updates make the sample period configurable. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3271916..95b7c48 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2800,6 +2800,9 @@ static __init void tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'dyn_ftrace_total_info' entry\n"); #endif +#ifdef CONFIG_SYSPROF_TRACER + init_tracer_sysprof_debugfs(d_tracer); +#endif } static int trace_alloc_page(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b2198bc..b7f85d9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -188,6 +188,8 @@ struct trace_iterator { void tracing_reset(struct trace_array_cpu *data); int tracing_open_generic(struct inode *inode, struct file *filp); struct dentry *tracing_init_dentry(void); +void init_tracer_sysprof_debugfs(struct dentry *d_tracer); + void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index f9a09fe..1940623 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -20,11 +20,12 @@ static struct trace_array *sysprof_trace; static int __read_mostly tracer_enabled; /* - * 10 msecs for now: + * 1 msec sample interval by default: */ -static const unsigned long sample_period = 1000000; +static unsigned long sample_period = 1000000; static const unsigned int sample_max_depth = 512; +static DEFINE_MUTEX(sample_timer_lock); /* * Per CPU hrtimers that do the profiling: */ @@ -166,15 +167,19 @@ static notrace void stack_reset(struct trace_array *tr) static notrace void start_stack_trace(struct trace_array *tr) { + mutex_lock(&sample_timer_lock); stack_reset(tr); start_stack_timers(); tracer_enabled = 1; + mutex_unlock(&sample_timer_lock); } static notrace void stop_stack_trace(struct trace_array *tr) { + mutex_lock(&sample_timer_lock); stop_stack_timers(); tracer_enabled = 0; + mutex_unlock(&sample_timer_lock); } static notrace void stack_trace_init(struct trace_array *tr) @@ -216,3 +221,64 @@ __init static int init_stack_trace(void) return register_tracer(&stack_trace); } device_initcall(init_stack_trace); + +#define MAX_LONG_DIGITS 22 + +static ssize_t +sysprof_sample_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[MAX_LONG_DIGITS]; + int r; + + r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period)); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +sysprof_sample_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[MAX_LONG_DIGITS]; + unsigned long val; + + if (cnt > MAX_LONG_DIGITS-1) + cnt = MAX_LONG_DIGITS-1; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 10); + /* + * Enforce a minimum sample period of 100 usecs: + */ + if (val < 100) + val = 100; + + mutex_lock(&sample_timer_lock); + stop_stack_timers(); + sample_period = val * 1000; + start_stack_timers(); + mutex_unlock(&sample_timer_lock); + + return cnt; +} + +static struct file_operations sysprof_sample_fops = { + .read = sysprof_sample_read, + .write = sysprof_sample_write, +}; + +void init_tracer_sysprof_debugfs(struct dentry *d_tracer) +{ + struct dentry *entry; + + entry = debugfs_create_file("sysprof_sample_period", 0644, + d_tracer, NULL, &sysprof_sample_fops); + if (entry) + return; + pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n"); +} -- cgit v0.10.2 From ada6b835067dc022f11cdae1c313a3710d3d977c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 23 May 2008 23:50:41 +0200 Subject: ftrace: remove notrace Remove the notrace annotations. The build logic takes care of that. Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 1940623..3b1e4ba 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -155,7 +155,7 @@ static void stop_stack_timers(void) stop_stack_timer(cpu); } -static notrace void stack_reset(struct trace_array *tr) +static void stack_reset(struct trace_array *tr) { int cpu; @@ -165,7 +165,7 @@ static notrace void stack_reset(struct trace_array *tr) tracing_reset(tr->data[cpu]); } -static notrace void start_stack_trace(struct trace_array *tr) +static void start_stack_trace(struct trace_array *tr) { mutex_lock(&sample_timer_lock); stack_reset(tr); @@ -174,7 +174,7 @@ static notrace void start_stack_trace(struct trace_array *tr) mutex_unlock(&sample_timer_lock); } -static notrace void stop_stack_trace(struct trace_array *tr) +static void stop_stack_trace(struct trace_array *tr) { mutex_lock(&sample_timer_lock); stop_stack_timers(); @@ -182,7 +182,7 @@ static notrace void stop_stack_trace(struct trace_array *tr) mutex_unlock(&sample_timer_lock); } -static notrace void stack_trace_init(struct trace_array *tr) +static void stack_trace_init(struct trace_array *tr) { sysprof_trace = tr; @@ -190,7 +190,7 @@ static notrace void stack_trace_init(struct trace_array *tr) start_stack_trace(tr); } -static notrace void stack_trace_reset(struct trace_array *tr) +static void stack_trace_reset(struct trace_array *tr) { if (tr->ctrl) stop_stack_trace(tr); -- cgit v0.10.2 From 9caee613d3b860ae81b79370eeae9ac967c07536 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 23 May 2008 23:55:54 +0200 Subject: ftrace: fix __trace_special() Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 3b1e4ba..76dd953 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -81,7 +81,7 @@ static void timer_notify(struct pt_regs *regs, int cpu) } - trace_special(tr, data, 0, current->pid, regs->ip); + __trace_special(tr, data, 0, current->pid, regs->ip); fp = (void __user *)regs->bp; @@ -93,18 +93,18 @@ static void timer_notify(struct pt_regs *regs, int cpu) if ((unsigned long)fp < regs->sp) break; - trace_special(tr, data, 1, frame.return_address, + __trace_special(tr, data, 1, frame.return_address, (unsigned long)fp); fp = frame.next_fp; } - trace_special(tr, data, 2, current->pid, i); + __trace_special(tr, data, 2, current->pid, i); /* * Special trace entry if we overflow the max depth: */ if (i == sample_max_depth) - trace_special(tr, data, -1, -1, -1); + __trace_special(tr, data, -1, -1, -1); } static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) -- cgit v0.10.2 From 5fc4511c756860149b81aead6eca5bdf5c438ea7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 23 May 2008 23:58:21 +0200 Subject: ftrace: make it more available in the Kconfig Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 9b49526..e101c9a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -77,7 +77,7 @@ config PREEMPT_TRACER config SYSPROF_TRACER bool "Sysprof Tracer" - depends on DEBUG_KERNEL && X86 + depends on DEBUG_KERNEL select TRACING help This tracer provides the trace needed by the 'Sysprof' userspace -- cgit v0.10.2 From cd2134b1dda92fd450e6a1e12b1c7960dd6a2178 Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Pedersen Date: Mon, 12 May 2008 21:20:54 +0200 Subject: sysprof: kernel trace add kernel backtracing to the sysprof tracer. change the format of the data, so that type=0 means beginning of stack trace, 1 means kernel address, 2 means user address, and 3 means end of trace. EIP addresses are no longer distinguished from return addresses, mostly because sysprof userspace doesn't make use of it. It may be worthwhile adding this back in though, just in case it becomes interesting. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 76dd953..ebcb66d 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -14,6 +14,8 @@ #include #include +#include + #include "trace.h" static struct trace_array *sysprof_trace; @@ -52,6 +54,77 @@ static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) return ret; } +struct backtrace_info { + struct trace_array_cpu *data; + struct trace_array *tr; + int pos; +}; + +static void +backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + /* Ignore warnings */ +} + +static void backtrace_warning(void *data, char *msg) +{ + /* Ignore warnings */ +} + +static int backtrace_stack(void *data, char *name) +{ + /* Don't bother with IRQ stacks for now */ + return -1; +} + +static void backtrace_address(void *data, unsigned long addr, int reliable) +{ + struct backtrace_info *info = data; + + if (info->pos < sample_max_depth && reliable) { + __trace_special(info->tr, info->data, 1, addr, 0); + + info->pos++; + } +} + +const static struct stacktrace_ops backtrace_ops = { + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, +}; + +static struct pt_regs * +trace_kernel(struct pt_regs *regs, struct trace_array *tr, + struct trace_array_cpu *data) +{ + struct backtrace_info info; + unsigned long bp; + char *user_stack; + char *stack; + + info.tr = tr; + info.data = data; + info.pos = 1; + + __trace_special(info.tr, info.data, 1, regs->ip, 0); + + stack = ((char *)regs + sizeof(struct pt_regs)); +#ifdef CONFIG_FRAME_POINTER + bp = regs->bp; +#else + bp = 0; +#endif + + dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info); + + /* Now trace the user stack */ + user_stack = ((char *)current->thread.sp0 - sizeof(struct pt_regs)); + + return (struct pt_regs *)user_stack; +} + static void timer_notify(struct pt_regs *regs, int cpu) { struct trace_array_cpu *data; @@ -74,17 +147,15 @@ static void timer_notify(struct pt_regs *regs, int cpu) if (is_user && current->state != TASK_RUNNING) return; - if (!is_user) { - /* kernel */ - ftrace(tr, data, current->pid, 1, 0); - return; + __trace_special(tr, data, 0, 0, current->pid); - } - - __trace_special(tr, data, 0, current->pid, regs->ip); + if (!is_user) + regs = trace_kernel(regs, tr, data); fp = (void __user *)regs->bp; + __trace_special(tr, data, 2, regs->ip, 0); + for (i = 0; i < sample_max_depth; i++) { frame.next_fp = 0; frame.return_address = 0; @@ -93,12 +164,12 @@ static void timer_notify(struct pt_regs *regs, int cpu) if ((unsigned long)fp < regs->sp) break; - __trace_special(tr, data, 1, frame.return_address, + __trace_special(tr, data, 2, frame.return_address, (unsigned long)fp); fp = frame.next_fp; } - __trace_special(tr, data, 2, current->pid, i); + __trace_special(tr, data, 3, current->pid, i); /* * Special trace entry if we overflow the max depth: -- cgit v0.10.2 From 8a9e94c1fbfdac45a3b6811b880777c4116aa309 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:54 +0200 Subject: sysprof: update copyrights Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index ebcb66d..fe23d6d 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -1,9 +1,9 @@ /* * trace stack traces * + * Copyright (C) 2004-2008, Soeren Sandmann * Copyright (C) 2007 Steven Rostedt * Copyright (C) 2008 Ingo Molnar - * Copyright (C) 2004, 2005, Soeren Sandmann */ #include #include -- cgit v0.10.2 From cf3271a73b612a03da00681ecd9bfefab37c74c9 Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Date: Mon, 12 May 2008 05:28:50 +0200 Subject: ftrace/sysprof: don't trace the user stack if we are a kernel thread. Check that current->mm is non-NULL before attempting to trace the user stack. Also take depth of the kernel stack into account when comparing against sample_max_depth. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index fe23d6d..2301e1e 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -95,13 +95,12 @@ const static struct stacktrace_ops backtrace_ops = { .address = backtrace_address, }; -static struct pt_regs * +static int trace_kernel(struct pt_regs *regs, struct trace_array *tr, struct trace_array_cpu *data) { struct backtrace_info info; unsigned long bp; - char *user_stack; char *stack; info.tr = tr; @@ -119,10 +118,7 @@ trace_kernel(struct pt_regs *regs, struct trace_array *tr, dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info); - /* Now trace the user stack */ - user_stack = ((char *)current->thread.sp0 - sizeof(struct pt_regs)); - - return (struct pt_regs *)user_stack; + return info.pos; } static void timer_notify(struct pt_regs *regs, int cpu) @@ -150,32 +146,44 @@ static void timer_notify(struct pt_regs *regs, int cpu) __trace_special(tr, data, 0, 0, current->pid); if (!is_user) - regs = trace_kernel(regs, tr, data); + i = trace_kernel(regs, tr, data); + else + i = 0; - fp = (void __user *)regs->bp; + /* + * Trace user stack if we are not a kernel thread + */ + if (current->mm && i < sample_max_depth) { + regs = (struct pt_regs *)current->thread.sp0 - 1; - __trace_special(tr, data, 2, regs->ip, 0); + fp = (void __user *)regs->bp; - for (i = 0; i < sample_max_depth; i++) { - frame.next_fp = 0; - frame.return_address = 0; - if (!copy_stack_frame(fp, &frame)) - break; - if ((unsigned long)fp < regs->sp) - break; + __trace_special(tr, data, 2, regs->ip, 0); - __trace_special(tr, data, 2, frame.return_address, - (unsigned long)fp); - fp = frame.next_fp; - } + while (i < sample_max_depth) { + frame.next_fp = 0; + frame.return_address = 0; + if (!copy_stack_frame(fp, &frame)) + break; + if ((unsigned long)fp < regs->sp) + break; - __trace_special(tr, data, 3, current->pid, i); + __trace_special(tr, data, 2, frame.return_address, + (unsigned long)fp); + fp = frame.next_fp; + + i++; + } + + } /* * Special trace entry if we overflow the max depth: */ if (i == sample_max_depth) __trace_special(tr, data, -1, -1, -1); + + __trace_special(tr, data, 3, current->pid, i); } static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) -- cgit v0.10.2 From bfeeeeb991cf75081e6c2f74d44ae5da05b50a94 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 12 May 2008 21:21:14 +0200 Subject: stacktrace: don't crash on invalid stack trace structs This patch makes the stacktrace printout code \warn when the entries pointer is unset rather than crashing when trying to access it in an attempt to make it a bit more robust. I was saving a stacktrace into an skb and forgot to copy it across skb copies... I have since fixed the code, but it would have been easier had the kernel not crashed in an interrupt. Signed-off-by: Johannes Berg Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index b71816e..0914d0c 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -13,6 +13,9 @@ void print_stack_trace(struct stack_trace *trace, int spaces) { int i, j; + if (WARN_ON(!trace->entries)) + return; + for (i = 0; i < trace->nr_entries; i++) { unsigned long ip = trace->entries[i]; -- cgit v0.10.2 From 8b7d89d02ef3c6a7c73d6596f28cea7632850af4 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: x86: mmiotrace - trace memory mapped IO Mmiotrace is a tool for trapping memory mapped IO (MMIO) accesses within the kernel. It is used for debugging and especially for reverse engineering evil binary drivers. Mmiotrace works by wrapping the ioremap family of kernel functions and marking the returned pages as not present. Access to the IO memory triggers a page fault, which will be handled by mmiotrace's custom page fault handler. This will single-step the faulted instruction with the MMIO page marked as present. Access logs are directed to user space via relay and debug_fs. This page fault approach is necessary, because binary drivers have readl/writel etc. calls inlined and therefore extremely difficult to trap with with e.g. kprobes. This patch depends on the custom page fault handlers patch. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 9431a83..7c6496e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -176,6 +176,33 @@ config PAGE_FAULT_HANDLERS register a function that is called on every page fault. Custom handlers are used by some debugging and reverse engineering tools. +config MMIOTRACE + tristate "Memory mapped IO tracing" + depends on DEBUG_KERNEL && PAGE_FAULT_HANDLERS && RELAY && DEBUG_FS + default n + help + This will build a kernel module called mmiotrace. + + Mmiotrace traces Memory Mapped I/O access and is meant for debugging + and reverse engineering. The kernel module offers wrapped + versions of the ioremap family of functions. The driver to be traced + must be modified to call these wrappers. A user space program is + required to collect the MMIO data. + + See http://nouveau.freedesktop.org/wiki/MmioTrace + If you are not helping to develop drivers, say N. + +config MMIOTRACE_TEST + tristate "Test module for mmiotrace" + depends on MMIOTRACE && m + default n + help + This is a dumb module for testing mmiotrace. It is very dangerous + as it will write garbage to IO memory starting at a given address. + However, it should be safe to use on e.g. unused portion of VRAM. + + Say N, unless you absolutely know what you are doing. + # # IO delay types: # diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 739d49a..a51ac15 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -79,6 +79,8 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_MMIOTRACE) += mmiotrace/ + obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index a4f93b4..027a5b6 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -15,6 +15,7 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ +EXPORT_SYMBOL_GPL(init_mm); /* * Initial thread structure. diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile new file mode 100644 index 0000000..d6905f7 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_MMIOTRACE) += mmiotrace.o +mmiotrace-objs := pf_in.o kmmio.o mmio-mod.o + +obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c new file mode 100644 index 0000000..8ba48f9 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -0,0 +1,391 @@ +/* Support for MMIO probes. + * Benfit many code from kprobes + * (C) 2002 Louis Zhuang . + * 2007 Alexander Eichner + * 2008 Pekka Paalanen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kmmio.h" + +#define KMMIO_HASH_BITS 6 +#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS) +#define KMMIO_PAGE_HASH_BITS 4 +#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) + +struct kmmio_context { + struct kmmio_fault_page *fpage; + struct kmmio_probe *probe; + unsigned long saved_flags; + int active; +}; + +static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address); +static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, + void *args); + +static DEFINE_SPINLOCK(kmmio_lock); + +/* These are protected by kmmio_lock */ +unsigned int kmmio_count; +static unsigned int handler_registered; +static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; +static LIST_HEAD(kmmio_probes); + +static struct kmmio_context kmmio_ctx[NR_CPUS]; + +static struct pf_handler kmmio_pf_hook = { + .handler = kmmio_page_fault +}; + +static struct notifier_block nb_die = { + .notifier_call = kmmio_die_notifier +}; + +int init_kmmio(void) +{ + int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kmmio_page_table[i]); + + register_die_notifier(&nb_die); + return 0; +} + +void cleanup_kmmio(void) +{ + /* + * Assume the following have been already cleaned by calling + * unregister_kmmio_probe() appropriately: + * kmmio_page_table, kmmio_probes + */ + if (handler_registered) { + unregister_page_fault_handler(&kmmio_pf_hook); + synchronize_rcu(); + } + unregister_die_notifier(&nb_die); +} + +/* + * this is basically a dynamic stabbing problem: + * Could use the existing prio tree code or + * Possible better implementations: + * The Interval Skip List: A Data Structure for Finding All Intervals That + * Overlap a Point (might be simple) + * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup + */ +/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */ +static struct kmmio_probe *get_kmmio_probe(unsigned long addr) +{ + struct kmmio_probe *p; + list_for_each_entry(p, &kmmio_probes, list) { + if (addr >= p->addr && addr <= (p->addr + p->len)) + return p; + } + return NULL; +} + +static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) +{ + struct list_head *head, *tmp; + + page &= PAGE_MASK; + head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; + list_for_each(tmp, head) { + struct kmmio_fault_page *p + = list_entry(tmp, struct kmmio_fault_page, list); + if (p->page == page) + return p; + } + + return NULL; +} + +static void arm_kmmio_fault_page(unsigned long page, int *large) +{ + unsigned long address = page & PAGE_MASK; + pgd_t *pgd = pgd_offset_k(address); + pud_t *pud = pud_offset(pgd, address); + pmd_t *pmd = pmd_offset(pud, address); + pte_t *pte = pte_offset_kernel(pmd, address); + + if (pmd_large(*pmd)) { + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT)); + if (large) + *large = 1; + } else { + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + } + + __flush_tlb_one(page); +} + +static void disarm_kmmio_fault_page(unsigned long page, int *large) +{ + unsigned long address = page & PAGE_MASK; + pgd_t *pgd = pgd_offset_k(address); + pud_t *pud = pud_offset(pgd, address); + pmd_t *pmd = pmd_offset(pud, address); + pte_t *pte = pte_offset_kernel(pmd, address); + + if (large && *large) { + set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT)); + *large = 0; + } else { + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + } + + __flush_tlb_one(page); +} + +/* + * Interrupts are disabled on entry as trap3 is an interrupt gate + * and they remain disabled thorough out this function. + */ +static int kmmio_handler(struct pt_regs *regs, unsigned long addr) +{ + struct kmmio_context *ctx; + int cpu; + + /* + * Preemption is now disabled to prevent process switch during + * single stepping. We can only handle one active kmmio trace + * per cpu, so ensure that we finish it before something else + * gets to run. + * + * XXX what if an interrupt occurs between returning from + * do_page_fault() and entering the single-step exception handler? + * And that interrupt triggers a kmmio trap? + */ + preempt_disable(); + cpu = smp_processor_id(); + ctx = &kmmio_ctx[cpu]; + + /* interrupts disabled and CPU-local data => atomicity guaranteed. */ + if (ctx->active) { + /* + * This avoids a deadlock with kmmio_lock. + * If this page fault really was due to kmmio trap, + * all hell breaks loose. + */ + printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, " + "for address %lu. Ignoring.\n", + cpu, addr); + goto no_kmmio; + } + ctx->active++; + + /* + * Acquire the kmmio lock to prevent changes affecting + * get_kmmio_fault_page() and get_kmmio_probe(), since we save their + * returned pointers. + * The lock is released in post_kmmio_handler(). + * XXX: could/should get_kmmio_*() be using RCU instead of spinlock? + */ + spin_lock(&kmmio_lock); + + ctx->fpage = get_kmmio_fault_page(addr); + if (!ctx->fpage) { + /* this page fault is not caused by kmmio */ + goto no_kmmio_locked; + } + + ctx->probe = get_kmmio_probe(addr); + ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK)); + + if (ctx->probe && ctx->probe->pre_handler) + ctx->probe->pre_handler(ctx->probe, regs, addr); + + regs->flags |= TF_MASK; + regs->flags &= ~IF_MASK; + + /* We hold lock, now we set present bit in PTE and single step. */ + disarm_kmmio_fault_page(ctx->fpage->page, NULL); + + return 1; + +no_kmmio_locked: + spin_unlock(&kmmio_lock); + ctx->active--; +no_kmmio: + preempt_enable_no_resched(); + /* page fault not handled by kmmio */ + return 0; +} + +/* + * Interrupts are disabled on entry as trap1 is an interrupt gate + * and they remain disabled thorough out this function. + * And we hold kmmio lock. + */ +static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + struct kmmio_context *ctx = &kmmio_ctx[cpu]; + + if (!ctx->active) + return 0; + + if (ctx->probe && ctx->probe->post_handler) + ctx->probe->post_handler(ctx->probe, condition, regs); + + arm_kmmio_fault_page(ctx->fpage->page, NULL); + + regs->flags &= ~TF_MASK; + regs->flags |= ctx->saved_flags; + + /* These were acquired in kmmio_handler(). */ + ctx->active--; + spin_unlock(&kmmio_lock); + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, flags + * will have TF set, in which case, continue the remaining processing + * of do_debug, as if this is not a probe hit. + */ + if (regs->flags & TF_MASK) + return 0; + + return 1; +} + +static int add_kmmio_fault_page(unsigned long page) +{ + struct kmmio_fault_page *f; + + page &= PAGE_MASK; + f = get_kmmio_fault_page(page); + if (f) { + f->count++; + return 0; + } + + f = kmalloc(sizeof(*f), GFP_ATOMIC); + if (!f) + return -1; + + f->count = 1; + f->page = page; + list_add(&f->list, + &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]); + + arm_kmmio_fault_page(f->page, NULL); + + return 0; +} + +static void release_kmmio_fault_page(unsigned long page) +{ + struct kmmio_fault_page *f; + + page &= PAGE_MASK; + f = get_kmmio_fault_page(page); + if (!f) + return; + + f->count--; + if (!f->count) { + disarm_kmmio_fault_page(f->page, NULL); + list_del(&f->list); + } +} + +int register_kmmio_probe(struct kmmio_probe *p) +{ + int ret = 0; + unsigned long size = 0; + + spin_lock_irq(&kmmio_lock); + kmmio_count++; + if (get_kmmio_probe(p->addr)) { + ret = -EEXIST; + goto out; + } + list_add(&p->list, &kmmio_probes); + /*printk("adding fault pages...\n");*/ + while (size < p->len) { + if (add_kmmio_fault_page(p->addr + size)) + printk(KERN_ERR "mmio: Unable to set page fault.\n"); + size += PAGE_SIZE; + } + + if (!handler_registered) { + register_page_fault_handler(&kmmio_pf_hook); + handler_registered++; + } + +out: + spin_unlock_irq(&kmmio_lock); + /* + * XXX: What should I do here? + * Here was a call to global_flush_tlb(), but it does not exist + * anymore. + */ + return ret; +} + +void unregister_kmmio_probe(struct kmmio_probe *p) +{ + unsigned long size = 0; + + spin_lock_irq(&kmmio_lock); + while (size < p->len) { + release_kmmio_fault_page(p->addr + size); + size += PAGE_SIZE; + } + list_del(&p->list); + kmmio_count--; + spin_unlock_irq(&kmmio_lock); +} + +/* + * According to 2.6.20, mainly x86_64 arch: + * This is being called from do_page_fault(), via the page fault notifier + * chain. The chain is called for both user space faults and kernel space + * faults (address >= TASK_SIZE64), except not on faults serviced by + * vmalloc_fault(). + * + * We may be in an interrupt or a critical section. Also prefecthing may + * trigger a page fault. We may be in the middle of process switch. + * The page fault hook functionality has put us inside RCU read lock. + * + * Local interrupts are disabled, so preemption cannot happen. + * Do not enable interrupts, do not sleep, and watch out for other CPUs. + */ +static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ + if (is_kmmio_active()) + if (kmmio_handler(regs, address) == 1) + return -1; + return 0; +} + +static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, + void *args) +{ + struct die_args *arg = args; + + if (val == DIE_DEBUG) + if (post_kmmio_handler(arg->err, arg->regs) == 1) + return NOTIFY_STOP; + + return NOTIFY_DONE; +} diff --git a/arch/x86/kernel/mmiotrace/kmmio.h b/arch/x86/kernel/mmiotrace/kmmio.h new file mode 100644 index 0000000..85b7f68 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/kmmio.h @@ -0,0 +1,58 @@ +#ifndef _LINUX_KMMIO_H +#define _LINUX_KMMIO_H + +#include +#include +#include +#include +#include +#include +#include + +struct kmmio_probe; +struct kmmio_fault_page; +struct pt_regs; + +typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, + struct pt_regs *, unsigned long addr); +typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, + unsigned long condition, struct pt_regs *); + +struct kmmio_probe { + struct list_head list; + + /* start location of the probe point */ + unsigned long addr; + + /* length of the probe region */ + unsigned long len; + + /* Called before addr is executed. */ + kmmio_pre_handler_t pre_handler; + + /* Called after addr is executed, unless... */ + kmmio_post_handler_t post_handler; +}; + +struct kmmio_fault_page { + struct list_head list; + + /* location of the fault page */ + unsigned long page; + + int count; +}; + +/* kmmio is active by some kmmio_probes? */ +static inline int is_kmmio_active(void) +{ + extern unsigned int kmmio_count; + return kmmio_count; +} + +int init_kmmio(void); +void cleanup_kmmio(void); +int register_kmmio_probe(struct kmmio_probe *p); +void unregister_kmmio_probe(struct kmmio_probe *p); + +#endif /* _LINUX_KMMIO_H */ diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c new file mode 100644 index 0000000..73561fe --- /dev/null +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -0,0 +1,527 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2005 + * Jeff Muizelaar, 2006, 2007 + * Pekka Paalanen, 2008 + * + * Derived from the read-mod example from relay-examples by Tom Zanussi. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for ISA_START_ADDRESS */ + +#include "kmmio.h" +#include "pf_in.h" + +/* This app's relay channel files will appear in /debug/mmio-trace */ +#define APP_DIR "mmio-trace" +/* the marker injection file in /proc */ +#define MARKER_FILE "mmio-marker" + +#define MODULE_NAME "mmiotrace" + +struct trap_reason { + unsigned long addr; + unsigned long ip; + enum reason_type type; + int active_traces; +}; + +static struct trap_reason pf_reason[NR_CPUS]; +static struct mm_io_header_rw cpu_trace[NR_CPUS]; + +static struct file_operations mmio_fops = { + .owner = THIS_MODULE, +}; + +static const size_t subbuf_size = 256*1024; +static struct rchan *chan; +static struct dentry *dir; +static int suspended; /* XXX should this be per cpu? */ +static struct proc_dir_entry *proc_marker_file; + +/* module parameters */ +static unsigned int n_subbufs = 32*4; +static unsigned long filter_offset; +static int nommiotrace; +static int ISA_trace; +static int trace_pc; + +module_param(n_subbufs, uint, 0); +module_param(filter_offset, ulong, 0); +module_param(nommiotrace, bool, 0); +module_param(ISA_trace, bool, 0); +module_param(trace_pc, bool, 0); + +MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); +MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); +MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); +MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); +MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); + +static void record_timestamp(struct mm_io_header *header) +{ + struct timespec now; + + getnstimeofday(&now); + header->sec = now.tv_sec; + header->nsec = now.tv_nsec; +} + +/* + * Write callback for the /proc entry: + * Read a marker and write it to the mmio trace log + */ +static int write_marker(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char *event = NULL; + struct mm_io_header *headp; + int len = (count > 65535) ? 65535 : count; + + event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); + if (!event) + return -ENOMEM; + + headp = (struct mm_io_header *)event; + headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); + headp->data_len = len; + record_timestamp(headp); + + if (copy_from_user(event + sizeof(*headp), buffer, len)) { + kfree(event); + return -EFAULT; + } + + relay_write(chan, event, sizeof(*headp) + len); + kfree(event); + return len; +} + +static void print_pte(unsigned long address) +{ + pgd_t *pgd = pgd_offset_k(address); + pud_t *pud = pud_offset(pgd, address); + pmd_t *pmd = pmd_offset(pud, address); + if (pmd_large(*pmd)) { + printk(KERN_EMERG MODULE_NAME ": 4MB pages are not " + "currently supported: %lx\n", + address); + BUG(); + } + printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", + address, + pte_val(*pte_offset_kernel(pmd, address)), + pte_val(*pte_offset_kernel(pmd, address)) & _PAGE_PRESENT); +} + +/* + * For some reason the pre/post pairs have been called in an + * unmatched order. Report and die. + */ +static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) +{ + const unsigned long cpu = smp_processor_id(); + printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, " + "last fault for address: %lx\n", + addr, pf_reason[cpu].addr); + print_pte(addr); +#ifdef __i386__ + print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); + print_symbol(KERN_EMERG "last faulting EIP was at %s\n", + pf_reason[cpu].ip); + printk(KERN_EMERG + "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->ax, regs->bx, regs->cx, regs->dx); + printk(KERN_EMERG + "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->si, regs->di, regs->bp, regs->sp); +#else + print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); + print_symbol(KERN_EMERG "last faulting RIP was at %s\n", + pf_reason[cpu].ip); + printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n", + regs->ax, regs->cx, regs->dx); + printk(KERN_EMERG "rsi: %016lx rdi: %016lx " + "rbp: %016lx rsp: %016lx\n", + regs->si, regs->di, regs->bp, regs->sp); +#endif + BUG(); +} + +static void pre(struct kmmio_probe *p, struct pt_regs *regs, + unsigned long addr) +{ + const unsigned long cpu = smp_processor_id(); + const unsigned long instptr = instruction_pointer(regs); + const enum reason_type type = get_ins_type(instptr); + + /* it doesn't make sense to have more than one active trace per cpu */ + if (pf_reason[cpu].active_traces) + die_kmmio_nesting_error(regs, addr); + else + pf_reason[cpu].active_traces++; + + pf_reason[cpu].type = type; + pf_reason[cpu].addr = addr; + pf_reason[cpu].ip = instptr; + + cpu_trace[cpu].header.type = MMIO_MAGIC; + cpu_trace[cpu].header.pid = 0; + cpu_trace[cpu].header.data_len = sizeof(struct mm_io_rw); + cpu_trace[cpu].rw.address = addr; + + /* + * Only record the program counter when requested. + * It may taint clean-room reverse engineering. + */ + if (trace_pc) + cpu_trace[cpu].rw.pc = instptr; + else + cpu_trace[cpu].rw.pc = 0; + + record_timestamp(&cpu_trace[cpu].header); + + switch (type) { + case REG_READ: + cpu_trace[cpu].header.type |= + (MMIO_READ << MMIO_OPCODE_SHIFT) | + (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + break; + case REG_WRITE: + cpu_trace[cpu].header.type |= + (MMIO_WRITE << MMIO_OPCODE_SHIFT) | + (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + cpu_trace[cpu].rw.value = get_ins_reg_val(instptr, regs); + break; + case IMM_WRITE: + cpu_trace[cpu].header.type |= + (MMIO_WRITE << MMIO_OPCODE_SHIFT) | + (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + cpu_trace[cpu].rw.value = get_ins_imm_val(instptr); + break; + default: + { + unsigned char *ip = (unsigned char *)instptr; + cpu_trace[cpu].header.type |= + (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT); + cpu_trace[cpu].rw.value = (*ip) << 16 | + *(ip + 1) << 8 | + *(ip + 2); + } + } +} + +static void post(struct kmmio_probe *p, unsigned long condition, + struct pt_regs *regs) +{ + const unsigned long cpu = smp_processor_id(); + + /* this should always return the active_trace count to 0 */ + pf_reason[cpu].active_traces--; + if (pf_reason[cpu].active_traces) { + printk(KERN_EMERG MODULE_NAME ": unexpected post handler"); + BUG(); + } + + switch (pf_reason[cpu].type) { + case REG_READ: + cpu_trace[cpu].rw.value = get_ins_reg_val(pf_reason[cpu].ip, + regs); + break; + default: + break; + } + relay_write(chan, &cpu_trace[cpu], sizeof(struct mm_io_header_rw)); +} + +/* + * subbuf_start() relay callback. + * + * Defined so that we know when events are dropped due to the buffer-full + * condition. + */ +static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, + void *prev_subbuf, size_t prev_padding) +{ + if (relay_buf_full(buf)) { + if (!suspended) { + suspended = 1; + printk(KERN_ERR MODULE_NAME + ": cpu %d buffer full!!!\n", + smp_processor_id()); + } + return 0; + } else if (suspended) { + suspended = 0; + printk(KERN_ERR MODULE_NAME + ": cpu %d buffer no longer full.\n", + smp_processor_id()); + } + + return 1; +} + +/* file_create() callback. Creates relay file in debugfs. */ +static struct dentry *create_buf_file_handler(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + struct dentry *buf_file; + + mmio_fops.read = relay_file_operations.read; + mmio_fops.open = relay_file_operations.open; + mmio_fops.poll = relay_file_operations.poll; + mmio_fops.mmap = relay_file_operations.mmap; + mmio_fops.release = relay_file_operations.release; + mmio_fops.splice_read = relay_file_operations.splice_read; + + buf_file = debugfs_create_file(filename, mode, parent, buf, + &mmio_fops); + + return buf_file; +} + +/* file_remove() default callback. Removes relay file in debugfs. */ +static int remove_buf_file_handler(struct dentry *dentry) +{ + debugfs_remove(dentry); + return 0; +} + +static struct rchan_callbacks relay_callbacks = { + .subbuf_start = subbuf_start_handler, + .create_buf_file = create_buf_file_handler, + .remove_buf_file = remove_buf_file_handler, +}; + +/* + * create_channel - creates channel /debug/APP_DIR/cpuXXX + * Returns channel on success, NULL otherwise + */ +static struct rchan *create_channel(unsigned size, unsigned n) +{ + return relay_open("cpu", dir, size, n, &relay_callbacks, NULL); +} + +/* destroy_channel - destroys channel /debug/APP_DIR/cpuXXX */ +static void destroy_channel(void) +{ + if (chan) { + relay_close(chan); + chan = NULL; + } +} + +struct remap_trace { + struct list_head list; + struct kmmio_probe probe; +}; +static LIST_HEAD(trace_list); +static DEFINE_SPINLOCK(trace_list_lock); + +static void do_ioremap_trace_core(unsigned long offset, unsigned long size, + void __iomem *addr) +{ + struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); + struct mm_io_header_map event = { + .header = { + .type = MMIO_MAGIC | + (MMIO_PROBE << MMIO_OPCODE_SHIFT), + .sec = 0, + .nsec = 0, + .pid = 0, + .data_len = sizeof(struct mm_io_map) + }, + .map = { + .phys = offset, + .addr = (unsigned long)addr, + .len = size, + .pc = 0 + } + }; + record_timestamp(&event.header); + + *trace = (struct remap_trace) { + .probe = { + .addr = (unsigned long)addr, + .len = size, + .pre_handler = pre, + .post_handler = post, + } + }; + + relay_write(chan, &event, sizeof(event)); + spin_lock(&trace_list_lock); + list_add_tail(&trace->list, &trace_list); + spin_unlock(&trace_list_lock); + if (!nommiotrace) + register_kmmio_probe(&trace->probe); +} + +static void ioremap_trace_core(unsigned long offset, unsigned long size, + void __iomem *addr) +{ + if ((filter_offset) && (offset != filter_offset)) + return; + + /* Don't trace the low PCI/ISA area, it's always mapped.. */ + if (!ISA_trace && (offset < ISA_END_ADDRESS) && + (offset + size > ISA_START_ADDRESS)) { + printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low " + "PCI/ISA area (0x%lx-0x%lx)\n", + offset, offset + size); + return; + } + do_ioremap_trace_core(offset, size, addr); +} + +void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size) +{ + void __iomem *p = ioremap_cache(offset, size); + printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", + offset, size, p); + ioremap_trace_core(offset, size, p); + return p; +} +EXPORT_SYMBOL(ioremap_cache_trace); + +void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size) +{ + void __iomem *p = ioremap_nocache(offset, size); + printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", + offset, size, p); + ioremap_trace_core(offset, size, p); + return p; +} +EXPORT_SYMBOL(ioremap_nocache_trace); + +void iounmap_trace(volatile void __iomem *addr) +{ + struct mm_io_header_map event = { + .header = { + .type = MMIO_MAGIC | + (MMIO_UNPROBE << MMIO_OPCODE_SHIFT), + .sec = 0, + .nsec = 0, + .pid = 0, + .data_len = sizeof(struct mm_io_map) + }, + .map = { + .phys = 0, + .addr = (unsigned long)addr, + .len = 0, + .pc = 0 + } + }; + struct remap_trace *trace; + struct remap_trace *tmp; + printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr); + record_timestamp(&event.header); + + spin_lock(&trace_list_lock); + list_for_each_entry_safe(trace, tmp, &trace_list, list) { + if ((unsigned long)addr == trace->probe.addr) { + if (!nommiotrace) + unregister_kmmio_probe(&trace->probe); + list_del(&trace->list); + kfree(trace); + break; + } + } + spin_unlock(&trace_list_lock); + relay_write(chan, &event, sizeof(event)); + iounmap(addr); +} +EXPORT_SYMBOL(iounmap_trace); + +static void clear_trace_list(void) +{ + struct remap_trace *trace; + struct remap_trace *tmp; + + spin_lock(&trace_list_lock); + list_for_each_entry_safe(trace, tmp, &trace_list, list) { + printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped " + "trace @0x%08lx, size 0x%lx.\n", + trace->probe.addr, trace->probe.len); + if (!nommiotrace) + unregister_kmmio_probe(&trace->probe); + list_del(&trace->list); + kfree(trace); + break; + } + spin_unlock(&trace_list_lock); +} + +static int __init init(void) +{ + if (n_subbufs < 2) + return -EINVAL; + + dir = debugfs_create_dir(APP_DIR, NULL); + if (!dir) { + printk(KERN_ERR MODULE_NAME + ": Couldn't create relay app directory.\n"); + return -ENOMEM; + } + + chan = create_channel(subbuf_size, n_subbufs); + if (!chan) { + debugfs_remove(dir); + printk(KERN_ERR MODULE_NAME + ": relay app channel creation failed\n"); + return -ENOMEM; + } + + init_kmmio(); + + proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL); + if (proc_marker_file) + proc_marker_file->write_proc = write_marker; + + printk(KERN_DEBUG MODULE_NAME ": loaded.\n"); + if (nommiotrace) + printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n"); + if (ISA_trace) + printk(KERN_WARNING MODULE_NAME + ": Warning! low ISA range will be traced.\n"); + return 0; +} + +static void __exit cleanup(void) +{ + printk(KERN_DEBUG MODULE_NAME ": unload...\n"); + clear_trace_list(); + cleanup_kmmio(); + remove_proc_entry(MARKER_FILE, NULL); + destroy_channel(); + if (dir) + debugfs_remove(dir); +} + +module_init(init); +module_exit(cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/kernel/mmiotrace/pf_in.c new file mode 100644 index 0000000..67ea520 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/pf_in.c @@ -0,0 +1,489 @@ +/* + * Fault Injection Test harness (FI) + * Copyright (C) Intel Crop. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + * + */ + +/* $Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp $ + * Copyright by Intel Crop., 2002 + * Louis Zhuang (louis.zhuang@intel.com) + * + * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007 + */ + +#include +#include /* struct pt_regs */ +#include "pf_in.h" + +#ifdef __i386__ +/* IA32 Manual 3, 2-1 */ +static unsigned char prefix_codes[] = { + 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, + 0x65, 0x2E, 0x3E, 0x66, 0x67 +}; +/* IA32 Manual 3, 3-432*/ +static unsigned int reg_rop[] = { + 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int imm_wop[] = { 0xC6, 0xC7 }; +/* IA32 Manual 3, 3-432*/ +static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; +static unsigned int rw32[] = { + 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; +static unsigned int mw16[] = { 0xB70F, 0xBF0F }; +static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; +static unsigned int mw64[] = {}; +#else /* not __i386__ */ +static unsigned char prefix_codes[] = { + 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36, + 0xF0, 0xF3, 0xF2, + /* REX Prefixes */ + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f +}; +/* AMD64 Manual 3, Appendix A*/ +static unsigned int reg_rop[] = { + 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int imm_wop[] = { 0xC6, 0xC7 }; +static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; +static unsigned int rw32[] = { + 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +/* 8 bit only */ +static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; +/* 16 bit only */ +static unsigned int mw16[] = { 0xB70F, 0xBF0F }; +/* 16 or 32 bit */ +static unsigned int mw32[] = { 0xC7 }; +/* 16, 32 or 64 bit */ +static unsigned int mw64[] = { 0x89, 0x8B }; +#endif /* not __i386__ */ + +static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, + int *rexr) +{ + int i; + unsigned char *p = addr; + *shorted = 0; + *enlarged = 0; + *rexr = 0; + +restart: + for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { + if (*p == prefix_codes[i]) { + if (*p == 0x66) + *shorted = 1; +#ifdef __amd64__ + if ((*p & 0xf8) == 0x48) + *enlarged = 1; + if ((*p & 0xf4) == 0x44) + *rexr = 1; +#endif + p++; + goto restart; + } + } + + return (p - addr); +} + +static int get_opcode(unsigned char *addr, unsigned int *opcode) +{ + int len; + + if (*addr == 0x0F) { + /* 0x0F is extension instruction */ + *opcode = *(unsigned short *)addr; + len = 2; + } else { + *opcode = *addr; + len = 1; + } + + return len; +} + +#define CHECK_OP_TYPE(opcode, array, type) \ + for (i = 0; i < ARRAY_SIZE(array); i++) { \ + if (array[i] == opcode) { \ + rv = type; \ + goto exit; \ + } \ + } + +enum reason_type get_ins_type(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int shorted, enlarged, rexr; + int i; + enum reason_type rv = OTHERS; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + CHECK_OP_TYPE(opcode, reg_rop, REG_READ); + CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE); + CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE); + +exit: + return rv; +} +#undef CHECK_OP_TYPE + +static unsigned int get_ins_reg_width(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int i, shorted, enlarged, rexr; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + for (i = 0; i < ARRAY_SIZE(rw8); i++) + if (rw8[i] == opcode) + return 1; + + for (i = 0; i < ARRAY_SIZE(rw32); i++) + if (rw32[i] == opcode) + return (shorted ? 2 : (enlarged ? 8 : 4)); + + printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); + return 0; +} + +unsigned int get_ins_mem_width(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int i, shorted, enlarged, rexr; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + for (i = 0; i < ARRAY_SIZE(mw8); i++) + if (mw8[i] == opcode) + return 1; + + for (i = 0; i < ARRAY_SIZE(mw16); i++) + if (mw16[i] == opcode) + return 2; + + for (i = 0; i < ARRAY_SIZE(mw32); i++) + if (mw32[i] == opcode) + return shorted ? 2 : 4; + + for (i = 0; i < ARRAY_SIZE(mw64); i++) + if (mw64[i] == opcode) + return shorted ? 2 : (enlarged ? 8 : 4); + + printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); + return 0; +} + +/* + * Define register ident in mod/rm byte. + * Note: these are NOT the same as in ptrace-abi.h. + */ +enum { + arg_AL = 0, + arg_CL = 1, + arg_DL = 2, + arg_BL = 3, + arg_AH = 4, + arg_CH = 5, + arg_DH = 6, + arg_BH = 7, + + arg_AX = 0, + arg_CX = 1, + arg_DX = 2, + arg_BX = 3, + arg_SP = 4, + arg_BP = 5, + arg_SI = 6, + arg_DI = 7, +#ifdef __amd64__ + arg_R8 = 8, + arg_R9 = 9, + arg_R10 = 10, + arg_R11 = 11, + arg_R12 = 12, + arg_R13 = 13, + arg_R14 = 14, + arg_R15 = 15 +#endif +}; + +static unsigned char *get_reg_w8(int no, struct pt_regs *regs) +{ + unsigned char *rv = NULL; + + switch (no) { + case arg_AL: + rv = (unsigned char *)®s->ax; + break; + case arg_BL: + rv = (unsigned char *)®s->bx; + break; + case arg_CL: + rv = (unsigned char *)®s->cx; + break; + case arg_DL: + rv = (unsigned char *)®s->dx; + break; + case arg_AH: + rv = 1 + (unsigned char *)®s->ax; + break; + case arg_BH: + rv = 1 + (unsigned char *)®s->bx; + break; + case arg_CH: + rv = 1 + (unsigned char *)®s->cx; + break; + case arg_DH: + rv = 1 + (unsigned char *)®s->dx; + break; +#ifdef __amd64__ + case arg_R8: + rv = (unsigned char *)®s->r8; + break; + case arg_R9: + rv = (unsigned char *)®s->r9; + break; + case arg_R10: + rv = (unsigned char *)®s->r10; + break; + case arg_R11: + rv = (unsigned char *)®s->r11; + break; + case arg_R12: + rv = (unsigned char *)®s->r12; + break; + case arg_R13: + rv = (unsigned char *)®s->r13; + break; + case arg_R14: + rv = (unsigned char *)®s->r14; + break; + case arg_R15: + rv = (unsigned char *)®s->r15; + break; +#endif + default: + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + break; + } + return rv; +} + +static unsigned long *get_reg_w32(int no, struct pt_regs *regs) +{ + unsigned long *rv = NULL; + + switch (no) { + case arg_AX: + rv = ®s->ax; + break; + case arg_BX: + rv = ®s->bx; + break; + case arg_CX: + rv = ®s->cx; + break; + case arg_DX: + rv = ®s->dx; + break; + case arg_SP: + rv = ®s->sp; + break; + case arg_BP: + rv = ®s->bp; + break; + case arg_SI: + rv = ®s->si; + break; + case arg_DI: + rv = ®s->di; + break; +#ifdef __amd64__ + case arg_R8: + rv = ®s->r8; + break; + case arg_R9: + rv = ®s->r9; + break; + case arg_R10: + rv = ®s->r10; + break; + case arg_R11: + rv = ®s->r11; + break; + case arg_R12: + rv = ®s->r12; + break; + case arg_R13: + rv = ®s->r13; + break; + case arg_R14: + rv = ®s->r14; + break; + case arg_R15: + rv = ®s->r15; + break; +#endif + default: + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + } + + return rv; +} + +unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) +{ + unsigned int opcode; + unsigned char mod_rm; + int reg; + unsigned char *p; + int i, shorted, enlarged, rexr; + unsigned long rv; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + for (i = 0; i < ARRAY_SIZE(reg_rop); i++) + if (reg_rop[i] == opcode) { + rv = REG_READ; + goto do_work; + } + + for (i = 0; i < ARRAY_SIZE(reg_wop); i++) + if (reg_wop[i] == opcode) { + rv = REG_WRITE; + goto do_work; + } + + printk(KERN_ERR "mmiotrace: Not a register instruction, opcode " + "0x%02x\n", opcode); + goto err; + +do_work: + mod_rm = *p; + reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); + switch (get_ins_reg_width(ins_addr)) { + case 1: + return *get_reg_w8(reg, regs); + + case 2: + return *(unsigned short *)get_reg_w32(reg, regs); + + case 4: + return *(unsigned int *)get_reg_w32(reg, regs); + +#ifdef __amd64__ + case 8: + return *(unsigned long *)get_reg_w32(reg, regs); +#endif + + default: + printk(KERN_ERR "mmiotrace: Error width# %d\n", reg); + } + +err: + return 0; +} + +unsigned long get_ins_imm_val(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char mod_rm; + unsigned char mod; + unsigned char *p; + int i, shorted, enlarged, rexr; + unsigned long rv; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + for (i = 0; i < ARRAY_SIZE(imm_wop); i++) + if (imm_wop[i] == opcode) { + rv = IMM_WRITE; + goto do_work; + } + + printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " + "0x%02x\n", opcode); + goto err; + +do_work: + mod_rm = *p; + mod = mod_rm >> 6; + p++; + switch (mod) { + case 0: + /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */ + /* AMD64: XXX Check for address size prefix? */ + if ((mod_rm & 0x7) == 0x5) + p += 4; + break; + + case 1: + p += 1; + break; + + case 2: + p += 4; + break; + + case 3: + default: + printk(KERN_ERR "mmiotrace: not a memory access instruction " + "at 0x%lx, rm_mod=0x%02x\n", + ins_addr, mod_rm); + } + + switch (get_ins_reg_width(ins_addr)) { + case 1: + return *(unsigned char *)p; + + case 2: + return *(unsigned short *)p; + + case 4: + return *(unsigned int *)p; + +#ifdef __amd64__ + case 8: + return *(unsigned long *)p; +#endif + + default: + printk(KERN_ERR "mmiotrace: Error: width.\n"); + } + +err: + return 0; +} diff --git a/arch/x86/kernel/mmiotrace/pf_in.h b/arch/x86/kernel/mmiotrace/pf_in.h new file mode 100644 index 0000000..e05341a --- /dev/null +++ b/arch/x86/kernel/mmiotrace/pf_in.h @@ -0,0 +1,39 @@ +/* + * Fault Injection Test harness (FI) + * Copyright (C) Intel Crop. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + * + */ + +#ifndef __PF_H_ +#define __PF_H_ + +enum reason_type { + NOT_ME, /* page fault is not in regions */ + NOTHING, /* access others point in regions */ + REG_READ, /* read from addr to reg */ + REG_WRITE, /* write from reg to addr */ + IMM_WRITE, /* write from imm to addr */ + OTHERS /* Other instructions can not intercept */ +}; + +enum reason_type get_ins_type(unsigned long ins_addr); +unsigned int get_ins_mem_width(unsigned long ins_addr); +unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs); +unsigned long get_ins_imm_val(unsigned long ins_addr); + +#endif /* __PF_H_ */ diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c new file mode 100644 index 0000000..40e66b0 --- /dev/null +++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c @@ -0,0 +1,77 @@ +/* + * Written by Pekka Paalanen, 2008 + */ +#include +#include + +extern void __iomem *ioremap_nocache_trace(unsigned long offset, + unsigned long size); +extern void iounmap_trace(volatile void __iomem *addr); + +#define MODULE_NAME "testmmiotrace" + +static unsigned long mmio_address; +module_param(mmio_address, ulong, 0); +MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); + +static void do_write_test(void __iomem *p) +{ + unsigned int i; + for (i = 0; i < 256; i++) + iowrite8(i, p + i); + for (i = 1024; i < (5 * 1024); i += 2) + iowrite16(i * 12 + 7, p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) + iowrite32(i * 212371 + 13, p + i); +} + +static void do_read_test(void __iomem *p) +{ + unsigned int i; + volatile unsigned int v; + for (i = 0; i < 256; i++) + v = ioread8(p + i); + for (i = 1024; i < (5 * 1024); i += 2) + v = ioread16(p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) + v = ioread32(p + i); +} + +static void do_test(void) +{ + void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000); + if (!p) { + printk(KERN_ERR MODULE_NAME ": could not ioremap IO memory, " + "aborting.\n"); + return; + } + do_write_test(p); + do_read_test(p); + iounmap_trace(p); +} + +static int __init init(void) +{ + if (mmio_address == 0) { + printk(KERN_ERR MODULE_NAME ": you have to use the module " + "argument mmio_address.\n"); + printk(KERN_ERR MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" + " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); + return -ENXIO; + } + + printk(KERN_WARNING MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " + "in PCI address space, and writing " + "rubbish in there.\n", mmio_address); + do_test(); + return 0; +} + +static void __exit cleanup(void) +{ + printk(KERN_DEBUG MODULE_NAME ": unloaded.\n"); +} + +module_init(init); +module_exit(cleanup); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h new file mode 100644 index 0000000..cb24782 --- /dev/null +++ b/include/linux/mmiotrace.h @@ -0,0 +1,62 @@ +#ifndef MMIOTRACE_H +#define MMIOTRACE_H + +#include + +#define MMIO_VERSION 0x04 + +/* mm_io_header.type */ +#define MMIO_OPCODE_MASK 0xff +#define MMIO_OPCODE_SHIFT 0 +#define MMIO_WIDTH_MASK 0xff00 +#define MMIO_WIDTH_SHIFT 8 +#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) +#define MMIO_MAGIC_MASK 0xffff0000 + +enum mm_io_opcode { /* payload type: */ + MMIO_READ = 0x1, /* struct mm_io_rw */ + MMIO_WRITE = 0x2, /* struct mm_io_rw */ + MMIO_PROBE = 0x3, /* struct mm_io_map */ + MMIO_UNPROBE = 0x4, /* struct mm_io_map */ + MMIO_MARKER = 0x5, /* raw char data */ + MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ +}; + +struct mm_io_header { + __u32 type; + __u32 sec; /* timestamp */ + __u32 nsec; + __u32 pid; /* PID of the process, or 0 for kernel core */ + __u16 data_len; /* length of the following payload */ +}; + +struct mm_io_rw { + __u64 address; /* virtual address of register */ + __u64 value; + __u64 pc; /* optional program counter */ +}; + +struct mm_io_map { + __u64 phys; /* base address in PCI space */ + __u64 addr; /* base virtual address */ + __u64 len; /* mapping size */ + __u64 pc; /* optional program counter */ +}; + + +/* + * These structures are used to allow a single relay_write() + * call to write a full packet. + */ + +struct mm_io_header_rw { + struct mm_io_header header; + struct mm_io_rw rw; +} __attribute__((packed)); + +struct mm_io_header_map { + struct mm_io_header header; + struct mm_io_map map; +} __attribute__((packed)); + +#endif /* MMIOTRACE_H */ -- cgit v0.10.2 From 75bb88350e0501b3cf5ac096a1008757844414a9 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: x86 mmiotrace: use lookup_address() Use lookup_address() from pageattr.c instead of doing the same manually. Also had to EXPORT_SYMBOL_GPL(lookup_address) to make this work for modules. This also fixes "undefined symbol 'init_mm'" compile error for x86_32. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index 8ba48f9..28411da 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "kmmio.h" @@ -117,40 +118,55 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static void arm_kmmio_fault_page(unsigned long page, int *large) +static void arm_kmmio_fault_page(unsigned long page, int *page_level) { unsigned long address = page & PAGE_MASK; - pgd_t *pgd = pgd_offset_k(address); - pud_t *pud = pud_offset(pgd, address); - pmd_t *pmd = pmd_offset(pud, address); - pte_t *pte = pte_offset_kernel(pmd, address); + int level; + pte_t *pte = lookup_address(address, &level); - if (pmd_large(*pmd)) { + if (!pte) { + printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", + __FUNCTION__, page); + return; + } + + if (level == PG_LEVEL_2M) { + pmd_t *pmd = (pmd_t *)pte; set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT)); - if (large) - *large = 1; } else { + /* PG_LEVEL_4K */ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); } + if (page_level) + *page_level = level; + __flush_tlb_one(page); } -static void disarm_kmmio_fault_page(unsigned long page, int *large) +static void disarm_kmmio_fault_page(unsigned long page, int *page_level) { unsigned long address = page & PAGE_MASK; - pgd_t *pgd = pgd_offset_k(address); - pud_t *pud = pud_offset(pgd, address); - pmd_t *pmd = pmd_offset(pud, address); - pte_t *pte = pte_offset_kernel(pmd, address); + int level; + pte_t *pte = lookup_address(address, &level); - if (large && *large) { + if (!pte) { + printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", + __FUNCTION__, page); + return; + } + + if (level == PG_LEVEL_2M) { + pmd_t *pmd = (pmd_t *)pte; set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT)); - *large = 0; } else { + /* PG_LEVEL_4K */ set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); } + if (page_level) + *page_level = level; + __flush_tlb_one(page); } diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index 73561fe..e43947d 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -120,19 +120,24 @@ static int write_marker(struct file *file, const char __user *buffer, static void print_pte(unsigned long address) { - pgd_t *pgd = pgd_offset_k(address); - pud_t *pud = pud_offset(pgd, address); - pmd_t *pmd = pmd_offset(pud, address); - if (pmd_large(*pmd)) { + int level; + pte_t *pte = lookup_address(address, &level); + + if (!pte) { + printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", + __FUNCTION__, address); + return; + } + + if (level == PG_LEVEL_2M) { printk(KERN_EMERG MODULE_NAME ": 4MB pages are not " "currently supported: %lx\n", address); BUG(); } printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", - address, - pte_val(*pte_offset_kernel(pmd, address)), - pte_val(*pte_offset_kernel(pmd, address)) & _PAGE_PRESENT); + address, pte_val(*pte), + pte_val(*pte) & _PAGE_PRESENT); } /* diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 60bcb5b..57970f2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -227,6 +227,7 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) return pte_offset_kernel(pmd, address); } +EXPORT_SYMBOL_GPL(lookup_address); /* * Set the new pmd in all the pgds we know about: -- cgit v0.10.2 From fe1ffafa80f6673101c6560c2bacfe3df10372ee Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: x86 mmiotrace: fix relay-buffer-full flag for SMP Relay has per-cpu buffers, but mmiotrace was using only a single flag for detecting buffer full/not-full transitions. The new code makes this per-cpu and actually counts missed events. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index e43947d..0019dcd 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -29,6 +29,7 @@ #include #include #include /* for ISA_START_ADDRESS */ +#include #include "kmmio.h" #include "pf_in.h" @@ -47,9 +48,13 @@ struct trap_reason { int active_traces; }; +/* Accessed per-cpu. */ static struct trap_reason pf_reason[NR_CPUS]; static struct mm_io_header_rw cpu_trace[NR_CPUS]; +/* Access to this is not per-cpu. */ +static atomic_t dropped[NR_CPUS]; + static struct file_operations mmio_fops = { .owner = THIS_MODULE, }; @@ -57,7 +62,6 @@ static struct file_operations mmio_fops = { static const size_t subbuf_size = 256*1024; static struct rchan *chan; static struct dentry *dir; -static int suspended; /* XXX should this be per cpu? */ static struct proc_dir_entry *proc_marker_file; /* module parameters */ @@ -269,19 +273,21 @@ static void post(struct kmmio_probe *p, unsigned long condition, static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, size_t prev_padding) { + unsigned int cpu = buf->cpu; + atomic_t *drop = &dropped[cpu]; + int count; if (relay_buf_full(buf)) { - if (!suspended) { - suspended = 1; - printk(KERN_ERR MODULE_NAME - ": cpu %d buffer full!!!\n", - smp_processor_id()); + if (atomic_inc_return(drop) == 1) { + printk(KERN_ERR MODULE_NAME ": cpu %d buffer full!\n", + cpu); } return 0; - } else if (suspended) { - suspended = 0; + } else if ((count = atomic_read(drop))) { printk(KERN_ERR MODULE_NAME - ": cpu %d buffer no longer full.\n", - smp_processor_id()); + ": cpu %d buffer no longer full, " + "missed %d events.\n", + cpu, count); + atomic_sub(count, drop); } return 1; -- cgit v0.10.2 From 63ffa3e456c1a9884a3ebac997d91e3fdae18d78 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86 mmiotrace: comment about user space ABI Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb24782..6ec288f 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,10 @@ #include +/* + * If you change anything here, you must bump MMIO_VERSION. + * This is the relay data format for user space. + */ #define MMIO_VERSION 0x04 /* mm_io_header.type */ @@ -23,7 +27,7 @@ enum mm_io_opcode { /* payload type: */ }; struct mm_io_header { - __u32 type; + __u32 type; /* see MMIO_* macros above */ __u32 sec; /* timestamp */ __u32 nsec; __u32 pid; /* PID of the process, or 0 for kernel core */ -- cgit v0.10.2 From 10c43d2eb50c9a5ad60388b9d3c41c31150049e6 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: explicit call to mmiotrace in do_page_fault() The custom page fault handler list is replaced with a single function pointer. All related functions and variables are renamed for mmiotrace. Signed-off-by: Pekka Paalanen Cc: Christoph Hellwig Cc: Arjan van de Ven Cc: pq@iki.fi Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 7c6496e..9491c0a 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -168,20 +168,18 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config PAGE_FAULT_HANDLERS - bool "Custom page fault handlers" - depends on DEBUG_KERNEL - help - Allow the use of custom page fault handlers. A kernel module may - register a function that is called on every page fault. Custom - handlers are used by some debugging and reverse engineering tools. +config MMIOTRACE_HOOKS + bool + default n config MMIOTRACE tristate "Memory mapped IO tracing" - depends on DEBUG_KERNEL && PAGE_FAULT_HANDLERS && RELAY && DEBUG_FS + depends on DEBUG_KERNEL && RELAY && DEBUG_FS + select MMIOTRACE_HOOKS default n help This will build a kernel module called mmiotrace. + Making this a built-in is heavily discouraged. Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. The kernel module offers wrapped diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index 28411da..e759f7c 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -51,10 +51,6 @@ static LIST_HEAD(kmmio_probes); static struct kmmio_context kmmio_ctx[NR_CPUS]; -static struct pf_handler kmmio_pf_hook = { - .handler = kmmio_page_fault -}; - static struct notifier_block nb_die = { .notifier_call = kmmio_die_notifier }; @@ -77,7 +73,8 @@ void cleanup_kmmio(void) * kmmio_page_table, kmmio_probes */ if (handler_registered) { - unregister_page_fault_handler(&kmmio_pf_hook); + if (mmiotrace_unregister_pf(&kmmio_page_fault)) + BUG(); synchronize_rcu(); } unregister_die_notifier(&nb_die); @@ -343,8 +340,11 @@ int register_kmmio_probe(struct kmmio_probe *p) } if (!handler_registered) { - register_page_fault_handler(&kmmio_pf_hook); - handler_registered++; + if (mmiotrace_register_pf(&kmmio_page_fault)) + printk(KERN_ERR "mmiotrace: Cannot register page " + "fault handler.\n"); + else + handler_registered++; } out: diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 343f5c1..e9a086a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -49,53 +49,55 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -#ifdef CONFIG_PAGE_FAULT_HANDLERS -static HLIST_HEAD(pf_handlers); /* protected by RCU */ -static DEFINE_SPINLOCK(pf_handlers_writer); +#ifdef CONFIG_MMIOTRACE_HOOKS +static pf_handler_func mmiotrace_pf_handler; /* protected by RCU */ +static DEFINE_SPINLOCK(mmiotrace_handler_lock); -void register_page_fault_handler(struct pf_handler *new_pfh) +int mmiotrace_register_pf(pf_handler_func new_pfh) { + int ret = 0; unsigned long flags; - spin_lock_irqsave(&pf_handlers_writer, flags); - hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers); - spin_unlock_irqrestore(&pf_handlers_writer, flags); + spin_lock_irqsave(&mmiotrace_handler_lock, flags); + if (mmiotrace_pf_handler) + ret = -EBUSY; + else + mmiotrace_pf_handler = new_pfh; + spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); + return ret; } -EXPORT_SYMBOL_GPL(register_page_fault_handler); +EXPORT_SYMBOL_GPL(mmiotrace_register_pf); /** - * unregister_page_fault_handler: + * mmiotrace_unregister_pf: * The caller must ensure @old_pfh is not in use anymore before freeing it. - * This function does not guarantee it. The list of handlers is protected by - * RCU, so you can do this by e.g. calling synchronize_rcu(). + * This function does not guarantee it. The handler function pointer is + * protected by RCU, so you can do this by e.g. calling synchronize_rcu(). */ -void unregister_page_fault_handler(struct pf_handler *old_pfh) +int mmiotrace_unregister_pf(pf_handler_func old_pfh) { + int ret = 0; unsigned long flags; - spin_lock_irqsave(&pf_handlers_writer, flags); - hlist_del_rcu(&old_pfh->hlist); - spin_unlock_irqrestore(&pf_handlers_writer, flags); + spin_lock_irqsave(&mmiotrace_handler_lock, flags); + if (mmiotrace_pf_handler != old_pfh) + ret = -EPERM; + else + mmiotrace_pf_handler = NULL; + spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); + return ret; } -EXPORT_SYMBOL_GPL(unregister_page_fault_handler); -#endif +EXPORT_SYMBOL_GPL(mmiotrace_unregister_pf); +#endif /* CONFIG_MMIOTRACE_HOOKS */ /* returns non-zero if do_page_fault() should return */ -static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code, - unsigned long address) +static inline int call_mmiotrace(struct pt_regs *regs, + unsigned long error_code, + unsigned long address) { -#ifdef CONFIG_PAGE_FAULT_HANDLERS +#ifdef CONFIG_MMIOTRACE_HOOKS int ret = 0; - struct pf_handler *cur; - struct hlist_node *ncur; - - if (hlist_empty(&pf_handlers)) - return 0; - rcu_read_lock(); - hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) { - ret = cur->handler(regs, error_code, address); - if (ret) - break; - } + if (mmiotrace_pf_handler) + ret = mmiotrace_pf_handler(regs, error_code, address); rcu_read_unlock(); return ret; #else @@ -655,7 +657,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (notify_page_fault(regs)) return; - if (handle_custom_pf(regs, error_code, address)) + if (call_mmiotrace(regs, error_code, address)) return; /* diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index a80f2d6..7063281 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -35,13 +35,11 @@ extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); -struct pf_handler { - struct hlist_node hlist; - int (*handler)(struct pt_regs *regs, unsigned long error_code, - unsigned long address); -}; +typedef int (*pf_handler_func)(struct pt_regs *regs, + unsigned long error_code, + unsigned long address); -extern void register_page_fault_handler(struct pf_handler *new_pfh); -extern void unregister_page_fault_handler(struct pf_handler *old_pfh); +extern int mmiotrace_register_pf(pf_handler_func new_pfh); +extern int mmiotrace_unregister_pf(pf_handler_func old_pfh); #endif -- cgit v0.10.2 From f513638030ca384b0bace4df64f0b82f6ae1e4c6 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86 mmiotrace: Use percpu instead of arrays. Signed-off-by: Pekka Paalanen Cc: Eric Dumazet Cc: pq@iki.fi Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index e759f7c..5e239d0 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -49,7 +50,8 @@ static unsigned int handler_registered; static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; static LIST_HEAD(kmmio_probes); -static struct kmmio_context kmmio_ctx[NR_CPUS]; +/* Accessed per-cpu */ +static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); static struct notifier_block nb_die = { .notifier_call = kmmio_die_notifier @@ -173,8 +175,7 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) */ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) { - struct kmmio_context *ctx; - int cpu; + struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); /* * Preemption is now disabled to prevent process switch during @@ -187,8 +188,6 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) * And that interrupt triggers a kmmio trap? */ preempt_disable(); - cpu = smp_processor_id(); - ctx = &kmmio_ctx[cpu]; /* interrupts disabled and CPU-local data => atomicity guaranteed. */ if (ctx->active) { @@ -199,7 +198,7 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) */ printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, " "for address %lu. Ignoring.\n", - cpu, addr); + smp_processor_id(), addr); goto no_kmmio; } ctx->active++; @@ -231,6 +230,7 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) /* We hold lock, now we set present bit in PTE and single step. */ disarm_kmmio_fault_page(ctx->fpage->page, NULL); + put_cpu_var(kmmio_ctx); return 1; no_kmmio_locked: @@ -238,6 +238,7 @@ no_kmmio_locked: ctx->active--; no_kmmio: preempt_enable_no_resched(); + put_cpu_var(kmmio_ctx); /* page fault not handled by kmmio */ return 0; } @@ -249,11 +250,11 @@ no_kmmio: */ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) { - int cpu = smp_processor_id(); - struct kmmio_context *ctx = &kmmio_ctx[cpu]; + int ret = 0; + struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) - return 0; + goto out; if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); @@ -273,10 +274,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) * will have TF set, in which case, continue the remaining processing * of do_debug, as if this is not a probe hit. */ - if (regs->flags & TF_MASK) - return 0; + if (!(regs->flags & TF_MASK)) + ret = 1; - return 1; +out: + put_cpu_var(kmmio_ctx); + return ret; } static int add_kmmio_fault_page(unsigned long page) diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index 0019dcd..f9c6092 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -30,6 +30,7 @@ #include #include /* for ISA_START_ADDRESS */ #include +#include #include "kmmio.h" #include "pf_in.h" @@ -49,11 +50,11 @@ struct trap_reason { }; /* Accessed per-cpu. */ -static struct trap_reason pf_reason[NR_CPUS]; -static struct mm_io_header_rw cpu_trace[NR_CPUS]; +static DEFINE_PER_CPU(struct trap_reason, pf_reason); +static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); /* Access to this is not per-cpu. */ -static atomic_t dropped[NR_CPUS]; +static DEFINE_PER_CPU(atomic_t, dropped); static struct file_operations mmio_fops = { .owner = THIS_MODULE, @@ -150,15 +151,15 @@ static void print_pte(unsigned long address) */ static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) { - const unsigned long cpu = smp_processor_id(); + const struct trap_reason *my_reason = &get_cpu_var(pf_reason); printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, " "last fault for address: %lx\n", - addr, pf_reason[cpu].addr); + addr, my_reason->addr); print_pte(addr); #ifdef __i386__ print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting EIP was at %s\n", - pf_reason[cpu].ip); + my_reason->ip); printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); @@ -168,100 +169,105 @@ static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) #else print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting RIP was at %s\n", - pf_reason[cpu].ip); + my_reason->ip); printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n", regs->ax, regs->cx, regs->dx); printk(KERN_EMERG "rsi: %016lx rdi: %016lx " "rbp: %016lx rsp: %016lx\n", regs->si, regs->di, regs->bp, regs->sp); #endif + put_cpu_var(pf_reason); BUG(); } static void pre(struct kmmio_probe *p, struct pt_regs *regs, unsigned long addr) { - const unsigned long cpu = smp_processor_id(); + struct trap_reason *my_reason = &get_cpu_var(pf_reason); + struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); const unsigned long instptr = instruction_pointer(regs); const enum reason_type type = get_ins_type(instptr); /* it doesn't make sense to have more than one active trace per cpu */ - if (pf_reason[cpu].active_traces) + if (my_reason->active_traces) die_kmmio_nesting_error(regs, addr); else - pf_reason[cpu].active_traces++; + my_reason->active_traces++; - pf_reason[cpu].type = type; - pf_reason[cpu].addr = addr; - pf_reason[cpu].ip = instptr; + my_reason->type = type; + my_reason->addr = addr; + my_reason->ip = instptr; - cpu_trace[cpu].header.type = MMIO_MAGIC; - cpu_trace[cpu].header.pid = 0; - cpu_trace[cpu].header.data_len = sizeof(struct mm_io_rw); - cpu_trace[cpu].rw.address = addr; + my_trace->header.type = MMIO_MAGIC; + my_trace->header.pid = 0; + my_trace->header.data_len = sizeof(struct mm_io_rw); + my_trace->rw.address = addr; /* * Only record the program counter when requested. * It may taint clean-room reverse engineering. */ if (trace_pc) - cpu_trace[cpu].rw.pc = instptr; + my_trace->rw.pc = instptr; else - cpu_trace[cpu].rw.pc = 0; + my_trace->rw.pc = 0; - record_timestamp(&cpu_trace[cpu].header); + record_timestamp(&my_trace->header); switch (type) { case REG_READ: - cpu_trace[cpu].header.type |= + my_trace->header.type |= (MMIO_READ << MMIO_OPCODE_SHIFT) | (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); break; case REG_WRITE: - cpu_trace[cpu].header.type |= + my_trace->header.type |= (MMIO_WRITE << MMIO_OPCODE_SHIFT) | (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - cpu_trace[cpu].rw.value = get_ins_reg_val(instptr, regs); + my_trace->rw.value = get_ins_reg_val(instptr, regs); break; case IMM_WRITE: - cpu_trace[cpu].header.type |= + my_trace->header.type |= (MMIO_WRITE << MMIO_OPCODE_SHIFT) | (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - cpu_trace[cpu].rw.value = get_ins_imm_val(instptr); + my_trace->rw.value = get_ins_imm_val(instptr); break; default: { unsigned char *ip = (unsigned char *)instptr; - cpu_trace[cpu].header.type |= + my_trace->header.type |= (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT); - cpu_trace[cpu].rw.value = (*ip) << 16 | - *(ip + 1) << 8 | - *(ip + 2); + my_trace->rw.value = (*ip) << 16 | *(ip + 1) << 8 | + *(ip + 2); } } + put_cpu_var(cpu_trace); + put_cpu_var(pf_reason); } static void post(struct kmmio_probe *p, unsigned long condition, struct pt_regs *regs) { - const unsigned long cpu = smp_processor_id(); + struct trap_reason *my_reason = &get_cpu_var(pf_reason); + struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); /* this should always return the active_trace count to 0 */ - pf_reason[cpu].active_traces--; - if (pf_reason[cpu].active_traces) { + my_reason->active_traces--; + if (my_reason->active_traces) { printk(KERN_EMERG MODULE_NAME ": unexpected post handler"); BUG(); } - switch (pf_reason[cpu].type) { + switch (my_reason->type) { case REG_READ: - cpu_trace[cpu].rw.value = get_ins_reg_val(pf_reason[cpu].ip, - regs); + my_trace->rw.value = get_ins_reg_val(my_reason->ip, regs); break; default: break; } - relay_write(chan, &cpu_trace[cpu], sizeof(struct mm_io_header_rw)); + relay_write(chan, my_trace, sizeof(*my_trace)); + put_cpu_var(cpu_trace); + put_cpu_var(pf_reason); } /* @@ -274,7 +280,7 @@ static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, size_t prev_padding) { unsigned int cpu = buf->cpu; - atomic_t *drop = &dropped[cpu]; + atomic_t *drop = &per_cpu(dropped, cpu); int count; if (relay_buf_full(buf)) { if (atomic_inc_return(drop) == 1) { -- cgit v0.10.2 From 0fd0e3da4557c479b820b9a4a7afa25b4637ddf2 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: mmiotrace full patch, preview 1 kmmio.c handles the list of mmio probes with callbacks, list of traced pages, and attaching into the page fault handler and die notifier. It arms, traps and disarms the given pages, this is the core of mmiotrace. mmio-mod.c is a user interface, hooking into ioremap functions and registering the mmio probes. It also decodes the required information from trapped mmio accesses via the pre and post callbacks in each probe. Currently, hooking into ioremap functions works by redefining the symbols of the target (binary) kernel module, so that it calls the traced versions of the functions. The most notable changes done since the last discussion are: - kmmio.c is a built-in, not part of the module - direct call from fault.c to kmmio.c, removing all dynamic hooks - prepare for unregistering probes at any time - make kmmio re-initializable and accessible to more than one user - rewrite kmmio locking to remove all spinlocks from page fault path Can I abuse call_rcu() like I do in kmmio.c:unregister_kmmio_probe() or is there a better way? The function called via call_rcu() itself calls call_rcu() again, will this work or break? There I need a second grace period for RCU after the first grace period for page faults. Mmiotrace itself (mmio-mod.c) is still a module, I am going to attack that next. At some point I will start looking into how to make mmiotrace a tracer component of ftrace (thanks for the hint, Ingo). Ftrace should make the user space part of mmiotracing as simple as 'cat /debug/trace/mmio > dump.txt'. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 027a5b6..a4f93b4 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -15,7 +15,6 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ -EXPORT_SYMBOL_GPL(init_mm); /* * Initial thread structure. diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile index d6905f7..cf1e747 100644 --- a/arch/x86/kernel/mmiotrace/Makefile +++ b/arch/x86/kernel/mmiotrace/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-objs := pf_in.o kmmio.o mmio-mod.o - -obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o +obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o +obj-$(CONFIG_MMIOTRACE) += mmiotrace.o +mmiotrace-objs := pf_in.o mmio-mod.o +obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index 5e239d0..539a9b1 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -17,70 +18,119 @@ #include #include #include +#include #include #include #include #include #include -#include "kmmio.h" +#include -#define KMMIO_HASH_BITS 6 -#define KMMIO_TABLE_SIZE (1 << KMMIO_HASH_BITS) #define KMMIO_PAGE_HASH_BITS 4 #define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) +struct kmmio_fault_page { + struct list_head list; + struct kmmio_fault_page *release_next; + unsigned long page; /* location of the fault page */ + + /* + * Number of times this page has been registered as a part + * of a probe. If zero, page is disarmed and this may be freed. + * Used only by writers (RCU). + */ + int count; +}; + +struct kmmio_delayed_release { + struct rcu_head rcu; + struct kmmio_fault_page *release_list; +}; + struct kmmio_context { struct kmmio_fault_page *fpage; struct kmmio_probe *probe; unsigned long saved_flags; + unsigned long addr; int active; }; -static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, - unsigned long address); static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args); +static DECLARE_MUTEX(kmmio_init_mutex); static DEFINE_SPINLOCK(kmmio_lock); /* These are protected by kmmio_lock */ +static int kmmio_initialized; unsigned int kmmio_count; -static unsigned int handler_registered; + +/* Read-protected by RCU, write-protected by kmmio_lock. */ static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; static LIST_HEAD(kmmio_probes); +static struct list_head *kmmio_page_list(unsigned long page) +{ + return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; +} + /* Accessed per-cpu */ static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); +/* protected by kmmio_init_mutex */ static struct notifier_block nb_die = { .notifier_call = kmmio_die_notifier }; -int init_kmmio(void) +/** + * Makes sure kmmio is initialized and usable. + * This must be called before any other kmmio function defined here. + * May sleep. + */ +void reference_kmmio(void) { - int i; - for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) - INIT_LIST_HEAD(&kmmio_page_table[i]); - - register_die_notifier(&nb_die); - return 0; + down(&kmmio_init_mutex); + spin_lock_irq(&kmmio_lock); + if (!kmmio_initialized) { + int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kmmio_page_table[i]); + if (register_die_notifier(&nb_die)) + BUG(); + } + kmmio_initialized++; + spin_unlock_irq(&kmmio_lock); + up(&kmmio_init_mutex); } +EXPORT_SYMBOL_GPL(reference_kmmio); -void cleanup_kmmio(void) +/** + * Clean up kmmio after use. This must be called for every call to + * reference_kmmio(). All probes registered after the corresponding + * reference_kmmio() must have been unregistered when calling this. + * May sleep. + */ +void unreference_kmmio(void) { - /* - * Assume the following have been already cleaned by calling - * unregister_kmmio_probe() appropriately: - * kmmio_page_table, kmmio_probes - */ - if (handler_registered) { - if (mmiotrace_unregister_pf(&kmmio_page_fault)) - BUG(); - synchronize_rcu(); + bool unreg = false; + + down(&kmmio_init_mutex); + spin_lock_irq(&kmmio_lock); + + if (kmmio_initialized == 1) { + BUG_ON(is_kmmio_active()); + unreg = true; } - unregister_die_notifier(&nb_die); + kmmio_initialized--; + BUG_ON(kmmio_initialized < 0); + spin_unlock_irq(&kmmio_lock); + + if (unreg) + unregister_die_notifier(&nb_die); /* calls sync_rcu() */ + up(&kmmio_init_mutex); } +EXPORT_SYMBOL(unreference_kmmio); /* * this is basically a dynamic stabbing problem: @@ -90,33 +140,33 @@ void cleanup_kmmio(void) * Overlap a Point (might be simple) * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup */ -/* Get the kmmio at this addr (if any). You must be holding kmmio_lock. */ +/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) { struct kmmio_probe *p; - list_for_each_entry(p, &kmmio_probes, list) { + list_for_each_entry_rcu(p, &kmmio_probes, list) { if (addr >= p->addr && addr <= (p->addr + p->len)) return p; } return NULL; } +/* You must be holding RCU read lock. */ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) { - struct list_head *head, *tmp; + struct list_head *head; + struct kmmio_fault_page *p; page &= PAGE_MASK; - head = &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; - list_for_each(tmp, head) { - struct kmmio_fault_page *p - = list_entry(tmp, struct kmmio_fault_page, list); + head = kmmio_page_list(page); + list_for_each_entry_rcu(p, head, list) { if (p->page == page) return p; } - return NULL; } +/** Mark the given page as not present. Access to it will trigger a fault. */ static void arm_kmmio_fault_page(unsigned long page, int *page_level) { unsigned long address = page & PAGE_MASK; @@ -124,8 +174,8 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level) pte_t *pte = lookup_address(address, &level); if (!pte) { - printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", - __FUNCTION__, page); + pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", + __func__, page); return; } @@ -143,6 +193,7 @@ static void arm_kmmio_fault_page(unsigned long page, int *page_level) __flush_tlb_one(page); } +/** Mark the given page as present. */ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) { unsigned long address = page & PAGE_MASK; @@ -150,8 +201,8 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) pte_t *pte = lookup_address(address, &level); if (!pte) { - printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", - __FUNCTION__, page); + pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", + __func__, page); return; } @@ -170,12 +221,24 @@ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) } /* + * This is being called from do_page_fault(). + * + * We may be in an interrupt or a critical section. Also prefecthing may + * trigger a page fault. We may be in the middle of process switch. + * We cannot take any locks, because we could be executing especially + * within a kmmio critical section. + * + * Local interrupts are disabled, so preemption cannot happen. + * Do not enable interrupts, do not sleep, and watch out for other CPUs. + */ +/* * Interrupts are disabled on entry as trap3 is an interrupt gate * and they remain disabled thorough out this function. */ -static int kmmio_handler(struct pt_regs *regs, unsigned long addr) +int kmmio_handler(struct pt_regs *regs, unsigned long addr) { - struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); + struct kmmio_context *ctx; + struct kmmio_fault_page *faultpage; /* * Preemption is now disabled to prevent process switch during @@ -186,40 +249,40 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) * XXX what if an interrupt occurs between returning from * do_page_fault() and entering the single-step exception handler? * And that interrupt triggers a kmmio trap? + * XXX If we tracing an interrupt service routine or whatever, is + * this enough to keep it on the current cpu? */ preempt_disable(); - /* interrupts disabled and CPU-local data => atomicity guaranteed. */ + rcu_read_lock(); + faultpage = get_kmmio_fault_page(addr); + if (!faultpage) { + /* + * Either this page fault is not caused by kmmio, or + * another CPU just pulled the kmmio probe from under + * our feet. In the latter case all hell breaks loose. + */ + goto no_kmmio; + } + + ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { /* - * This avoids a deadlock with kmmio_lock. + * Prevent overwriting already in-flight context. * If this page fault really was due to kmmio trap, * all hell breaks loose. */ - printk(KERN_EMERG "mmiotrace: recursive probe hit on CPU %d, " - "for address %lu. Ignoring.\n", + pr_emerg("kmmio: recursive probe hit on CPU %d, " + "for address 0x%08lx. Ignoring.\n", smp_processor_id(), addr); - goto no_kmmio; + goto no_kmmio_ctx; } ctx->active++; - /* - * Acquire the kmmio lock to prevent changes affecting - * get_kmmio_fault_page() and get_kmmio_probe(), since we save their - * returned pointers. - * The lock is released in post_kmmio_handler(). - * XXX: could/should get_kmmio_*() be using RCU instead of spinlock? - */ - spin_lock(&kmmio_lock); - - ctx->fpage = get_kmmio_fault_page(addr); - if (!ctx->fpage) { - /* this page fault is not caused by kmmio */ - goto no_kmmio_locked; - } - + ctx->fpage = faultpage; ctx->probe = get_kmmio_probe(addr); ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK)); + ctx->addr = addr; if (ctx->probe && ctx->probe->pre_handler) ctx->probe->pre_handler(ctx->probe, regs, addr); @@ -227,46 +290,62 @@ static int kmmio_handler(struct pt_regs *regs, unsigned long addr) regs->flags |= TF_MASK; regs->flags &= ~IF_MASK; - /* We hold lock, now we set present bit in PTE and single step. */ + /* Now we set present bit in PTE and single step. */ disarm_kmmio_fault_page(ctx->fpage->page, NULL); put_cpu_var(kmmio_ctx); + rcu_read_unlock(); return 1; -no_kmmio_locked: - spin_unlock(&kmmio_lock); - ctx->active--; +no_kmmio_ctx: + put_cpu_var(kmmio_ctx); no_kmmio: + rcu_read_unlock(); preempt_enable_no_resched(); - put_cpu_var(kmmio_ctx); - /* page fault not handled by kmmio */ - return 0; + return 0; /* page fault not handled by kmmio */ } /* * Interrupts are disabled on entry as trap1 is an interrupt gate * and they remain disabled thorough out this function. - * And we hold kmmio lock. + * This must always get called as the pair to kmmio_handler(). */ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) { int ret = 0; + struct kmmio_probe *probe; + struct kmmio_fault_page *faultpage; struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) goto out; + rcu_read_lock(); + + faultpage = get_kmmio_fault_page(ctx->addr); + probe = get_kmmio_probe(ctx->addr); + if (faultpage != ctx->fpage || probe != ctx->probe) { + /* + * The trace setup changed after kmmio_handler() and before + * running this respective post handler. User does not want + * the result anymore. + */ + ctx->probe = NULL; + ctx->fpage = NULL; + } + if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - arm_kmmio_fault_page(ctx->fpage->page, NULL); + if (ctx->fpage) + arm_kmmio_fault_page(ctx->fpage->page, NULL); regs->flags &= ~TF_MASK; regs->flags |= ctx->saved_flags; /* These were acquired in kmmio_handler(). */ ctx->active--; - spin_unlock(&kmmio_lock); + BUG_ON(ctx->active); preempt_enable_no_resched(); /* @@ -277,11 +356,13 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (!(regs->flags & TF_MASK)) ret = 1; + rcu_read_unlock(); out: put_cpu_var(kmmio_ctx); return ret; } +/* You must be holding kmmio_lock. */ static int add_kmmio_fault_page(unsigned long page) { struct kmmio_fault_page *f; @@ -289,6 +370,8 @@ static int add_kmmio_fault_page(unsigned long page) page &= PAGE_MASK; f = get_kmmio_fault_page(page); if (f) { + if (!f->count) + arm_kmmio_fault_page(f->page, NULL); f->count++; return 0; } @@ -299,15 +382,16 @@ static int add_kmmio_fault_page(unsigned long page) f->count = 1; f->page = page; - list_add(&f->list, - &kmmio_page_table[hash_long(f->page, KMMIO_PAGE_HASH_BITS)]); + list_add_rcu(&f->list, kmmio_page_list(f->page)); arm_kmmio_fault_page(f->page, NULL); return 0; } -static void release_kmmio_fault_page(unsigned long page) +/* You must be holding kmmio_lock. */ +static void release_kmmio_fault_page(unsigned long page, + struct kmmio_fault_page **release_list) { struct kmmio_fault_page *f; @@ -317,9 +401,11 @@ static void release_kmmio_fault_page(unsigned long page) return; f->count--; + BUG_ON(f->count < 0); if (!f->count) { disarm_kmmio_fault_page(f->page, NULL); - list_del(&f->list); + f->release_next = *release_list; + *release_list = f; } } @@ -334,68 +420,113 @@ int register_kmmio_probe(struct kmmio_probe *p) ret = -EEXIST; goto out; } - list_add(&p->list, &kmmio_probes); - /*printk("adding fault pages...\n");*/ + list_add_rcu(&p->list, &kmmio_probes); while (size < p->len) { if (add_kmmio_fault_page(p->addr + size)) - printk(KERN_ERR "mmio: Unable to set page fault.\n"); + pr_err("kmmio: Unable to set page fault.\n"); size += PAGE_SIZE; } - - if (!handler_registered) { - if (mmiotrace_register_pf(&kmmio_page_fault)) - printk(KERN_ERR "mmiotrace: Cannot register page " - "fault handler.\n"); - else - handler_registered++; - } - out: spin_unlock_irq(&kmmio_lock); /* * XXX: What should I do here? * Here was a call to global_flush_tlb(), but it does not exist - * anymore. + * anymore. It seems it's not needed after all. */ return ret; } +EXPORT_SYMBOL(register_kmmio_probe); +static void rcu_free_kmmio_fault_pages(struct rcu_head *head) +{ + struct kmmio_delayed_release *dr = container_of( + head, + struct kmmio_delayed_release, + rcu); + struct kmmio_fault_page *p = dr->release_list; + while (p) { + struct kmmio_fault_page *next = p->release_next; + BUG_ON(p->count); + kfree(p); + p = next; + } + kfree(dr); +} + +static void remove_kmmio_fault_pages(struct rcu_head *head) +{ + struct kmmio_delayed_release *dr = container_of( + head, + struct kmmio_delayed_release, + rcu); + struct kmmio_fault_page *p = dr->release_list; + struct kmmio_fault_page **prevp = &dr->release_list; + unsigned long flags; + spin_lock_irqsave(&kmmio_lock, flags); + while (p) { + if (!p->count) + list_del_rcu(&p->list); + else + *prevp = p->release_next; + prevp = &p->release_next; + p = p->release_next; + } + spin_unlock_irqrestore(&kmmio_lock, flags); + /* This is the real RCU destroy call. */ + call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); +} + +/* + * Remove a kmmio probe. You have to synchronize_rcu() before you can be + * sure that the callbacks will not be called anymore. + * + * Unregistering a kmmio fault page has three steps: + * 1. release_kmmio_fault_page() + * Disarm the page, wait a grace period to let all faults finish. + * 2. remove_kmmio_fault_pages() + * Remove the pages from kmmio_page_table. + * 3. rcu_free_kmmio_fault_pages() + * Actally free the kmmio_fault_page structs as with RCU. + */ void unregister_kmmio_probe(struct kmmio_probe *p) { unsigned long size = 0; + struct kmmio_fault_page *release_list = NULL; + struct kmmio_delayed_release *drelease; spin_lock_irq(&kmmio_lock); while (size < p->len) { - release_kmmio_fault_page(p->addr + size); + release_kmmio_fault_page(p->addr + size, &release_list); size += PAGE_SIZE; } - list_del(&p->list); + list_del_rcu(&p->list); kmmio_count--; spin_unlock_irq(&kmmio_lock); -} -/* - * According to 2.6.20, mainly x86_64 arch: - * This is being called from do_page_fault(), via the page fault notifier - * chain. The chain is called for both user space faults and kernel space - * faults (address >= TASK_SIZE64), except not on faults serviced by - * vmalloc_fault(). - * - * We may be in an interrupt or a critical section. Also prefecthing may - * trigger a page fault. We may be in the middle of process switch. - * The page fault hook functionality has put us inside RCU read lock. - * - * Local interrupts are disabled, so preemption cannot happen. - * Do not enable interrupts, do not sleep, and watch out for other CPUs. - */ -static int kmmio_page_fault(struct pt_regs *regs, unsigned long error_code, - unsigned long address) -{ - if (is_kmmio_active()) - if (kmmio_handler(regs, address) == 1) - return -1; - return 0; + drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); + if (!drelease) { + pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); + return; + } + drelease->release_list = release_list; + + /* + * This is not really RCU here. We have just disarmed a set of + * pages so that they cannot trigger page faults anymore. However, + * we cannot remove the pages from kmmio_page_table, + * because a probe hit might be in flight on another CPU. The + * pages are collected into a list, and they will be removed from + * kmmio_page_table when it is certain that no probe hit related to + * these pages can be in flight. RCU grace period sounds like a + * good choice. + * + * If we removed the pages too early, kmmio page fault handler might + * not find the respective kmmio_fault_page and determine it's not + * a kmmio fault, when it actually is. This would lead to madness. + */ + call_rcu(&drelease->rcu, remove_kmmio_fault_pages); } +EXPORT_SYMBOL(unregister_kmmio_probe); static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) diff --git a/arch/x86/kernel/mmiotrace/kmmio.h b/arch/x86/kernel/mmiotrace/kmmio.h deleted file mode 100644 index 85b7f68..0000000 --- a/arch/x86/kernel/mmiotrace/kmmio.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef _LINUX_KMMIO_H -#define _LINUX_KMMIO_H - -#include -#include -#include -#include -#include -#include -#include - -struct kmmio_probe; -struct kmmio_fault_page; -struct pt_regs; - -typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, - struct pt_regs *, unsigned long addr); -typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, - unsigned long condition, struct pt_regs *); - -struct kmmio_probe { - struct list_head list; - - /* start location of the probe point */ - unsigned long addr; - - /* length of the probe region */ - unsigned long len; - - /* Called before addr is executed. */ - kmmio_pre_handler_t pre_handler; - - /* Called after addr is executed, unless... */ - kmmio_post_handler_t post_handler; -}; - -struct kmmio_fault_page { - struct list_head list; - - /* location of the fault page */ - unsigned long page; - - int count; -}; - -/* kmmio is active by some kmmio_probes? */ -static inline int is_kmmio_active(void) -{ - extern unsigned int kmmio_count; - return kmmio_count; -} - -int init_kmmio(void); -void cleanup_kmmio(void); -int register_kmmio_probe(struct kmmio_probe *p); -void unregister_kmmio_probe(struct kmmio_probe *p); - -#endif /* _LINUX_KMMIO_H */ diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index f9c6092..e1a5085 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -32,7 +32,6 @@ #include #include -#include "kmmio.h" #include "pf_in.h" /* This app's relay channel files will appear in /debug/mmio-trace */ @@ -129,18 +128,17 @@ static void print_pte(unsigned long address) pte_t *pte = lookup_address(address, &level); if (!pte) { - printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n", - __FUNCTION__, address); + pr_err(MODULE_NAME ": Error in %s: no pte for page 0x%08lx\n", + __func__, address); return; } if (level == PG_LEVEL_2M) { - printk(KERN_EMERG MODULE_NAME ": 4MB pages are not " - "currently supported: %lx\n", - address); + pr_emerg(MODULE_NAME ": 4MB pages are not currently " + "supported: %lx\n", address); BUG(); } - printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", + pr_info(MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), pte_val(*pte) & _PAGE_PRESENT); } @@ -152,7 +150,7 @@ static void print_pte(unsigned long address) static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) { const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, " + pr_emerg(MODULE_NAME ": unexpected fault for address: %lx, " "last fault for address: %lx\n", addr, my_reason->addr); print_pte(addr); @@ -160,20 +158,17 @@ static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting EIP was at %s\n", my_reason->ip); - printk(KERN_EMERG - "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); - printk(KERN_EMERG - "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->si, regs->di, regs->bp, regs->sp); #else print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting RIP was at %s\n", my_reason->ip); - printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n", + pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", regs->ax, regs->cx, regs->dx); - printk(KERN_EMERG "rsi: %016lx rdi: %016lx " - "rbp: %016lx rsp: %016lx\n", + pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", regs->si, regs->di, regs->bp, regs->sp); #endif put_cpu_var(pf_reason); @@ -251,10 +246,15 @@ static void post(struct kmmio_probe *p, unsigned long condition, struct trap_reason *my_reason = &get_cpu_var(pf_reason); struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); + /* + * XXX: This might not get called, if the probe is removed while + * trace hit is on flight. + */ + /* this should always return the active_trace count to 0 */ my_reason->active_traces--; if (my_reason->active_traces) { - printk(KERN_EMERG MODULE_NAME ": unexpected post handler"); + pr_emerg(MODULE_NAME ": unexpected post handler"); BUG(); } @@ -283,16 +283,15 @@ static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, atomic_t *drop = &per_cpu(dropped, cpu); int count; if (relay_buf_full(buf)) { - if (atomic_inc_return(drop) == 1) { - printk(KERN_ERR MODULE_NAME ": cpu %d buffer full!\n", - cpu); - } + if (atomic_inc_return(drop) == 1) + pr_err(MODULE_NAME ": cpu %d buffer full!\n", cpu); return 0; - } else if ((count = atomic_read(drop))) { - printk(KERN_ERR MODULE_NAME - ": cpu %d buffer no longer full, " - "missed %d events.\n", - cpu, count); + } + count = atomic_read(drop); + if (count) { + pr_err(MODULE_NAME ": cpu %d buffer no longer full, " + "missed %d events.\n", + cpu, count); atomic_sub(count, drop); } @@ -407,8 +406,8 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, /* Don't trace the low PCI/ISA area, it's always mapped.. */ if (!ISA_trace && (offset < ISA_END_ADDRESS) && (offset + size > ISA_START_ADDRESS)) { - printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low " - "PCI/ISA area (0x%lx-0x%lx)\n", + pr_notice(MODULE_NAME ": Ignoring map of low PCI/ISA area " + "(0x%lx-0x%lx)\n", offset, offset + size); return; } @@ -418,7 +417,7 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size) { void __iomem *p = ioremap_cache(offset, size); - printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", + pr_debug(MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", offset, size, p); ioremap_trace_core(offset, size, p); return p; @@ -428,7 +427,7 @@ EXPORT_SYMBOL(ioremap_cache_trace); void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size) { void __iomem *p = ioremap_nocache(offset, size); - printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", + pr_debug(MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", offset, size, p); ioremap_trace_core(offset, size, p); return p; @@ -455,7 +454,7 @@ void iounmap_trace(volatile void __iomem *addr) }; struct remap_trace *trace; struct remap_trace *tmp; - printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr); + pr_debug(MODULE_NAME ": Unmapping %p.\n", addr); record_timestamp(&event.header); spin_lock(&trace_list_lock); @@ -481,7 +480,7 @@ static void clear_trace_list(void) spin_lock(&trace_list_lock); list_for_each_entry_safe(trace, tmp, &trace_list, list) { - printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped " + pr_warning(MODULE_NAME ": purging non-iounmapped " "trace @0x%08lx, size 0x%lx.\n", trace->probe.addr, trace->probe.len); if (!nommiotrace) @@ -500,39 +499,37 @@ static int __init init(void) dir = debugfs_create_dir(APP_DIR, NULL); if (!dir) { - printk(KERN_ERR MODULE_NAME - ": Couldn't create relay app directory.\n"); + pr_err(MODULE_NAME ": Couldn't create relay app directory.\n"); return -ENOMEM; } chan = create_channel(subbuf_size, n_subbufs); if (!chan) { debugfs_remove(dir); - printk(KERN_ERR MODULE_NAME - ": relay app channel creation failed\n"); + pr_err(MODULE_NAME ": relay app channel creation failed\n"); return -ENOMEM; } - init_kmmio(); + reference_kmmio(); proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL); if (proc_marker_file) proc_marker_file->write_proc = write_marker; - printk(KERN_DEBUG MODULE_NAME ": loaded.\n"); + pr_debug(MODULE_NAME ": loaded.\n"); if (nommiotrace) - printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n"); + pr_info(MODULE_NAME ": MMIO tracing disabled.\n"); if (ISA_trace) - printk(KERN_WARNING MODULE_NAME - ": Warning! low ISA range will be traced.\n"); + pr_warning(MODULE_NAME ": Warning! low ISA range will be " + "traced.\n"); return 0; } static void __exit cleanup(void) { - printk(KERN_DEBUG MODULE_NAME ": unload...\n"); + pr_debug(MODULE_NAME ": unload...\n"); clear_trace_list(); - cleanup_kmmio(); + unreference_kmmio(); remove_proc_entry(MARKER_FILE, NULL); destroy_channel(); if (dir) diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/kernel/mmiotrace/pf_in.c index 67ea520..efa1911 100644 --- a/arch/x86/kernel/mmiotrace/pf_in.c +++ b/arch/x86/kernel/mmiotrace/pf_in.c @@ -19,7 +19,7 @@ * */ -/* $Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp $ +/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp * Copyright by Intel Crop., 2002 * Louis Zhuang (louis.zhuang@intel.com) * diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c index 40e66b0..5ecff57 100644 --- a/arch/x86/kernel/mmiotrace/testmmiotrace.c +++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c @@ -41,8 +41,7 @@ static void do_test(void) { void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000); if (!p) { - printk(KERN_ERR MODULE_NAME ": could not ioremap IO memory, " - "aborting.\n"); + pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; } do_write_test(p); @@ -53,14 +52,14 @@ static void do_test(void) static int __init init(void) { if (mmio_address == 0) { - printk(KERN_ERR MODULE_NAME ": you have to use the module " - "argument mmio_address.\n"); - printk(KERN_ERR MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" + pr_err(MODULE_NAME ": you have to use the module argument " + "mmio_address.\n"); + pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); return -ENXIO; } - printk(KERN_WARNING MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " + pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " "in PCI address space, and writing " "rubbish in there.\n", mmio_address); do_test(); @@ -69,7 +68,7 @@ static int __init init(void) static void __exit cleanup(void) { - printk(KERN_DEBUG MODULE_NAME ": unloaded.\n"); + pr_debug(MODULE_NAME ": unloaded.\n"); } module_init(init); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e9a086a..8c828a6 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -49,60 +50,14 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -#ifdef CONFIG_MMIOTRACE_HOOKS -static pf_handler_func mmiotrace_pf_handler; /* protected by RCU */ -static DEFINE_SPINLOCK(mmiotrace_handler_lock); - -int mmiotrace_register_pf(pf_handler_func new_pfh) -{ - int ret = 0; - unsigned long flags; - spin_lock_irqsave(&mmiotrace_handler_lock, flags); - if (mmiotrace_pf_handler) - ret = -EBUSY; - else - mmiotrace_pf_handler = new_pfh; - spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); - return ret; -} -EXPORT_SYMBOL_GPL(mmiotrace_register_pf); - -/** - * mmiotrace_unregister_pf: - * The caller must ensure @old_pfh is not in use anymore before freeing it. - * This function does not guarantee it. The handler function pointer is - * protected by RCU, so you can do this by e.g. calling synchronize_rcu(). - */ -int mmiotrace_unregister_pf(pf_handler_func old_pfh) -{ - int ret = 0; - unsigned long flags; - spin_lock_irqsave(&mmiotrace_handler_lock, flags); - if (mmiotrace_pf_handler != old_pfh) - ret = -EPERM; - else - mmiotrace_pf_handler = NULL; - spin_unlock_irqrestore(&mmiotrace_handler_lock, flags); - return ret; -} -EXPORT_SYMBOL_GPL(mmiotrace_unregister_pf); -#endif /* CONFIG_MMIOTRACE_HOOKS */ - -/* returns non-zero if do_page_fault() should return */ -static inline int call_mmiotrace(struct pt_regs *regs, - unsigned long error_code, - unsigned long address) +static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { #ifdef CONFIG_MMIOTRACE_HOOKS - int ret = 0; - rcu_read_lock(); - if (mmiotrace_pf_handler) - ret = mmiotrace_pf_handler(regs, error_code, address); - rcu_read_unlock(); - return ret; -#else - return 0; + if (unlikely(is_kmmio_active())) + if (kmmio_handler(regs, addr) == 1) + return -1; #endif + return 0; } static inline int notify_page_fault(struct pt_regs *regs) @@ -657,7 +612,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (notify_page_fault(regs)) return; - if (call_mmiotrace(regs, error_code, address)) + if (unlikely(kmmio_fault(regs, address))) return; /* diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index 7063281..96651bb 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -35,11 +35,4 @@ extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); -typedef int (*pf_handler_func)(struct pt_regs *regs, - unsigned long error_code, - unsigned long address); - -extern int mmiotrace_register_pf(pf_handler_func new_pfh); -extern int mmiotrace_unregister_pf(pf_handler_func old_pfh); - #endif diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 6ec288f..d87a6cd 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,44 @@ #include +#ifdef __KERNEL__ + +#include + +struct kmmio_probe; +struct pt_regs; + +typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, + struct pt_regs *, unsigned long addr); +typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, + unsigned long condition, struct pt_regs *); + +struct kmmio_probe { + struct list_head list; + unsigned long addr; /* start location of the probe point */ + unsigned long len; /* length of the probe region */ + kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ + kmmio_post_handler_t post_handler; /* Called after addr is executed */ +}; + +/* kmmio is active by some kmmio_probes? */ +static inline int is_kmmio_active(void) +{ + extern unsigned int kmmio_count; + return kmmio_count; +} + +extern void reference_kmmio(void); +extern void unreference_kmmio(void); +extern int register_kmmio_probe(struct kmmio_probe *p); +extern void unregister_kmmio_probe(struct kmmio_probe *p); + +/* Called from page fault handler. */ +extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); + +#endif /* __KERNEL__ */ + + /* * If you change anything here, you must bump MMIO_VERSION. * This is the relay data format for user space. -- cgit v0.10.2 From d61fc44853f46fb002228b18aa5f30db21fcd4ac Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: mmiotrace, preview 2 Kconfig.debug, Makefile and testmmiotrace.c style fixes. Use real mutex instead of mutex. Fix failure path in register probe func. kmmio: RCU read-locked over single stepping. Generate mapping id's. Make mmio-mod.c built-in and rewrite its locking. Add debugfs file to enable/disable mmiotracing. kmmio: use irqsave spinlocks. Lots of cleanups in mmio-mod.c Marker file moved from /proc into debugfs. Call mmiotrace entrypoints directly from ioremap.c. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 9491c0a..aa0d646 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -170,22 +170,19 @@ config IOMMU_LEAK config MMIOTRACE_HOOKS bool - default n config MMIOTRACE - tristate "Memory mapped IO tracing" + bool "Memory mapped IO tracing" depends on DEBUG_KERNEL && RELAY && DEBUG_FS select MMIOTRACE_HOOKS - default n + default y help - This will build a kernel module called mmiotrace. - Making this a built-in is heavily discouraged. - - Mmiotrace traces Memory Mapped I/O access and is meant for debugging - and reverse engineering. The kernel module offers wrapped - versions of the ioremap family of functions. The driver to be traced - must be modified to call these wrappers. A user space program is - required to collect the MMIO data. + Mmiotrace traces Memory Mapped I/O access and is meant for + debugging and reverse engineering. It is called from the ioremap + implementation and works via page faults. A user space program is + required to collect the MMIO data from debugfs files. + Tracing is disabled by default and can be enabled from a debugfs + file. See http://nouveau.freedesktop.org/wiki/MmioTrace If you are not helping to develop drivers, say N. @@ -193,7 +190,6 @@ config MMIOTRACE config MMIOTRACE_TEST tristate "Test module for mmiotrace" depends on MMIOTRACE && m - default n help This is a dumb module for testing mmiotrace. It is very dangerous as it will write garbage to IO memory starting at a given address. diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile index cf1e747..dbcd8d5 100644 --- a/arch/x86/kernel/mmiotrace/Makefile +++ b/arch/x86/kernel/mmiotrace/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-objs := pf_in.o mmio-mod.o +mmiotrace-y := pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index 539a9b1..efb4679 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -59,7 +60,7 @@ struct kmmio_context { static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args); -static DECLARE_MUTEX(kmmio_init_mutex); +static DEFINE_MUTEX(kmmio_init_mutex); static DEFINE_SPINLOCK(kmmio_lock); /* These are protected by kmmio_lock */ @@ -90,7 +91,7 @@ static struct notifier_block nb_die = { */ void reference_kmmio(void) { - down(&kmmio_init_mutex); + mutex_lock(&kmmio_init_mutex); spin_lock_irq(&kmmio_lock); if (!kmmio_initialized) { int i; @@ -101,7 +102,7 @@ void reference_kmmio(void) } kmmio_initialized++; spin_unlock_irq(&kmmio_lock); - up(&kmmio_init_mutex); + mutex_unlock(&kmmio_init_mutex); } EXPORT_SYMBOL_GPL(reference_kmmio); @@ -115,7 +116,7 @@ void unreference_kmmio(void) { bool unreg = false; - down(&kmmio_init_mutex); + mutex_lock(&kmmio_init_mutex); spin_lock_irq(&kmmio_lock); if (kmmio_initialized == 1) { @@ -128,7 +129,7 @@ void unreference_kmmio(void) if (unreg) unregister_die_notifier(&nb_die); /* calls sync_rcu() */ - up(&kmmio_init_mutex); + mutex_unlock(&kmmio_init_mutex); } EXPORT_SYMBOL(unreference_kmmio); @@ -244,17 +245,13 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) * Preemption is now disabled to prevent process switch during * single stepping. We can only handle one active kmmio trace * per cpu, so ensure that we finish it before something else - * gets to run. - * - * XXX what if an interrupt occurs between returning from - * do_page_fault() and entering the single-step exception handler? - * And that interrupt triggers a kmmio trap? - * XXX If we tracing an interrupt service routine or whatever, is - * this enough to keep it on the current cpu? + * gets to run. We also hold the RCU read lock over single + * stepping to avoid looking up the probe and kmmio_fault_page + * again. */ preempt_disable(); - rcu_read_lock(); + faultpage = get_kmmio_fault_page(addr); if (!faultpage) { /* @@ -287,14 +284,24 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) if (ctx->probe && ctx->probe->pre_handler) ctx->probe->pre_handler(ctx->probe, regs, addr); + /* + * Enable single-stepping and disable interrupts for the faulting + * context. Local interrupts must not get enabled during stepping. + */ regs->flags |= TF_MASK; regs->flags &= ~IF_MASK; /* Now we set present bit in PTE and single step. */ disarm_kmmio_fault_page(ctx->fpage->page, NULL); + /* + * If another cpu accesses the same page while we are stepping, + * the access will not be caught. It will simply succeed and the + * only downside is we lose the event. If this becomes a problem, + * the user should drop to single cpu before tracing. + */ + put_cpu_var(kmmio_ctx); - rcu_read_unlock(); return 1; no_kmmio_ctx: @@ -313,32 +320,15 @@ no_kmmio: static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) { int ret = 0; - struct kmmio_probe *probe; - struct kmmio_fault_page *faultpage; struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) goto out; - rcu_read_lock(); - - faultpage = get_kmmio_fault_page(ctx->addr); - probe = get_kmmio_probe(ctx->addr); - if (faultpage != ctx->fpage || probe != ctx->probe) { - /* - * The trace setup changed after kmmio_handler() and before - * running this respective post handler. User does not want - * the result anymore. - */ - ctx->probe = NULL; - ctx->fpage = NULL; - } - if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - if (ctx->fpage) - arm_kmmio_fault_page(ctx->fpage->page, NULL); + arm_kmmio_fault_page(ctx->fpage->page, NULL); regs->flags &= ~TF_MASK; regs->flags |= ctx->saved_flags; @@ -346,6 +336,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) /* These were acquired in kmmio_handler(). */ ctx->active--; BUG_ON(ctx->active); + rcu_read_unlock(); preempt_enable_no_resched(); /* @@ -355,8 +346,6 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) */ if (!(regs->flags & TF_MASK)) ret = 1; - - rcu_read_unlock(); out: put_cpu_var(kmmio_ctx); return ret; @@ -411,15 +400,16 @@ static void release_kmmio_fault_page(unsigned long page, int register_kmmio_probe(struct kmmio_probe *p) { + unsigned long flags; int ret = 0; unsigned long size = 0; - spin_lock_irq(&kmmio_lock); - kmmio_count++; + spin_lock_irqsave(&kmmio_lock, flags); if (get_kmmio_probe(p->addr)) { ret = -EEXIST; goto out; } + kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); while (size < p->len) { if (add_kmmio_fault_page(p->addr + size)) @@ -427,7 +417,7 @@ int register_kmmio_probe(struct kmmio_probe *p) size += PAGE_SIZE; } out: - spin_unlock_irq(&kmmio_lock); + spin_unlock_irqrestore(&kmmio_lock, flags); /* * XXX: What should I do here? * Here was a call to global_flush_tlb(), but it does not exist @@ -478,7 +468,8 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) /* * Remove a kmmio probe. You have to synchronize_rcu() before you can be - * sure that the callbacks will not be called anymore. + * sure that the callbacks will not be called anymore. Only after that + * you may actually release your struct kmmio_probe. * * Unregistering a kmmio fault page has three steps: * 1. release_kmmio_fault_page() @@ -490,18 +481,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) */ void unregister_kmmio_probe(struct kmmio_probe *p) { + unsigned long flags; unsigned long size = 0; struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; - spin_lock_irq(&kmmio_lock); + spin_lock_irqsave(&kmmio_lock, flags); while (size < p->len) { release_kmmio_fault_page(p->addr + size, &release_list); size += PAGE_SIZE; } list_del_rcu(&p->list); kmmio_count--; - spin_unlock_irq(&kmmio_lock); + spin_unlock_irqrestore(&kmmio_lock, flags); drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); if (!drelease) { diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index e1a5085..7386440 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -19,6 +19,8 @@ * * Derived from the read-mod example from relay-examples by Tom Zanussi. */ +#define DEBUG 1 + #include #include #include @@ -34,12 +36,12 @@ #include "pf_in.h" -/* This app's relay channel files will appear in /debug/mmio-trace */ -#define APP_DIR "mmio-trace" -/* the marker injection file in /proc */ -#define MARKER_FILE "mmio-marker" +#define NAME "mmiotrace: " -#define MODULE_NAME "mmiotrace" +/* This app's relay channel files will appear in /debug/mmio-trace */ +static const char APP_DIR[] = "mmio-trace"; +/* the marker injection file in /debug/APP_DIR */ +static const char MARKER_FILE[] = "mmio-marker"; struct trap_reason { unsigned long addr; @@ -48,6 +50,15 @@ struct trap_reason { int active_traces; }; +struct remap_trace { + struct list_head list; + struct kmmio_probe probe; + unsigned long phys; + unsigned long id; +}; + +static const size_t subbuf_size = 256*1024; + /* Accessed per-cpu. */ static DEFINE_PER_CPU(struct trap_reason, pf_reason); static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); @@ -55,33 +66,53 @@ static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); /* Access to this is not per-cpu. */ static DEFINE_PER_CPU(atomic_t, dropped); -static struct file_operations mmio_fops = { - .owner = THIS_MODULE, -}; +static struct dentry *dir; +static struct dentry *enabled_file; +static struct dentry *marker_file; -static const size_t subbuf_size = 256*1024; +static DEFINE_MUTEX(mmiotrace_mutex); +static DEFINE_SPINLOCK(trace_lock); +static atomic_t mmiotrace_enabled; +static LIST_HEAD(trace_list); /* struct remap_trace */ static struct rchan *chan; -static struct dentry *dir; -static struct proc_dir_entry *proc_marker_file; + +/* + * Locking in this file: + * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections. + * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex + * and trace_lock. + * - Routines depending on is_enabled() must take trace_lock. + * - trace_list users must hold trace_lock. + * - is_enabled() guarantees that chan is valid. + * - pre/post callbacks assume the effect of is_enabled() being true. + */ /* module parameters */ -static unsigned int n_subbufs = 32*4; -static unsigned long filter_offset; -static int nommiotrace; -static int ISA_trace; -static int trace_pc; +static unsigned int n_subbufs = 32*4; +static unsigned long filter_offset; +static int nommiotrace; +static int ISA_trace; +static int trace_pc; +static int enable_now; module_param(n_subbufs, uint, 0); module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); +module_param(enable_now, bool, 0); MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); +MODULE_PARM_DESC(enable_now, "Start mmiotracing immediately on module load."); + +static bool is_enabled(void) +{ + return atomic_read(&mmiotrace_enabled); +} static void record_timestamp(struct mm_io_header *header) { @@ -93,15 +124,15 @@ static void record_timestamp(struct mm_io_header *header) } /* - * Write callback for the /proc entry: + * Write callback for the debugfs entry: * Read a marker and write it to the mmio trace log */ -static int write_marker(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t write_marker(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) { char *event = NULL; struct mm_io_header *headp; - int len = (count > 65535) ? 65535 : count; + ssize_t len = (count > 65535) ? 65535 : count; event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); if (!event) @@ -117,7 +148,12 @@ static int write_marker(struct file *file, const char __user *buffer, return -EFAULT; } - relay_write(chan, event, sizeof(*headp) + len); + spin_lock_irq(&trace_lock); + if (is_enabled()) + relay_write(chan, event, sizeof(*headp) + len); + else + len = -EINVAL; + spin_unlock_irq(&trace_lock); kfree(event); return len; } @@ -128,19 +164,18 @@ static void print_pte(unsigned long address) pte_t *pte = lookup_address(address, &level); if (!pte) { - pr_err(MODULE_NAME ": Error in %s: no pte for page 0x%08lx\n", + pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", __func__, address); return; } if (level == PG_LEVEL_2M) { - pr_emerg(MODULE_NAME ": 4MB pages are not currently " - "supported: %lx\n", address); + pr_emerg(NAME "4MB pages are not currently supported: " + "0x%08lx\n", address); BUG(); } - pr_info(MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", - address, pte_val(*pte), - pte_val(*pte) & _PAGE_PRESENT); + pr_info(NAME "pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), + pte_val(*pte) & _PAGE_PRESENT); } /* @@ -150,22 +185,18 @@ static void print_pte(unsigned long address) static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) { const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - pr_emerg(MODULE_NAME ": unexpected fault for address: %lx, " - "last fault for address: %lx\n", + pr_emerg(NAME "unexpected fault for address: 0x%08lx, " + "last fault for address: 0x%08lx\n", addr, my_reason->addr); print_pte(addr); + print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); + print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); #ifdef __i386__ - print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); - print_symbol(KERN_EMERG "last faulting EIP was at %s\n", - my_reason->ip); pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->si, regs->di, regs->bp, regs->sp); #else - print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); - print_symbol(KERN_EMERG "last faulting RIP was at %s\n", - my_reason->ip); pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", regs->ax, regs->cx, regs->dx); pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", @@ -197,6 +228,10 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, my_trace->header.pid = 0; my_trace->header.data_len = sizeof(struct mm_io_rw); my_trace->rw.address = addr; + /* + * struct remap_trace *trace = p->user_data; + * phys = addr - trace->probe.addr + trace->phys; + */ /* * Only record the program counter when requested. @@ -246,15 +281,10 @@ static void post(struct kmmio_probe *p, unsigned long condition, struct trap_reason *my_reason = &get_cpu_var(pf_reason); struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); - /* - * XXX: This might not get called, if the probe is removed while - * trace hit is on flight. - */ - /* this should always return the active_trace count to 0 */ my_reason->active_traces--; if (my_reason->active_traces) { - pr_emerg(MODULE_NAME ": unexpected post handler"); + pr_emerg(NAME "unexpected post handler"); BUG(); } @@ -284,20 +314,23 @@ static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, int count; if (relay_buf_full(buf)) { if (atomic_inc_return(drop) == 1) - pr_err(MODULE_NAME ": cpu %d buffer full!\n", cpu); + pr_err(NAME "cpu %d buffer full!\n", cpu); return 0; } count = atomic_read(drop); if (count) { - pr_err(MODULE_NAME ": cpu %d buffer no longer full, " - "missed %d events.\n", - cpu, count); + pr_err(NAME "cpu %d buffer no longer full, missed %d events.\n", + cpu, count); atomic_sub(count, drop); } return 1; } +static struct file_operations mmio_fops = { + .owner = THIS_MODULE, +}; + /* file_create() callback. Creates relay file in debugfs. */ static struct dentry *create_buf_file_handler(const char *filename, struct dentry *parent, @@ -333,34 +366,10 @@ static struct rchan_callbacks relay_callbacks = { .remove_buf_file = remove_buf_file_handler, }; -/* - * create_channel - creates channel /debug/APP_DIR/cpuXXX - * Returns channel on success, NULL otherwise - */ -static struct rchan *create_channel(unsigned size, unsigned n) -{ - return relay_open("cpu", dir, size, n, &relay_callbacks, NULL); -} - -/* destroy_channel - destroys channel /debug/APP_DIR/cpuXXX */ -static void destroy_channel(void) -{ - if (chan) { - relay_close(chan); - chan = NULL; - } -} - -struct remap_trace { - struct list_head list; - struct kmmio_probe probe; -}; -static LIST_HEAD(trace_list); -static DEFINE_SPINLOCK(trace_list_lock); - -static void do_ioremap_trace_core(unsigned long offset, unsigned long size, +static void ioremap_trace_core(unsigned long offset, unsigned long size, void __iomem *addr) { + static atomic_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); struct mm_io_header_map event = { .header = { @@ -380,61 +389,49 @@ static void do_ioremap_trace_core(unsigned long offset, unsigned long size, }; record_timestamp(&event.header); + if (!trace) { + pr_err(NAME "kmalloc failed in ioremap\n"); + return; + } + *trace = (struct remap_trace) { .probe = { .addr = (unsigned long)addr, .len = size, .pre_handler = pre, .post_handler = post, - } + .user_data = trace + }, + .phys = offset, + .id = atomic_inc_return(&next_id) }; + spin_lock_irq(&trace_lock); + if (!is_enabled()) + goto not_enabled; + relay_write(chan, &event, sizeof(event)); - spin_lock(&trace_list_lock); list_add_tail(&trace->list, &trace_list); - spin_unlock(&trace_list_lock); if (!nommiotrace) register_kmmio_probe(&trace->probe); + +not_enabled: + spin_unlock_irq(&trace_lock); } -static void ioremap_trace_core(unsigned long offset, unsigned long size, - void __iomem *addr) +void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) { - if ((filter_offset) && (offset != filter_offset)) + if (!is_enabled()) /* recheck and proper locking in *_core() */ return; - /* Don't trace the low PCI/ISA area, it's always mapped.. */ - if (!ISA_trace && (offset < ISA_END_ADDRESS) && - (offset + size > ISA_START_ADDRESS)) { - pr_notice(MODULE_NAME ": Ignoring map of low PCI/ISA area " - "(0x%lx-0x%lx)\n", - offset, offset + size); + pr_debug(NAME "ioremap_*(0x%lx, 0x%lx) = %p\n", offset, size, addr); + if ((filter_offset) && (offset != filter_offset)) return; - } - do_ioremap_trace_core(offset, size, addr); -} - -void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size) -{ - void __iomem *p = ioremap_cache(offset, size); - pr_debug(MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", - offset, size, p); - ioremap_trace_core(offset, size, p); - return p; + ioremap_trace_core(offset, size, addr); } -EXPORT_SYMBOL(ioremap_cache_trace); -void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size) -{ - void __iomem *p = ioremap_nocache(offset, size); - pr_debug(MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", - offset, size, p); - ioremap_trace_core(offset, size, p); - return p; -} -EXPORT_SYMBOL(ioremap_nocache_trace); - -void iounmap_trace(volatile void __iomem *addr) +static void iounmap_trace_core(volatile void __iomem *addr) { struct mm_io_header_map event = { .header = { @@ -454,84 +451,212 @@ void iounmap_trace(volatile void __iomem *addr) }; struct remap_trace *trace; struct remap_trace *tmp; - pr_debug(MODULE_NAME ": Unmapping %p.\n", addr); + struct remap_trace *found_trace = NULL; + + pr_debug(NAME "Unmapping %p.\n", addr); record_timestamp(&event.header); - spin_lock(&trace_list_lock); + spin_lock_irq(&trace_lock); + if (!is_enabled()) + goto not_enabled; + list_for_each_entry_safe(trace, tmp, &trace_list, list) { if ((unsigned long)addr == trace->probe.addr) { if (!nommiotrace) unregister_kmmio_probe(&trace->probe); list_del(&trace->list); - kfree(trace); + found_trace = trace; break; } } - spin_unlock(&trace_list_lock); relay_write(chan, &event, sizeof(event)); - iounmap(addr); + +not_enabled: + spin_unlock_irq(&trace_lock); + if (found_trace) { + synchronize_rcu(); /* unregister_kmmio_probe() requirement */ + kfree(found_trace); + } +} + +void mmiotrace_iounmap(volatile void __iomem *addr) +{ + might_sleep(); + if (is_enabled()) /* recheck and proper locking in *_core() */ + iounmap_trace_core(addr); } -EXPORT_SYMBOL(iounmap_trace); static void clear_trace_list(void) { struct remap_trace *trace; struct remap_trace *tmp; - spin_lock(&trace_list_lock); - list_for_each_entry_safe(trace, tmp, &trace_list, list) { - pr_warning(MODULE_NAME ": purging non-iounmapped " + /* + * No locking required, because the caller ensures we are in a + * critical section via mutex, and is_enabled() is false, + * i.e. nothing can traverse or modify this list. + * Caller also ensures is_enabled() cannot change. + */ + list_for_each_entry(trace, &trace_list, list) { + pr_notice(NAME "purging non-iounmapped " "trace @0x%08lx, size 0x%lx.\n", trace->probe.addr, trace->probe.len); if (!nommiotrace) unregister_kmmio_probe(&trace->probe); + } + synchronize_rcu(); /* unregister_kmmio_probe() requirement */ + + list_for_each_entry_safe(trace, tmp, &trace_list, list) { list_del(&trace->list); kfree(trace); + } +} + +static ssize_t read_enabled_file_bool(struct file *file, + char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[3]; + + if (is_enabled()) + buf[0] = '1'; + else + buf[0] = '0'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static void enable_mmiotrace(void); +static void disable_mmiotrace(void); + +static ssize_t write_enabled_file_bool(struct file *file, + const char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[32]; + int buf_size = min(count, (sizeof(buf)-1)); + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + switch (buf[0]) { + case 'y': + case 'Y': + case '1': + enable_mmiotrace(); + break; + case 'n': + case 'N': + case '0': + disable_mmiotrace(); break; } - spin_unlock(&trace_list_lock); + + return count; +} + +/* this ripped from kernel/kprobes.c */ +static struct file_operations fops_enabled = { + .owner = THIS_MODULE, + .read = read_enabled_file_bool, + .write = write_enabled_file_bool +}; + +static struct file_operations fops_marker = { + .owner = THIS_MODULE, + .write = write_marker +}; + +static void enable_mmiotrace(void) +{ + mutex_lock(&mmiotrace_mutex); + if (is_enabled()) + goto out; + + chan = relay_open("cpu", dir, subbuf_size, n_subbufs, + &relay_callbacks, NULL); + if (!chan) { + pr_err(NAME "relay app channel creation failed.\n"); + goto out; + } + + reference_kmmio(); + + marker_file = debugfs_create_file("marker", 0660, dir, NULL, + &fops_marker); + if (!marker_file) + pr_err(NAME "marker file creation failed.\n"); + + if (nommiotrace) + pr_info(NAME "MMIO tracing disabled.\n"); + if (ISA_trace) + pr_warning(NAME "Warning! low ISA range will be traced.\n"); + spin_lock_irq(&trace_lock); + atomic_inc(&mmiotrace_enabled); + spin_unlock_irq(&trace_lock); + pr_info(NAME "enabled.\n"); +out: + mutex_unlock(&mmiotrace_mutex); +} + +static void disable_mmiotrace(void) +{ + mutex_lock(&mmiotrace_mutex); + if (!is_enabled()) + goto out; + + spin_lock_irq(&trace_lock); + atomic_dec(&mmiotrace_enabled); + BUG_ON(is_enabled()); + spin_unlock_irq(&trace_lock); + + clear_trace_list(); /* guarantees: no more kmmio callbacks */ + unreference_kmmio(); + if (marker_file) { + debugfs_remove(marker_file); + marker_file = NULL; + } + if (chan) { + relay_close(chan); + chan = NULL; + } + + pr_info(NAME "disabled.\n"); +out: + mutex_unlock(&mmiotrace_mutex); } static int __init init(void) { + pr_debug(NAME "load...\n"); if (n_subbufs < 2) return -EINVAL; dir = debugfs_create_dir(APP_DIR, NULL); if (!dir) { - pr_err(MODULE_NAME ": Couldn't create relay app directory.\n"); + pr_err(NAME "Couldn't create relay app directory.\n"); return -ENOMEM; } - chan = create_channel(subbuf_size, n_subbufs); - if (!chan) { + enabled_file = debugfs_create_file("enabled", 0600, dir, NULL, + &fops_enabled); + if (!enabled_file) { + pr_err(NAME "Couldn't create enabled file.\n"); debugfs_remove(dir); - pr_err(MODULE_NAME ": relay app channel creation failed\n"); return -ENOMEM; } - reference_kmmio(); - - proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL); - if (proc_marker_file) - proc_marker_file->write_proc = write_marker; + if (enable_now) + enable_mmiotrace(); - pr_debug(MODULE_NAME ": loaded.\n"); - if (nommiotrace) - pr_info(MODULE_NAME ": MMIO tracing disabled.\n"); - if (ISA_trace) - pr_warning(MODULE_NAME ": Warning! low ISA range will be " - "traced.\n"); return 0; } static void __exit cleanup(void) { - pr_debug(MODULE_NAME ": unload...\n"); - clear_trace_list(); - unreference_kmmio(); - remove_proc_entry(MARKER_FILE, NULL); - destroy_channel(); + pr_debug(NAME "unload...\n"); + if (enabled_file) + debugfs_remove(enabled_file); + disable_mmiotrace(); if (dir) debugfs_remove(dir); } diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c index 5ecff57..cfa60b2 100644 --- a/arch/x86/kernel/mmiotrace/testmmiotrace.c +++ b/arch/x86/kernel/mmiotrace/testmmiotrace.c @@ -4,10 +4,6 @@ #include #include -extern void __iomem *ioremap_nocache_trace(unsigned long offset, - unsigned long size); -extern void iounmap_trace(volatile void __iomem *addr); - #define MODULE_NAME "testmmiotrace" static unsigned long mmio_address; @@ -28,25 +24,24 @@ static void do_write_test(void __iomem *p) static void do_read_test(void __iomem *p) { unsigned int i; - volatile unsigned int v; for (i = 0; i < 256; i++) - v = ioread8(p + i); + ioread8(p + i); for (i = 1024; i < (5 * 1024); i += 2) - v = ioread16(p + i); + ioread16(p + i); for (i = (5 * 1024); i < (16 * 1024); i += 4) - v = ioread32(p + i); + ioread32(p + i); } static void do_test(void) { - void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000); + void __iomem *p = ioremap_nocache(mmio_address, 0x4000); if (!p) { pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; } do_write_test(p); do_read_test(p); - iounmap_trace(p); + iounmap(p); } static int __init init(void) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 71bb315..8927c87 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -126,6 +127,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, unsigned long new_prot_val; pgprot_t prot; int retval; + void __iomem *ret_addr; /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; @@ -233,7 +235,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; } - return (void __iomem *) (vaddr + offset); + ret_addr = (void __iomem *) (vaddr + offset); + mmiotrace_ioremap(phys_addr, size, ret_addr); + + return ret_addr; } /** @@ -325,6 +330,8 @@ void iounmap(volatile void __iomem *addr) addr = (volatile void __iomem *) (PAGE_MASK & (unsigned long __force)addr); + mmiotrace_iounmap(addr); + /* Use the vm area unlocked, assuming the caller ensures there isn't another iounmap for the same address in parallel. Reuse of the virtual address is prevented by diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index d87a6cd..cb5efd0 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -16,11 +16,12 @@ typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, unsigned long condition, struct pt_regs *); struct kmmio_probe { - struct list_head list; + struct list_head list; /* kmmio internal list */ unsigned long addr; /* start location of the probe point */ unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ + void *user_data; }; /* kmmio is active by some kmmio_probes? */ @@ -38,6 +39,21 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p); /* Called from page fault handler. */ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); +/* Called from ioremap.c */ +#ifdef CONFIG_MMIOTRACE +extern void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_iounmap(volatile void __iomem *addr); +#else +static inline void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +{ +} +static inline void mmiotrace_iounmap(volatile void __iomem *addr) +{ +} +#endif /* CONFIG_MMIOTRACE_HOOKS */ + #endif /* __KERNEL__ */ -- cgit v0.10.2 From f984b51e0779a6dd30feedc41404013ca54e5d05 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: ftrace: add mmiotrace plugin On Sat, 22 Mar 2008 13:07:47 +0100 Ingo Molnar wrote: > > > i'd suggest the following: pull x86.git and sched-devel.git into a > > > single tree [the two will combine without rejects]. Then try to add a > > > kernel/tracing/trace_mmiotrace.c ftrace plugin. The trace_sysprof.c > > > plugin might be a good example. > > > > I did this and now I have mmiotrace enabled/disabled via the tracing > > framework (what do we call this, since ftrace is one of the tracers?). > > cool! could you send the patches for that? (even if they are not fully > functional yet) Patch attached in the end. Nice to see how much code disappeared. I tried to mark all the features I had to break with XXX-comments. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index aa0d646..7e4b849 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -173,7 +173,8 @@ config MMIOTRACE_HOOKS config MMIOTRACE bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && RELAY && DEBUG_FS + depends on DEBUG_KERNEL && RELAY + select TRACING select MMIOTRACE_HOOKS default y help diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index 7386440..c7a67d7 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -22,9 +22,8 @@ #define DEBUG 1 #include -#include #include -#include +#include #include #include #include @@ -63,18 +62,18 @@ static const size_t subbuf_size = 256*1024; static DEFINE_PER_CPU(struct trap_reason, pf_reason); static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); +#if 0 /* XXX: no way gather this info anymore */ /* Access to this is not per-cpu. */ static DEFINE_PER_CPU(atomic_t, dropped); +#endif static struct dentry *dir; -static struct dentry *enabled_file; static struct dentry *marker_file; static DEFINE_MUTEX(mmiotrace_mutex); static DEFINE_SPINLOCK(trace_lock); static atomic_t mmiotrace_enabled; static LIST_HEAD(trace_list); /* struct remap_trace */ -static struct rchan *chan; /* * Locking in this file: @@ -93,36 +92,24 @@ static unsigned long filter_offset; static int nommiotrace; static int ISA_trace; static int trace_pc; -static int enable_now; module_param(n_subbufs, uint, 0); module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); -module_param(enable_now, bool, 0); MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); -MODULE_PARM_DESC(enable_now, "Start mmiotracing immediately on module load."); static bool is_enabled(void) { return atomic_read(&mmiotrace_enabled); } -static void record_timestamp(struct mm_io_header *header) -{ - struct timespec now; - - getnstimeofday(&now); - header->sec = now.tv_sec; - header->nsec = now.tv_nsec; -} - /* * Write callback for the debugfs entry: * Read a marker and write it to the mmio trace log @@ -141,7 +128,6 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, headp = (struct mm_io_header *)event; headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); headp->data_len = len; - record_timestamp(headp); if (copy_from_user(event + sizeof(*headp), buffer, len)) { kfree(event); @@ -149,9 +135,11 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, } spin_lock_irq(&trace_lock); +#if 0 /* XXX: convert this to use tracing */ if (is_enabled()) relay_write(chan, event, sizeof(*headp) + len); else +#endif len = -EINVAL; spin_unlock_irq(&trace_lock); kfree(event); @@ -242,7 +230,11 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, else my_trace->rw.pc = 0; - record_timestamp(&my_trace->header); + /* + * XXX: the timestamp recorded will be *after* the tracing has been + * done, not at the time we hit the instruction. SMP implications + * on event ordering? + */ switch (type) { case REG_READ: @@ -295,77 +287,19 @@ static void post(struct kmmio_probe *p, unsigned long condition, default: break; } - relay_write(chan, my_trace, sizeof(*my_trace)); + + /* + * XXX: Several required values are ignored: + * - mapping id + * - program counter + * Also the address should be physical, not virtual. + */ + mmio_trace_record(my_trace->header.type, my_trace->rw.address, + my_trace->rw.value); put_cpu_var(cpu_trace); put_cpu_var(pf_reason); } -/* - * subbuf_start() relay callback. - * - * Defined so that we know when events are dropped due to the buffer-full - * condition. - */ -static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, size_t prev_padding) -{ - unsigned int cpu = buf->cpu; - atomic_t *drop = &per_cpu(dropped, cpu); - int count; - if (relay_buf_full(buf)) { - if (atomic_inc_return(drop) == 1) - pr_err(NAME "cpu %d buffer full!\n", cpu); - return 0; - } - count = atomic_read(drop); - if (count) { - pr_err(NAME "cpu %d buffer no longer full, missed %d events.\n", - cpu, count); - atomic_sub(count, drop); - } - - return 1; -} - -static struct file_operations mmio_fops = { - .owner = THIS_MODULE, -}; - -/* file_create() callback. Creates relay file in debugfs. */ -static struct dentry *create_buf_file_handler(const char *filename, - struct dentry *parent, - int mode, - struct rchan_buf *buf, - int *is_global) -{ - struct dentry *buf_file; - - mmio_fops.read = relay_file_operations.read; - mmio_fops.open = relay_file_operations.open; - mmio_fops.poll = relay_file_operations.poll; - mmio_fops.mmap = relay_file_operations.mmap; - mmio_fops.release = relay_file_operations.release; - mmio_fops.splice_read = relay_file_operations.splice_read; - - buf_file = debugfs_create_file(filename, mode, parent, buf, - &mmio_fops); - - return buf_file; -} - -/* file_remove() default callback. Removes relay file in debugfs. */ -static int remove_buf_file_handler(struct dentry *dentry) -{ - debugfs_remove(dentry); - return 0; -} - -static struct rchan_callbacks relay_callbacks = { - .subbuf_start = subbuf_start_handler, - .create_buf_file = create_buf_file_handler, - .remove_buf_file = remove_buf_file_handler, -}; - static void ioremap_trace_core(unsigned long offset, unsigned long size, void __iomem *addr) { @@ -387,7 +321,6 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, .pc = 0 } }; - record_timestamp(&event.header); if (!trace) { pr_err(NAME "kmalloc failed in ioremap\n"); @@ -410,7 +343,10 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, if (!is_enabled()) goto not_enabled; - relay_write(chan, &event, sizeof(event)); + /* + * XXX: Insufficient data recorded! + */ + mmio_trace_record(event.header.type, event.map.addr, event.map.len); list_add_tail(&trace->list, &trace_list); if (!nommiotrace) register_kmmio_probe(&trace->probe); @@ -454,7 +390,6 @@ static void iounmap_trace_core(volatile void __iomem *addr) struct remap_trace *found_trace = NULL; pr_debug(NAME "Unmapping %p.\n", addr); - record_timestamp(&event.header); spin_lock_irq(&trace_lock); if (!is_enabled()) @@ -469,7 +404,8 @@ static void iounmap_trace_core(volatile void __iomem *addr) break; } } - relay_write(chan, &event, sizeof(event)); + mmio_trace_record(event.header.type, event.map.addr, + found_trace ? found_trace->id : -1); not_enabled: spin_unlock_irq(&trace_lock); @@ -512,77 +448,23 @@ static void clear_trace_list(void) } } -static ssize_t read_enabled_file_bool(struct file *file, - char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[3]; - - if (is_enabled()) - buf[0] = '1'; - else - buf[0] = '0'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static void enable_mmiotrace(void); -static void disable_mmiotrace(void); - -static ssize_t write_enabled_file_bool(struct file *file, - const char __user *user_buf, size_t count, loff_t *ppos) -{ - char buf[32]; - int buf_size = min(count, (sizeof(buf)-1)); - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - switch (buf[0]) { - case 'y': - case 'Y': - case '1': - enable_mmiotrace(); - break; - case 'n': - case 'N': - case '0': - disable_mmiotrace(); - break; - } - - return count; -} - -/* this ripped from kernel/kprobes.c */ -static struct file_operations fops_enabled = { - .owner = THIS_MODULE, - .read = read_enabled_file_bool, - .write = write_enabled_file_bool -}; - static struct file_operations fops_marker = { .owner = THIS_MODULE, .write = write_marker }; -static void enable_mmiotrace(void) +void enable_mmiotrace(void) { mutex_lock(&mmiotrace_mutex); if (is_enabled()) goto out; - chan = relay_open("cpu", dir, subbuf_size, n_subbufs, - &relay_callbacks, NULL); - if (!chan) { - pr_err(NAME "relay app channel creation failed.\n"); - goto out; - } - reference_kmmio(); +#if 0 /* XXX: tracing does not support text entries */ marker_file = debugfs_create_file("marker", 0660, dir, NULL, &fops_marker); +#endif if (!marker_file) pr_err(NAME "marker file creation failed.\n"); @@ -598,7 +480,7 @@ out: mutex_unlock(&mmiotrace_mutex); } -static void disable_mmiotrace(void) +void disable_mmiotrace(void) { mutex_lock(&mmiotrace_mutex); if (!is_enabled()) @@ -615,17 +497,13 @@ static void disable_mmiotrace(void) debugfs_remove(marker_file); marker_file = NULL; } - if (chan) { - relay_close(chan); - chan = NULL; - } pr_info(NAME "disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); } -static int __init init(void) +int __init init_mmiotrace(void) { pr_debug(NAME "load...\n"); if (n_subbufs < 2) @@ -636,31 +514,5 @@ static int __init init(void) pr_err(NAME "Couldn't create relay app directory.\n"); return -ENOMEM; } - - enabled_file = debugfs_create_file("enabled", 0600, dir, NULL, - &fops_enabled); - if (!enabled_file) { - pr_err(NAME "Couldn't create enabled file.\n"); - debugfs_remove(dir); - return -ENOMEM; - } - - if (enable_now) - enable_mmiotrace(); - return 0; } - -static void __exit cleanup(void) -{ - pr_debug(NAME "unload...\n"); - if (enabled_file) - debugfs_remove(enabled_file); - disable_mmiotrace(); - if (dir) - debugfs_remove(dir); -} - -module_init(init); -module_exit(cleanup); -MODULE_LICENSE("GPL"); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb5efd0..579b3b06 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,6 +54,12 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ +/* in kernel/trace/trace_mmiotrace.c */ +extern int __init init_mmiotrace(void); +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); + #endif /* __KERNEL__ */ diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index d9efbbf..c44a7dc 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -18,5 +18,6 @@ obj-$(CONFIG_FTRACE) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o +obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c new file mode 100644 index 0000000..e4dd03c --- /dev/null +++ b/kernel/trace/trace_mmiotrace.c @@ -0,0 +1,84 @@ +/* + * Memory mapped I/O tracing + * + * Copyright (C) 2008 Pekka Paalanen + */ + +#define DEBUG 1 + +#include +#include + +#include "trace.h" + +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); + +static struct trace_array *mmio_trace_array; + + +static void mmio_trace_init(struct trace_array *tr) +{ + pr_debug("in %s\n", __func__); + mmio_trace_array = tr; + if (tr->ctrl) + enable_mmiotrace(); +} + +static void mmio_trace_reset(struct trace_array *tr) +{ + pr_debug("in %s\n", __func__); + if (tr->ctrl) + disable_mmiotrace(); +} + +static void mmio_trace_ctrl_update(struct trace_array *tr) +{ + pr_debug("in %s\n", __func__); + if (tr->ctrl) + enable_mmiotrace(); + else + disable_mmiotrace(); +} + +static struct tracer mmio_tracer __read_mostly = +{ + .name = "mmiotrace", + .init = mmio_trace_init, + .reset = mmio_trace_reset, + .ctrl_update = mmio_trace_ctrl_update, +}; + +__init static int init_mmio_trace(void) +{ + int ret = init_mmiotrace(); + if (ret) + return ret; + return register_tracer(&mmio_tracer); +} +device_initcall(init_mmio_trace); + +void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg) +{ + struct trace_array *tr = mmio_trace_array; + struct trace_array_cpu *data = tr->data[smp_processor_id()]; + + if (!current || current->pid == 0) { + /* + * XXX: This is a problem. We need to able to record, no + * matter what. tracing_generic_entry_update() would crash. + */ + static unsigned limit; + if (limit++ < 12) + pr_err("Error in %s: no current.\n", __func__); + return; + } + if (!tr || !data) { + static unsigned limit; + if (limit++ < 12) + pr_err("%s: no tr or data\n", __func__); + return; + } + __trace_special(tr, data, type, addr, arg); +} -- cgit v0.10.2 From bd8ac686c73c7e925fcfe0b02dc4e7b947127864 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: ftrace: mmiotrace, updates here is a patch that makes mmiotrace work almost well within the tracing framework. The patch applies on top of my previous patch. I have my own output formatting in place now. Summary of changes: - fix the NULL dereference that was due to not calling tracing_reset() - add print_line() callback into struct tracer - implement print_line() for mmiotrace, producing up-to-spec text - add my output header, but that is not really called in the right place - rewrote the main structs in mmiotrace - added two new trace entry types: TRACE_MMIO_RW and TRACE_MMIO_MAP - made some functions in trace.c non-static - check current==NULL in tracing_generic_entry_update() - fix(?) comparison in trace_seq_printf() Things seem to work fine except a few issues. Markers (text lines injected into mmiotrace log) are missing, I did not feel hacking them in before we have variable length entries. My output header is printed only for 'trace' file, but not 'trace_pipe'. For some reason, despite my quick fix, iter->trace is NULL in print_trace_line() when called from 'trace_pipe' file, which means I don't get proper output formatting. I only tried by loading nouveau.ko, which just detects the card, and that is traced fine. I didn't try further. Map, two reads and unmap. Works perfectly. I am missing the information about overflows, I'd prefer to have a counter for lost events. I didn't try, but I guess currently there is no way of knowning when it overflows? So, not too far from being fully operational, it seems :-) And looking at the diffstat, there also is some 700-900 lines of user space code that just became obsolete. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 7e4b849..1d6de0d 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -173,7 +173,7 @@ config MMIOTRACE_HOOKS config MMIOTRACE bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && RELAY + depends on DEBUG_KERNEL select TRACING select MMIOTRACE_HOOKS default y diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index c7a67d7..62abc28 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -37,11 +37,6 @@ #define NAME "mmiotrace: " -/* This app's relay channel files will appear in /debug/mmio-trace */ -static const char APP_DIR[] = "mmio-trace"; -/* the marker injection file in /debug/APP_DIR */ -static const char MARKER_FILE[] = "mmio-marker"; - struct trap_reason { unsigned long addr; unsigned long ip; @@ -56,18 +51,15 @@ struct remap_trace { unsigned long id; }; -static const size_t subbuf_size = 256*1024; - /* Accessed per-cpu. */ static DEFINE_PER_CPU(struct trap_reason, pf_reason); -static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace); +static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); #if 0 /* XXX: no way gather this info anymore */ /* Access to this is not per-cpu. */ static DEFINE_PER_CPU(atomic_t, dropped); #endif -static struct dentry *dir; static struct dentry *marker_file; static DEFINE_MUTEX(mmiotrace_mutex); @@ -82,24 +74,21 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ * and trace_lock. * - Routines depending on is_enabled() must take trace_lock. * - trace_list users must hold trace_lock. - * - is_enabled() guarantees that chan is valid. + * - is_enabled() guarantees that mmio_trace_record is allowed. * - pre/post callbacks assume the effect of is_enabled() being true. */ /* module parameters */ -static unsigned int n_subbufs = 32*4; static unsigned long filter_offset; static int nommiotrace; static int ISA_trace; static int trace_pc; -module_param(n_subbufs, uint, 0); module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); -MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); @@ -110,6 +99,7 @@ static bool is_enabled(void) return atomic_read(&mmiotrace_enabled); } +#if 0 /* XXX: needs rewrite */ /* * Write callback for the debugfs entry: * Read a marker and write it to the mmio trace log @@ -145,6 +135,7 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, kfree(event); return len; } +#endif static void print_pte(unsigned long address) { @@ -198,9 +189,10 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, unsigned long addr) { struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); const unsigned long instptr = instruction_pointer(regs); const enum reason_type type = get_ins_type(instptr); + struct remap_trace *trace = p->user_data; /* it doesn't make sense to have more than one active trace per cpu */ if (my_reason->active_traces) @@ -212,23 +204,17 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, my_reason->addr = addr; my_reason->ip = instptr; - my_trace->header.type = MMIO_MAGIC; - my_trace->header.pid = 0; - my_trace->header.data_len = sizeof(struct mm_io_rw); - my_trace->rw.address = addr; - /* - * struct remap_trace *trace = p->user_data; - * phys = addr - trace->probe.addr + trace->phys; - */ + my_trace->phys = addr - trace->probe.addr + trace->phys; + my_trace->map_id = trace->id; /* * Only record the program counter when requested. * It may taint clean-room reverse engineering. */ if (trace_pc) - my_trace->rw.pc = instptr; + my_trace->pc = instptr; else - my_trace->rw.pc = 0; + my_trace->pc = 0; /* * XXX: the timestamp recorded will be *after* the tracing has been @@ -238,28 +224,25 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, switch (type) { case REG_READ: - my_trace->header.type |= - (MMIO_READ << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); + my_trace->opcode = MMIO_READ; + my_trace->width = get_ins_mem_width(instptr); break; case REG_WRITE: - my_trace->header.type |= - (MMIO_WRITE << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - my_trace->rw.value = get_ins_reg_val(instptr, regs); + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_reg_val(instptr, regs); break; case IMM_WRITE: - my_trace->header.type |= - (MMIO_WRITE << MMIO_OPCODE_SHIFT) | - (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); - my_trace->rw.value = get_ins_imm_val(instptr); + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_imm_val(instptr); break; default: { unsigned char *ip = (unsigned char *)instptr; - my_trace->header.type |= - (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT); - my_trace->rw.value = (*ip) << 16 | *(ip + 1) << 8 | + my_trace->opcode = MMIO_UNKNOWN_OP; + my_trace->width = 0; + my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | *(ip + 2); } } @@ -271,7 +254,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, struct pt_regs *regs) { struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); /* this should always return the active_trace count to 0 */ my_reason->active_traces--; @@ -282,20 +265,13 @@ static void post(struct kmmio_probe *p, unsigned long condition, switch (my_reason->type) { case REG_READ: - my_trace->rw.value = get_ins_reg_val(my_reason->ip, regs); + my_trace->value = get_ins_reg_val(my_reason->ip, regs); break; default: break; } - /* - * XXX: Several required values are ignored: - * - mapping id - * - program counter - * Also the address should be physical, not virtual. - */ - mmio_trace_record(my_trace->header.type, my_trace->rw.address, - my_trace->rw.value); + mmio_trace_rw(my_trace); put_cpu_var(cpu_trace); put_cpu_var(pf_reason); } @@ -305,21 +281,11 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, { static atomic_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); - struct mm_io_header_map event = { - .header = { - .type = MMIO_MAGIC | - (MMIO_PROBE << MMIO_OPCODE_SHIFT), - .sec = 0, - .nsec = 0, - .pid = 0, - .data_len = sizeof(struct mm_io_map) - }, - .map = { - .phys = offset, - .addr = (unsigned long)addr, - .len = size, - .pc = 0 - } + struct mmiotrace_map map = { + .phys = offset, + .virt = (unsigned long)addr, + .len = size, + .opcode = MMIO_PROBE }; if (!trace) { @@ -338,15 +304,13 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, .phys = offset, .id = atomic_inc_return(&next_id) }; + map.map_id = trace->id; spin_lock_irq(&trace_lock); if (!is_enabled()) goto not_enabled; - /* - * XXX: Insufficient data recorded! - */ - mmio_trace_record(event.header.type, event.map.addr, event.map.len); + mmio_trace_mapping(&map); list_add_tail(&trace->list, &trace_list); if (!nommiotrace) register_kmmio_probe(&trace->probe); @@ -369,21 +333,11 @@ mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) static void iounmap_trace_core(volatile void __iomem *addr) { - struct mm_io_header_map event = { - .header = { - .type = MMIO_MAGIC | - (MMIO_UNPROBE << MMIO_OPCODE_SHIFT), - .sec = 0, - .nsec = 0, - .pid = 0, - .data_len = sizeof(struct mm_io_map) - }, - .map = { - .phys = 0, - .addr = (unsigned long)addr, - .len = 0, - .pc = 0 - } + struct mmiotrace_map map = { + .phys = 0, + .virt = (unsigned long)addr, + .len = 0, + .opcode = MMIO_UNPROBE }; struct remap_trace *trace; struct remap_trace *tmp; @@ -404,8 +358,8 @@ static void iounmap_trace_core(volatile void __iomem *addr) break; } } - mmio_trace_record(event.header.type, event.map.addr, - found_trace ? found_trace->id : -1); + map.map_id = (found_trace) ? found_trace->id : -1; + mmio_trace_mapping(&map); not_enabled: spin_unlock_irq(&trace_lock); @@ -448,10 +402,12 @@ static void clear_trace_list(void) } } +#if 0 /* XXX: out of order */ static struct file_operations fops_marker = { .owner = THIS_MODULE, .write = write_marker }; +#endif void enable_mmiotrace(void) { @@ -464,9 +420,9 @@ void enable_mmiotrace(void) #if 0 /* XXX: tracing does not support text entries */ marker_file = debugfs_create_file("marker", 0660, dir, NULL, &fops_marker); -#endif if (!marker_file) pr_err(NAME "marker file creation failed.\n"); +#endif if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); @@ -502,17 +458,3 @@ void disable_mmiotrace(void) out: mutex_unlock(&mmiotrace_mutex); } - -int __init init_mmiotrace(void) -{ - pr_debug(NAME "load...\n"); - if (n_subbufs < 2) - return -EINVAL; - - dir = debugfs_create_dir(APP_DIR, NULL); - if (!dir) { - pr_err(NAME "Couldn't create relay app directory.\n"); - return -ENOMEM; - } - return 0; -} diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 579b3b06..c88a9c1 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,73 +54,38 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ -/* in kernel/trace/trace_mmiotrace.c */ -extern int __init init_mmiotrace(void); -extern void enable_mmiotrace(void); -extern void disable_mmiotrace(void); -extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); - -#endif /* __KERNEL__ */ - - -/* - * If you change anything here, you must bump MMIO_VERSION. - * This is the relay data format for user space. - */ -#define MMIO_VERSION 0x04 - -/* mm_io_header.type */ -#define MMIO_OPCODE_MASK 0xff -#define MMIO_OPCODE_SHIFT 0 -#define MMIO_WIDTH_MASK 0xff00 -#define MMIO_WIDTH_SHIFT 8 -#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) -#define MMIO_MAGIC_MASK 0xffff0000 - -enum mm_io_opcode { /* payload type: */ - MMIO_READ = 0x1, /* struct mm_io_rw */ - MMIO_WRITE = 0x2, /* struct mm_io_rw */ - MMIO_PROBE = 0x3, /* struct mm_io_map */ - MMIO_UNPROBE = 0x4, /* struct mm_io_map */ +enum mm_io_opcode { + MMIO_READ = 0x1, /* struct mmiotrace_rw */ + MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ + MMIO_PROBE = 0x3, /* struct mmiotrace_map */ + MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ MMIO_MARKER = 0x5, /* raw char data */ - MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ + MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */ }; -struct mm_io_header { - __u32 type; /* see MMIO_* macros above */ - __u32 sec; /* timestamp */ - __u32 nsec; - __u32 pid; /* PID of the process, or 0 for kernel core */ - __u16 data_len; /* length of the following payload */ +struct mmiotrace_rw { + unsigned long phys; /* PCI address of register */ + unsigned long value; + unsigned long pc; /* optional program counter */ + int map_id; + unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ + unsigned char width; /* size of register access in bytes */ }; -struct mm_io_rw { - __u64 address; /* virtual address of register */ - __u64 value; - __u64 pc; /* optional program counter */ +struct mmiotrace_map { + unsigned long phys; /* base address in PCI space */ + unsigned long virt; /* base virtual address */ + unsigned long len; /* mapping size */ + int map_id; + unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ }; -struct mm_io_map { - __u64 phys; /* base address in PCI space */ - __u64 addr; /* base virtual address */ - __u64 len; /* mapping size */ - __u64 pc; /* optional program counter */ -}; - - -/* - * These structures are used to allow a single relay_write() - * call to write a full packet. - */ - -struct mm_io_header_rw { - struct mm_io_header header; - struct mm_io_rw rw; -} __attribute__((packed)); +/* in kernel/trace/trace_mmiotrace.c */ +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_rw(struct mmiotrace_rw *rw); +extern void mmio_trace_mapping(struct mmiotrace_map *map); -struct mm_io_header_map { - struct mm_io_header header; - struct mm_io_map map; -} __attribute__((packed)); +#endif /* __KERNEL__ */ #endif /* MMIOTRACE_H */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3271916..d14fe49 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -831,6 +831,40 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags); } +#ifdef CONFIG_MMIOTRACE +void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, + struct mmiotrace_rw *rw) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + spin_lock_irqsave(&data->lock, irq_flags); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_MMIO_RW; + entry->mmiorw = *rw; + spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); +} + +void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, + struct mmiotrace_map *map) +{ + struct trace_entry *entry; + unsigned long irq_flags; + + spin_lock_irqsave(&data->lock, irq_flags); + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, 0); + entry->type = TRACE_MMIO_MAP; + entry->mmiomap = *map; + spin_unlock_irqrestore(&data->lock, irq_flags); + + trace_wake_up(); +} +#endif + void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c460e85..0ef9ef7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -5,6 +5,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -14,6 +15,8 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_SPECIAL, + TRACE_MMIO_RW, + TRACE_MMIO_MAP, __TRACE_LAST_TYPE }; @@ -75,6 +78,8 @@ struct trace_entry { struct ctx_switch_entry ctx; struct special_entry special; struct stack_entry stack; + struct mmiotrace_rw mmiorw; + struct mmiotrace_map mmiomap; }; }; @@ -255,6 +260,15 @@ extern unsigned long ftrace_update_tot_cnt; extern int DYN_FTRACE_TEST_NAME(void); #endif +#ifdef CONFIG_MMIOTRACE +extern void __trace_mmiotrace_rw(struct trace_array *tr, + struct trace_array_cpu *data, + struct mmiotrace_rw *rw); +extern void __trace_mmiotrace_map(struct trace_array *tr, + struct trace_array_cpu *data, + struct mmiotrace_map *map); +#endif + #ifdef CONFIG_FTRACE_STARTUP_TEST #ifdef CONFIG_FTRACE extern int trace_selftest_startup_function(struct tracer *trace, diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index e4dd03c..3a12b1a 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -11,19 +11,26 @@ #include "trace.h" -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); - static struct trace_array *mmio_trace_array; +static void mmio_reset_data(struct trace_array *tr) +{ + int cpu; + + tr->time_start = ftrace_now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr->data[cpu]); +} static void mmio_trace_init(struct trace_array *tr) { pr_debug("in %s\n", __func__); mmio_trace_array = tr; - if (tr->ctrl) + if (tr->ctrl) { + mmio_reset_data(tr); enable_mmiotrace(); + } } static void mmio_trace_reset(struct trace_array *tr) @@ -31,15 +38,110 @@ static void mmio_trace_reset(struct trace_array *tr) pr_debug("in %s\n", __func__); if (tr->ctrl) disable_mmiotrace(); + mmio_reset_data(tr); + mmio_trace_array = NULL; } static void mmio_trace_ctrl_update(struct trace_array *tr) { pr_debug("in %s\n", __func__); - if (tr->ctrl) + if (tr->ctrl) { + mmio_reset_data(tr); enable_mmiotrace(); - else + } else { disable_mmiotrace(); + } +} + +/* XXX: This is not called for trace_pipe file! */ +void mmio_print_header(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + trace_seq_printf(s, "VERSION broken 20070824\n"); + /* TODO: print /proc/bus/pci/devices contents as PCIDEV lines */ +} + +static int mmio_print_rw(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct mmiotrace_rw *rw = &entry->mmiorw; + struct trace_seq *s = &iter->seq; + unsigned long long t = ns2usecs(entry->t); + unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned secs = (unsigned long)t; + int ret = 1; + + switch (entry->mmiorw.opcode) { + case MMIO_READ: + ret = trace_seq_printf(s, + "R %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, rw->phys, + rw->value, rw->pc, entry->pid); + break; + case MMIO_WRITE: + ret = trace_seq_printf(s, + "W %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, rw->phys, + rw->value, rw->pc, entry->pid); + break; + case MMIO_UNKNOWN_OP: + ret = trace_seq_printf(s, + "UNKNOWN %lu.%06lu %d 0x%lx %02x,%02x,%02x 0x%lx %d\n", + secs, usec_rem, rw->map_id, rw->phys, + (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, + (rw->value >> 0) & 0xff, rw->pc, entry->pid); + break; + default: + ret = trace_seq_printf(s, "rw what?\n"); + break; + } + if (ret) + return 1; + return 0; +} + +static int mmio_print_map(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct mmiotrace_map *m = &entry->mmiomap; + struct trace_seq *s = &iter->seq; + unsigned long long t = ns2usecs(entry->t); + unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned secs = (unsigned long)t; + int ret = 1; + + switch (entry->mmiorw.opcode) { + case MMIO_PROBE: + ret = trace_seq_printf(s, + "MAP %lu.%06lu %d 0x%lx 0x%lx 0x%lx 0x%lx %d\n", + secs, usec_rem, m->map_id, m->phys, m->virt, m->len, + 0UL, entry->pid); + break; + case MMIO_UNPROBE: + ret = trace_seq_printf(s, + "UNMAP %lu.%06lu %d 0x%lx %d\n", + secs, usec_rem, m->map_id, 0UL, entry->pid); + break; + default: + ret = trace_seq_printf(s, "map what?\n"); + break; + } + if (ret) + return 1; + return 0; +} + +/* return 0 to abort printing without consuming current entry in pipe mode */ +static int mmio_print_line(struct trace_iterator *iter) +{ + switch (iter->ent->type) { + case TRACE_MMIO_RW: + return mmio_print_rw(iter); + case TRACE_MMIO_MAP: + return mmio_print_map(iter); + default: + return 1; /* ignore unknown entries */ + } } static struct tracer mmio_tracer __read_mostly = @@ -47,38 +149,31 @@ static struct tracer mmio_tracer __read_mostly = .name = "mmiotrace", .init = mmio_trace_init, .reset = mmio_trace_reset, + .open = mmio_print_header, .ctrl_update = mmio_trace_ctrl_update, + .print_line = mmio_print_line, }; __init static int init_mmio_trace(void) { - int ret = init_mmiotrace(); - if (ret) - return ret; return register_tracer(&mmio_tracer); } device_initcall(init_mmio_trace); -void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg) +void mmio_trace_rw(struct mmiotrace_rw *rw) { struct trace_array *tr = mmio_trace_array; struct trace_array_cpu *data = tr->data[smp_processor_id()]; + __trace_mmiotrace_rw(tr, data, rw); +} - if (!current || current->pid == 0) { - /* - * XXX: This is a problem. We need to able to record, no - * matter what. tracing_generic_entry_update() would crash. - */ - static unsigned limit; - if (limit++ < 12) - pr_err("Error in %s: no current.\n", __func__); - return; - } - if (!tr || !data) { - static unsigned limit; - if (limit++ < 12) - pr_err("%s: no tr or data\n", __func__); - return; - } - __trace_special(tr, data, type, addr, arg); +void mmio_trace_mapping(struct mmiotrace_map *map) +{ + struct trace_array *tr = mmio_trace_array; + struct trace_array_cpu *data; + + preempt_disable(); + data = tr->data[smp_processor_id()]; + __trace_mmiotrace_map(tr, data, map); + preempt_enable(); } -- cgit v0.10.2 From 138295373ccf7625fcb0218dfea114837983bc39 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: mmiotrace update, #2 another weekend, another patch. This should apply on top of my previous patch from March 23rd. Summary of changes: - Print PCI device list in output header - work around recursive probe hits on SMP - refactor dis/arm_kmmio_fault_page() and add check for page levels - remove un/reference_kmmio(), the die notifier hook is registered permanently into the list - explicitly check for single stepping in die notifier callback I have tested this version on my UP Athlon64 desktop with Nouveau, and SMP Core 2 Duo laptop with the proprietary nvidia driver. Both systems are 64-bit. One previously unknown bug crept into daylight: the ftrace framework's output routines print the first entry last after buffer has wrapped around. The most important regressions compared to non-ftrace mmiotrace at this time are: - failure of trace_pipe file - illegal lines in output file - unaware of losing data due to buffer full Personally I'd like to see these three solved before submitting to mainline. Other issues may come up once we know when we lose events. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index efb4679..cd0d95f 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -5,15 +5,12 @@ * 2008 Pekka Paalanen */ -#include #include #include #include #include #include -#include #include -#include #include #include #include @@ -22,10 +19,9 @@ #include #include #include -#include #include -#include - +#include +#include #include #define KMMIO_PAGE_HASH_BITS 4 @@ -57,14 +53,9 @@ struct kmmio_context { int active; }; -static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, - void *args); - -static DEFINE_MUTEX(kmmio_init_mutex); static DEFINE_SPINLOCK(kmmio_lock); -/* These are protected by kmmio_lock */ -static int kmmio_initialized; +/* Protected by kmmio_lock */ unsigned int kmmio_count; /* Read-protected by RCU, write-protected by kmmio_lock. */ @@ -79,60 +70,6 @@ static struct list_head *kmmio_page_list(unsigned long page) /* Accessed per-cpu */ static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); -/* protected by kmmio_init_mutex */ -static struct notifier_block nb_die = { - .notifier_call = kmmio_die_notifier -}; - -/** - * Makes sure kmmio is initialized and usable. - * This must be called before any other kmmio function defined here. - * May sleep. - */ -void reference_kmmio(void) -{ - mutex_lock(&kmmio_init_mutex); - spin_lock_irq(&kmmio_lock); - if (!kmmio_initialized) { - int i; - for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) - INIT_LIST_HEAD(&kmmio_page_table[i]); - if (register_die_notifier(&nb_die)) - BUG(); - } - kmmio_initialized++; - spin_unlock_irq(&kmmio_lock); - mutex_unlock(&kmmio_init_mutex); -} -EXPORT_SYMBOL_GPL(reference_kmmio); - -/** - * Clean up kmmio after use. This must be called for every call to - * reference_kmmio(). All probes registered after the corresponding - * reference_kmmio() must have been unregistered when calling this. - * May sleep. - */ -void unreference_kmmio(void) -{ - bool unreg = false; - - mutex_lock(&kmmio_init_mutex); - spin_lock_irq(&kmmio_lock); - - if (kmmio_initialized == 1) { - BUG_ON(is_kmmio_active()); - unreg = true; - } - kmmio_initialized--; - BUG_ON(kmmio_initialized < 0); - spin_unlock_irq(&kmmio_lock); - - if (unreg) - unregister_die_notifier(&nb_die); /* calls sync_rcu() */ - mutex_unlock(&kmmio_init_mutex); -} -EXPORT_SYMBOL(unreference_kmmio); - /* * this is basically a dynamic stabbing problem: * Could use the existing prio tree code or @@ -167,58 +104,56 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -/** Mark the given page as not present. Access to it will trigger a fault. */ -static void arm_kmmio_fault_page(unsigned long page, int *page_level) +static void set_page_present(unsigned long addr, bool present, int *pglevel) { - unsigned long address = page & PAGE_MASK; + pteval_t pteval; + pmdval_t pmdval; int level; - pte_t *pte = lookup_address(address, &level); + pmd_t *pmd; + pte_t *pte = lookup_address(addr, &level); if (!pte) { - pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", - __func__, page); + pr_err("kmmio: no pte for page 0x%08lx\n", addr); return; } - if (level == PG_LEVEL_2M) { - pmd_t *pmd = (pmd_t *)pte; - set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT)); - } else { - /* PG_LEVEL_4K */ - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + if (pglevel) + *pglevel = level; + + switch (level) { + case PG_LEVEL_2M: + pmd = (pmd_t *)pte; + pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; + if (present) + pmdval |= _PAGE_PRESENT; + set_pmd(pmd, __pmd(pmdval)); + break; + + case PG_LEVEL_4K: + pteval = pte_val(*pte) & ~_PAGE_PRESENT; + if (present) + pteval |= _PAGE_PRESENT; + set_pte_atomic(pte, __pte(pteval)); + break; + + default: + pr_err("kmmio: unexpected page level 0x%x.\n", level); + return; } - if (page_level) - *page_level = level; + __flush_tlb_one(addr); +} - __flush_tlb_one(page); +/** Mark the given page as not present. Access to it will trigger a fault. */ +static void arm_kmmio_fault_page(unsigned long page, int *page_level) +{ + set_page_present(page & PAGE_MASK, false, page_level); } /** Mark the given page as present. */ static void disarm_kmmio_fault_page(unsigned long page, int *page_level) { - unsigned long address = page & PAGE_MASK; - int level; - pte_t *pte = lookup_address(address, &level); - - if (!pte) { - pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n", - __func__, page); - return; - } - - if (level == PG_LEVEL_2M) { - pmd_t *pmd = (pmd_t *)pte; - set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT)); - } else { - /* PG_LEVEL_4K */ - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - } - - if (page_level) - *page_level = level; - - __flush_tlb_one(page); + set_page_present(page & PAGE_MASK, true, page_level); } /* @@ -240,6 +175,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) { struct kmmio_context *ctx; struct kmmio_fault_page *faultpage; + int ret = 0; /* default to fault not handled */ /* * Preemption is now disabled to prevent process switch during @@ -257,21 +193,35 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) /* * Either this page fault is not caused by kmmio, or * another CPU just pulled the kmmio probe from under - * our feet. In the latter case all hell breaks loose. + * our feet. The latter case should not be possible. */ goto no_kmmio; } ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { + disarm_kmmio_fault_page(faultpage->page, NULL); + if (addr == ctx->addr) { + /* + * On SMP we sometimes get recursive probe hits on the + * same address. Context is already saved, fall out. + */ + pr_debug("kmmio: duplicate probe hit on CPU %d, for " + "address 0x%08lx.\n", + smp_processor_id(), addr); + ret = 1; + goto no_kmmio_ctx; + } /* * Prevent overwriting already in-flight context. - * If this page fault really was due to kmmio trap, - * all hell breaks loose. + * This should not happen, let's hope disarming at least + * prevents a panic. */ pr_emerg("kmmio: recursive probe hit on CPU %d, " "for address 0x%08lx. Ignoring.\n", smp_processor_id(), addr); + pr_emerg("kmmio: previous hit was at 0x%08lx.\n", + ctx->addr); goto no_kmmio_ctx; } ctx->active++; @@ -302,14 +252,14 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) */ put_cpu_var(kmmio_ctx); - return 1; + return 1; /* fault handled */ no_kmmio_ctx: put_cpu_var(kmmio_ctx); no_kmmio: rcu_read_unlock(); preempt_enable_no_resched(); - return 0; /* page fault not handled by kmmio */ + return ret; } /* @@ -322,8 +272,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) int ret = 0; struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); - if (!ctx->active) + if (!ctx->active) { + pr_debug("kmmio: spurious debug trap on CPU %d.\n", + smp_processor_id()); goto out; + } if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); @@ -525,9 +478,22 @@ static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, { struct die_args *arg = args; - if (val == DIE_DEBUG) + if (val == DIE_DEBUG && (arg->err & DR_STEP)) if (post_kmmio_handler(arg->err, arg->regs) == 1) return NOTIFY_STOP; return NOTIFY_DONE; } + +static struct notifier_block nb_die = { + .notifier_call = kmmio_die_notifier +}; + +static int __init init_kmmio(void) +{ + int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kmmio_page_table[i]); + return register_die_notifier(&nb_die); +} +fs_initcall(init_kmmio); /* should be before device_initcall() */ diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c index 62abc28..8256546 100644 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ b/arch/x86/kernel/mmiotrace/mmio-mod.c @@ -415,8 +415,6 @@ void enable_mmiotrace(void) if (is_enabled()) goto out; - reference_kmmio(); - #if 0 /* XXX: tracing does not support text entries */ marker_file = debugfs_create_file("marker", 0660, dir, NULL, &fops_marker); @@ -448,7 +446,6 @@ void disable_mmiotrace(void) spin_unlock_irq(&trace_lock); clear_trace_list(); /* guarantees: no more kmmio callbacks */ - unreference_kmmio(); if (marker_file) { debugfs_remove(marker_file); marker_file = NULL; diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index c88a9c1..dd6b64b 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -31,8 +31,6 @@ static inline int is_kmmio_active(void) return kmmio_count; } -extern void reference_kmmio(void); -extern void unreference_kmmio(void); extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 3a12b1a..361472b 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -8,6 +8,7 @@ #include #include +#include #include "trace.h" @@ -53,12 +54,52 @@ static void mmio_trace_ctrl_update(struct trace_array *tr) } } +static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) +{ + int ret = 0; + int i; + resource_size_t start, end; + const struct pci_driver *drv = pci_dev_driver(dev); + + /* XXX: incomplete checks for trace_seq_printf() return value */ + ret += trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x", + dev->bus->number, dev->devfn, + dev->vendor, dev->device, dev->irq); + /* + * XXX: is pci_resource_to_user() appropriate, since we are + * supposed to interpret the __ioremap() phys_addr argument based on + * these printed values? + */ + for (i = 0; i < 7; i++) { + pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); + ret += trace_seq_printf(s, " %llx", + (unsigned long long)(start | + (dev->resource[i].flags & PCI_REGION_FLAG_MASK))); + } + for (i = 0; i < 7; i++) { + pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); + ret += trace_seq_printf(s, " %llx", + dev->resource[i].start < dev->resource[i].end ? + (unsigned long long)(end - start) + 1 : 0); + } + if (drv) + ret += trace_seq_printf(s, " %s\n", drv->name); + else + ret += trace_seq_printf(s, " \n"); + return ret; +} + /* XXX: This is not called for trace_pipe file! */ -void mmio_print_header(struct trace_iterator *iter) +static void mmio_print_header(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; - trace_seq_printf(s, "VERSION broken 20070824\n"); - /* TODO: print /proc/bus/pci/devices contents as PCIDEV lines */ + struct pci_dev *dev = NULL; + + trace_seq_printf(s, "VERSION 20070824\n"); + + for_each_pci_dev(dev) + mmio_print_pcidev(s, dev); + /* XXX: return value? What if header is very long? */ } static int mmio_print_rw(struct trace_iterator *iter) -- cgit v0.10.2 From 801a175bf601f9a9d5e86e92dee9adeeb6625da8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:58 +0200 Subject: mmiotrace: ftrace fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d14fe49..4dcc4e8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -838,12 +838,16 @@ void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, 0); entry->type = TRACE_MMIO_RW; entry->mmiorw = *rw; - spin_unlock_irqrestore(&data->lock, irq_flags); + + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); trace_wake_up(); } @@ -854,12 +858,16 @@ void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, struct trace_entry *entry; unsigned long irq_flags; - spin_lock_irqsave(&data->lock, irq_flags); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&data->lock); + entry = tracing_get_trace_entry(tr, data); tracing_generic_entry_update(entry, 0); entry->type = TRACE_MMIO_MAP; entry->mmiomap = *map; - spin_unlock_irqrestore(&data->lock, irq_flags); + + __raw_spin_unlock(&data->lock); + raw_local_irq_restore(irq_flags); trace_wake_up(); } -- cgit v0.10.2 From 49023168261a7f9a2fd4a1ca1adbfea922556015 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:58 +0200 Subject: mmiotrace: cleanup Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c index cd0d95f..3ad27b8 100644 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ b/arch/x86/kernel/mmiotrace/kmmio.c @@ -228,7 +228,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx->fpage = faultpage; ctx->probe = get_kmmio_probe(addr); - ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK)); + ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); ctx->addr = addr; if (ctx->probe && ctx->probe->pre_handler) @@ -238,8 +238,8 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) * Enable single-stepping and disable interrupts for the faulting * context. Local interrupts must not get enabled during stepping. */ - regs->flags |= TF_MASK; - regs->flags &= ~IF_MASK; + regs->flags |= X86_EFLAGS_TF; + regs->flags &= ~X86_EFLAGS_IF; /* Now we set present bit in PTE and single step. */ disarm_kmmio_fault_page(ctx->fpage->page, NULL); @@ -283,7 +283,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) arm_kmmio_fault_page(ctx->fpage->page, NULL); - regs->flags &= ~TF_MASK; + regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; /* These were acquired in kmmio_handler(). */ @@ -297,7 +297,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) * will have TF set, in which case, continue the remaining processing * of do_debug, as if this is not a probe hit. */ - if (!(regs->flags & TF_MASK)) + if (!(regs->flags & X86_EFLAGS_TF)) ret = 1; out: put_cpu_var(kmmio_ctx); -- cgit v0.10.2 From ff3a3e9ba5e4273a8bc10570adab4a390fb90757 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:59 +0200 Subject: x86 mmiotrace: move files into arch/x86/mm/. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a51ac15..739d49a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -79,8 +79,6 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_MMIOTRACE) += mmiotrace/ - obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile deleted file mode 100644 index dbcd8d5..0000000 --- a/arch/x86/kernel/mmiotrace/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o -obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-y := pf_in.o mmio-mod.o -obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c deleted file mode 100644 index 3ad27b8..0000000 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ /dev/null @@ -1,499 +0,0 @@ -/* Support for MMIO probes. - * Benfit many code from kprobes - * (C) 2002 Louis Zhuang . - * 2007 Alexander Eichner - * 2008 Pekka Paalanen - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define KMMIO_PAGE_HASH_BITS 4 -#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) - -struct kmmio_fault_page { - struct list_head list; - struct kmmio_fault_page *release_next; - unsigned long page; /* location of the fault page */ - - /* - * Number of times this page has been registered as a part - * of a probe. If zero, page is disarmed and this may be freed. - * Used only by writers (RCU). - */ - int count; -}; - -struct kmmio_delayed_release { - struct rcu_head rcu; - struct kmmio_fault_page *release_list; -}; - -struct kmmio_context { - struct kmmio_fault_page *fpage; - struct kmmio_probe *probe; - unsigned long saved_flags; - unsigned long addr; - int active; -}; - -static DEFINE_SPINLOCK(kmmio_lock); - -/* Protected by kmmio_lock */ -unsigned int kmmio_count; - -/* Read-protected by RCU, write-protected by kmmio_lock. */ -static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; -static LIST_HEAD(kmmio_probes); - -static struct list_head *kmmio_page_list(unsigned long page) -{ - return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; -} - -/* Accessed per-cpu */ -static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); - -/* - * this is basically a dynamic stabbing problem: - * Could use the existing prio tree code or - * Possible better implementations: - * The Interval Skip List: A Data Structure for Finding All Intervals That - * Overlap a Point (might be simple) - * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup - */ -/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */ -static struct kmmio_probe *get_kmmio_probe(unsigned long addr) -{ - struct kmmio_probe *p; - list_for_each_entry_rcu(p, &kmmio_probes, list) { - if (addr >= p->addr && addr <= (p->addr + p->len)) - return p; - } - return NULL; -} - -/* You must be holding RCU read lock. */ -static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) -{ - struct list_head *head; - struct kmmio_fault_page *p; - - page &= PAGE_MASK; - head = kmmio_page_list(page); - list_for_each_entry_rcu(p, head, list) { - if (p->page == page) - return p; - } - return NULL; -} - -static void set_page_present(unsigned long addr, bool present, int *pglevel) -{ - pteval_t pteval; - pmdval_t pmdval; - int level; - pmd_t *pmd; - pte_t *pte = lookup_address(addr, &level); - - if (!pte) { - pr_err("kmmio: no pte for page 0x%08lx\n", addr); - return; - } - - if (pglevel) - *pglevel = level; - - switch (level) { - case PG_LEVEL_2M: - pmd = (pmd_t *)pte; - pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; - if (present) - pmdval |= _PAGE_PRESENT; - set_pmd(pmd, __pmd(pmdval)); - break; - - case PG_LEVEL_4K: - pteval = pte_val(*pte) & ~_PAGE_PRESENT; - if (present) - pteval |= _PAGE_PRESENT; - set_pte_atomic(pte, __pte(pteval)); - break; - - default: - pr_err("kmmio: unexpected page level 0x%x.\n", level); - return; - } - - __flush_tlb_one(addr); -} - -/** Mark the given page as not present. Access to it will trigger a fault. */ -static void arm_kmmio_fault_page(unsigned long page, int *page_level) -{ - set_page_present(page & PAGE_MASK, false, page_level); -} - -/** Mark the given page as present. */ -static void disarm_kmmio_fault_page(unsigned long page, int *page_level) -{ - set_page_present(page & PAGE_MASK, true, page_level); -} - -/* - * This is being called from do_page_fault(). - * - * We may be in an interrupt or a critical section. Also prefecthing may - * trigger a page fault. We may be in the middle of process switch. - * We cannot take any locks, because we could be executing especially - * within a kmmio critical section. - * - * Local interrupts are disabled, so preemption cannot happen. - * Do not enable interrupts, do not sleep, and watch out for other CPUs. - */ -/* - * Interrupts are disabled on entry as trap3 is an interrupt gate - * and they remain disabled thorough out this function. - */ -int kmmio_handler(struct pt_regs *regs, unsigned long addr) -{ - struct kmmio_context *ctx; - struct kmmio_fault_page *faultpage; - int ret = 0; /* default to fault not handled */ - - /* - * Preemption is now disabled to prevent process switch during - * single stepping. We can only handle one active kmmio trace - * per cpu, so ensure that we finish it before something else - * gets to run. We also hold the RCU read lock over single - * stepping to avoid looking up the probe and kmmio_fault_page - * again. - */ - preempt_disable(); - rcu_read_lock(); - - faultpage = get_kmmio_fault_page(addr); - if (!faultpage) { - /* - * Either this page fault is not caused by kmmio, or - * another CPU just pulled the kmmio probe from under - * our feet. The latter case should not be possible. - */ - goto no_kmmio; - } - - ctx = &get_cpu_var(kmmio_ctx); - if (ctx->active) { - disarm_kmmio_fault_page(faultpage->page, NULL); - if (addr == ctx->addr) { - /* - * On SMP we sometimes get recursive probe hits on the - * same address. Context is already saved, fall out. - */ - pr_debug("kmmio: duplicate probe hit on CPU %d, for " - "address 0x%08lx.\n", - smp_processor_id(), addr); - ret = 1; - goto no_kmmio_ctx; - } - /* - * Prevent overwriting already in-flight context. - * This should not happen, let's hope disarming at least - * prevents a panic. - */ - pr_emerg("kmmio: recursive probe hit on CPU %d, " - "for address 0x%08lx. Ignoring.\n", - smp_processor_id(), addr); - pr_emerg("kmmio: previous hit was at 0x%08lx.\n", - ctx->addr); - goto no_kmmio_ctx; - } - ctx->active++; - - ctx->fpage = faultpage; - ctx->probe = get_kmmio_probe(addr); - ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - ctx->addr = addr; - - if (ctx->probe && ctx->probe->pre_handler) - ctx->probe->pre_handler(ctx->probe, regs, addr); - - /* - * Enable single-stepping and disable interrupts for the faulting - * context. Local interrupts must not get enabled during stepping. - */ - regs->flags |= X86_EFLAGS_TF; - regs->flags &= ~X86_EFLAGS_IF; - - /* Now we set present bit in PTE and single step. */ - disarm_kmmio_fault_page(ctx->fpage->page, NULL); - - /* - * If another cpu accesses the same page while we are stepping, - * the access will not be caught. It will simply succeed and the - * only downside is we lose the event. If this becomes a problem, - * the user should drop to single cpu before tracing. - */ - - put_cpu_var(kmmio_ctx); - return 1; /* fault handled */ - -no_kmmio_ctx: - put_cpu_var(kmmio_ctx); -no_kmmio: - rcu_read_unlock(); - preempt_enable_no_resched(); - return ret; -} - -/* - * Interrupts are disabled on entry as trap1 is an interrupt gate - * and they remain disabled thorough out this function. - * This must always get called as the pair to kmmio_handler(). - */ -static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) -{ - int ret = 0; - struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); - - if (!ctx->active) { - pr_debug("kmmio: spurious debug trap on CPU %d.\n", - smp_processor_id()); - goto out; - } - - if (ctx->probe && ctx->probe->post_handler) - ctx->probe->post_handler(ctx->probe, condition, regs); - - arm_kmmio_fault_page(ctx->fpage->page, NULL); - - regs->flags &= ~X86_EFLAGS_TF; - regs->flags |= ctx->saved_flags; - - /* These were acquired in kmmio_handler(). */ - ctx->active--; - BUG_ON(ctx->active); - rcu_read_unlock(); - preempt_enable_no_resched(); - - /* - * if somebody else is singlestepping across a probe point, flags - * will have TF set, in which case, continue the remaining processing - * of do_debug, as if this is not a probe hit. - */ - if (!(regs->flags & X86_EFLAGS_TF)) - ret = 1; -out: - put_cpu_var(kmmio_ctx); - return ret; -} - -/* You must be holding kmmio_lock. */ -static int add_kmmio_fault_page(unsigned long page) -{ - struct kmmio_fault_page *f; - - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); - if (f) { - if (!f->count) - arm_kmmio_fault_page(f->page, NULL); - f->count++; - return 0; - } - - f = kmalloc(sizeof(*f), GFP_ATOMIC); - if (!f) - return -1; - - f->count = 1; - f->page = page; - list_add_rcu(&f->list, kmmio_page_list(f->page)); - - arm_kmmio_fault_page(f->page, NULL); - - return 0; -} - -/* You must be holding kmmio_lock. */ -static void release_kmmio_fault_page(unsigned long page, - struct kmmio_fault_page **release_list) -{ - struct kmmio_fault_page *f; - - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); - if (!f) - return; - - f->count--; - BUG_ON(f->count < 0); - if (!f->count) { - disarm_kmmio_fault_page(f->page, NULL); - f->release_next = *release_list; - *release_list = f; - } -} - -int register_kmmio_probe(struct kmmio_probe *p) -{ - unsigned long flags; - int ret = 0; - unsigned long size = 0; - - spin_lock_irqsave(&kmmio_lock, flags); - if (get_kmmio_probe(p->addr)) { - ret = -EEXIST; - goto out; - } - kmmio_count++; - list_add_rcu(&p->list, &kmmio_probes); - while (size < p->len) { - if (add_kmmio_fault_page(p->addr + size)) - pr_err("kmmio: Unable to set page fault.\n"); - size += PAGE_SIZE; - } -out: - spin_unlock_irqrestore(&kmmio_lock, flags); - /* - * XXX: What should I do here? - * Here was a call to global_flush_tlb(), but it does not exist - * anymore. It seems it's not needed after all. - */ - return ret; -} -EXPORT_SYMBOL(register_kmmio_probe); - -static void rcu_free_kmmio_fault_pages(struct rcu_head *head) -{ - struct kmmio_delayed_release *dr = container_of( - head, - struct kmmio_delayed_release, - rcu); - struct kmmio_fault_page *p = dr->release_list; - while (p) { - struct kmmio_fault_page *next = p->release_next; - BUG_ON(p->count); - kfree(p); - p = next; - } - kfree(dr); -} - -static void remove_kmmio_fault_pages(struct rcu_head *head) -{ - struct kmmio_delayed_release *dr = container_of( - head, - struct kmmio_delayed_release, - rcu); - struct kmmio_fault_page *p = dr->release_list; - struct kmmio_fault_page **prevp = &dr->release_list; - unsigned long flags; - spin_lock_irqsave(&kmmio_lock, flags); - while (p) { - if (!p->count) - list_del_rcu(&p->list); - else - *prevp = p->release_next; - prevp = &p->release_next; - p = p->release_next; - } - spin_unlock_irqrestore(&kmmio_lock, flags); - /* This is the real RCU destroy call. */ - call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); -} - -/* - * Remove a kmmio probe. You have to synchronize_rcu() before you can be - * sure that the callbacks will not be called anymore. Only after that - * you may actually release your struct kmmio_probe. - * - * Unregistering a kmmio fault page has three steps: - * 1. release_kmmio_fault_page() - * Disarm the page, wait a grace period to let all faults finish. - * 2. remove_kmmio_fault_pages() - * Remove the pages from kmmio_page_table. - * 3. rcu_free_kmmio_fault_pages() - * Actally free the kmmio_fault_page structs as with RCU. - */ -void unregister_kmmio_probe(struct kmmio_probe *p) -{ - unsigned long flags; - unsigned long size = 0; - struct kmmio_fault_page *release_list = NULL; - struct kmmio_delayed_release *drelease; - - spin_lock_irqsave(&kmmio_lock, flags); - while (size < p->len) { - release_kmmio_fault_page(p->addr + size, &release_list); - size += PAGE_SIZE; - } - list_del_rcu(&p->list); - kmmio_count--; - spin_unlock_irqrestore(&kmmio_lock, flags); - - drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); - if (!drelease) { - pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); - return; - } - drelease->release_list = release_list; - - /* - * This is not really RCU here. We have just disarmed a set of - * pages so that they cannot trigger page faults anymore. However, - * we cannot remove the pages from kmmio_page_table, - * because a probe hit might be in flight on another CPU. The - * pages are collected into a list, and they will be removed from - * kmmio_page_table when it is certain that no probe hit related to - * these pages can be in flight. RCU grace period sounds like a - * good choice. - * - * If we removed the pages too early, kmmio page fault handler might - * not find the respective kmmio_fault_page and determine it's not - * a kmmio fault, when it actually is. This would lead to madness. - */ - call_rcu(&drelease->rcu, remove_kmmio_fault_pages); -} -EXPORT_SYMBOL(unregister_kmmio_probe); - -static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, - void *args) -{ - struct die_args *arg = args; - - if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) - return NOTIFY_STOP; - - return NOTIFY_DONE; -} - -static struct notifier_block nb_die = { - .notifier_call = kmmio_die_notifier -}; - -static int __init init_kmmio(void) -{ - int i; - for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) - INIT_LIST_HEAD(&kmmio_page_table[i]); - return register_die_notifier(&nb_die); -} -fs_initcall(init_kmmio); /* should be before device_initcall() */ diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c deleted file mode 100644 index 8256546..0000000 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ /dev/null @@ -1,457 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2005 - * Jeff Muizelaar, 2006, 2007 - * Pekka Paalanen, 2008 - * - * Derived from the read-mod example from relay-examples by Tom Zanussi. - */ -#define DEBUG 1 - -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for ISA_START_ADDRESS */ -#include -#include - -#include "pf_in.h" - -#define NAME "mmiotrace: " - -struct trap_reason { - unsigned long addr; - unsigned long ip; - enum reason_type type; - int active_traces; -}; - -struct remap_trace { - struct list_head list; - struct kmmio_probe probe; - unsigned long phys; - unsigned long id; -}; - -/* Accessed per-cpu. */ -static DEFINE_PER_CPU(struct trap_reason, pf_reason); -static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); - -#if 0 /* XXX: no way gather this info anymore */ -/* Access to this is not per-cpu. */ -static DEFINE_PER_CPU(atomic_t, dropped); -#endif - -static struct dentry *marker_file; - -static DEFINE_MUTEX(mmiotrace_mutex); -static DEFINE_SPINLOCK(trace_lock); -static atomic_t mmiotrace_enabled; -static LIST_HEAD(trace_list); /* struct remap_trace */ - -/* - * Locking in this file: - * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections. - * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex - * and trace_lock. - * - Routines depending on is_enabled() must take trace_lock. - * - trace_list users must hold trace_lock. - * - is_enabled() guarantees that mmio_trace_record is allowed. - * - pre/post callbacks assume the effect of is_enabled() being true. - */ - -/* module parameters */ -static unsigned long filter_offset; -static int nommiotrace; -static int ISA_trace; -static int trace_pc; - -module_param(filter_offset, ulong, 0); -module_param(nommiotrace, bool, 0); -module_param(ISA_trace, bool, 0); -module_param(trace_pc, bool, 0); - -MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); -MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); -MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); -MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); - -static bool is_enabled(void) -{ - return atomic_read(&mmiotrace_enabled); -} - -#if 0 /* XXX: needs rewrite */ -/* - * Write callback for the debugfs entry: - * Read a marker and write it to the mmio trace log - */ -static ssize_t write_marker(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - char *event = NULL; - struct mm_io_header *headp; - ssize_t len = (count > 65535) ? 65535 : count; - - event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); - if (!event) - return -ENOMEM; - - headp = (struct mm_io_header *)event; - headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); - headp->data_len = len; - - if (copy_from_user(event + sizeof(*headp), buffer, len)) { - kfree(event); - return -EFAULT; - } - - spin_lock_irq(&trace_lock); -#if 0 /* XXX: convert this to use tracing */ - if (is_enabled()) - relay_write(chan, event, sizeof(*headp) + len); - else -#endif - len = -EINVAL; - spin_unlock_irq(&trace_lock); - kfree(event); - return len; -} -#endif - -static void print_pte(unsigned long address) -{ - int level; - pte_t *pte = lookup_address(address, &level); - - if (!pte) { - pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", - __func__, address); - return; - } - - if (level == PG_LEVEL_2M) { - pr_emerg(NAME "4MB pages are not currently supported: " - "0x%08lx\n", address); - BUG(); - } - pr_info(NAME "pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), - pte_val(*pte) & _PAGE_PRESENT); -} - -/* - * For some reason the pre/post pairs have been called in an - * unmatched order. Report and die. - */ -static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) -{ - const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - pr_emerg(NAME "unexpected fault for address: 0x%08lx, " - "last fault for address: 0x%08lx\n", - addr, my_reason->addr); - print_pte(addr); - print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); - print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); -#ifdef __i386__ - pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->ax, regs->bx, regs->cx, regs->dx); - pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->si, regs->di, regs->bp, regs->sp); -#else - pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", - regs->ax, regs->cx, regs->dx); - pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", - regs->si, regs->di, regs->bp, regs->sp); -#endif - put_cpu_var(pf_reason); - BUG(); -} - -static void pre(struct kmmio_probe *p, struct pt_regs *regs, - unsigned long addr) -{ - struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); - const unsigned long instptr = instruction_pointer(regs); - const enum reason_type type = get_ins_type(instptr); - struct remap_trace *trace = p->user_data; - - /* it doesn't make sense to have more than one active trace per cpu */ - if (my_reason->active_traces) - die_kmmio_nesting_error(regs, addr); - else - my_reason->active_traces++; - - my_reason->type = type; - my_reason->addr = addr; - my_reason->ip = instptr; - - my_trace->phys = addr - trace->probe.addr + trace->phys; - my_trace->map_id = trace->id; - - /* - * Only record the program counter when requested. - * It may taint clean-room reverse engineering. - */ - if (trace_pc) - my_trace->pc = instptr; - else - my_trace->pc = 0; - - /* - * XXX: the timestamp recorded will be *after* the tracing has been - * done, not at the time we hit the instruction. SMP implications - * on event ordering? - */ - - switch (type) { - case REG_READ: - my_trace->opcode = MMIO_READ; - my_trace->width = get_ins_mem_width(instptr); - break; - case REG_WRITE: - my_trace->opcode = MMIO_WRITE; - my_trace->width = get_ins_mem_width(instptr); - my_trace->value = get_ins_reg_val(instptr, regs); - break; - case IMM_WRITE: - my_trace->opcode = MMIO_WRITE; - my_trace->width = get_ins_mem_width(instptr); - my_trace->value = get_ins_imm_val(instptr); - break; - default: - { - unsigned char *ip = (unsigned char *)instptr; - my_trace->opcode = MMIO_UNKNOWN_OP; - my_trace->width = 0; - my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | - *(ip + 2); - } - } - put_cpu_var(cpu_trace); - put_cpu_var(pf_reason); -} - -static void post(struct kmmio_probe *p, unsigned long condition, - struct pt_regs *regs) -{ - struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); - - /* this should always return the active_trace count to 0 */ - my_reason->active_traces--; - if (my_reason->active_traces) { - pr_emerg(NAME "unexpected post handler"); - BUG(); - } - - switch (my_reason->type) { - case REG_READ: - my_trace->value = get_ins_reg_val(my_reason->ip, regs); - break; - default: - break; - } - - mmio_trace_rw(my_trace); - put_cpu_var(cpu_trace); - put_cpu_var(pf_reason); -} - -static void ioremap_trace_core(unsigned long offset, unsigned long size, - void __iomem *addr) -{ - static atomic_t next_id; - struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); - struct mmiotrace_map map = { - .phys = offset, - .virt = (unsigned long)addr, - .len = size, - .opcode = MMIO_PROBE - }; - - if (!trace) { - pr_err(NAME "kmalloc failed in ioremap\n"); - return; - } - - *trace = (struct remap_trace) { - .probe = { - .addr = (unsigned long)addr, - .len = size, - .pre_handler = pre, - .post_handler = post, - .user_data = trace - }, - .phys = offset, - .id = atomic_inc_return(&next_id) - }; - map.map_id = trace->id; - - spin_lock_irq(&trace_lock); - if (!is_enabled()) - goto not_enabled; - - mmio_trace_mapping(&map); - list_add_tail(&trace->list, &trace_list); - if (!nommiotrace) - register_kmmio_probe(&trace->probe); - -not_enabled: - spin_unlock_irq(&trace_lock); -} - -void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) -{ - if (!is_enabled()) /* recheck and proper locking in *_core() */ - return; - - pr_debug(NAME "ioremap_*(0x%lx, 0x%lx) = %p\n", offset, size, addr); - if ((filter_offset) && (offset != filter_offset)) - return; - ioremap_trace_core(offset, size, addr); -} - -static void iounmap_trace_core(volatile void __iomem *addr) -{ - struct mmiotrace_map map = { - .phys = 0, - .virt = (unsigned long)addr, - .len = 0, - .opcode = MMIO_UNPROBE - }; - struct remap_trace *trace; - struct remap_trace *tmp; - struct remap_trace *found_trace = NULL; - - pr_debug(NAME "Unmapping %p.\n", addr); - - spin_lock_irq(&trace_lock); - if (!is_enabled()) - goto not_enabled; - - list_for_each_entry_safe(trace, tmp, &trace_list, list) { - if ((unsigned long)addr == trace->probe.addr) { - if (!nommiotrace) - unregister_kmmio_probe(&trace->probe); - list_del(&trace->list); - found_trace = trace; - break; - } - } - map.map_id = (found_trace) ? found_trace->id : -1; - mmio_trace_mapping(&map); - -not_enabled: - spin_unlock_irq(&trace_lock); - if (found_trace) { - synchronize_rcu(); /* unregister_kmmio_probe() requirement */ - kfree(found_trace); - } -} - -void mmiotrace_iounmap(volatile void __iomem *addr) -{ - might_sleep(); - if (is_enabled()) /* recheck and proper locking in *_core() */ - iounmap_trace_core(addr); -} - -static void clear_trace_list(void) -{ - struct remap_trace *trace; - struct remap_trace *tmp; - - /* - * No locking required, because the caller ensures we are in a - * critical section via mutex, and is_enabled() is false, - * i.e. nothing can traverse or modify this list. - * Caller also ensures is_enabled() cannot change. - */ - list_for_each_entry(trace, &trace_list, list) { - pr_notice(NAME "purging non-iounmapped " - "trace @0x%08lx, size 0x%lx.\n", - trace->probe.addr, trace->probe.len); - if (!nommiotrace) - unregister_kmmio_probe(&trace->probe); - } - synchronize_rcu(); /* unregister_kmmio_probe() requirement */ - - list_for_each_entry_safe(trace, tmp, &trace_list, list) { - list_del(&trace->list); - kfree(trace); - } -} - -#if 0 /* XXX: out of order */ -static struct file_operations fops_marker = { - .owner = THIS_MODULE, - .write = write_marker -}; -#endif - -void enable_mmiotrace(void) -{ - mutex_lock(&mmiotrace_mutex); - if (is_enabled()) - goto out; - -#if 0 /* XXX: tracing does not support text entries */ - marker_file = debugfs_create_file("marker", 0660, dir, NULL, - &fops_marker); - if (!marker_file) - pr_err(NAME "marker file creation failed.\n"); -#endif - - if (nommiotrace) - pr_info(NAME "MMIO tracing disabled.\n"); - if (ISA_trace) - pr_warning(NAME "Warning! low ISA range will be traced.\n"); - spin_lock_irq(&trace_lock); - atomic_inc(&mmiotrace_enabled); - spin_unlock_irq(&trace_lock); - pr_info(NAME "enabled.\n"); -out: - mutex_unlock(&mmiotrace_mutex); -} - -void disable_mmiotrace(void) -{ - mutex_lock(&mmiotrace_mutex); - if (!is_enabled()) - goto out; - - spin_lock_irq(&trace_lock); - atomic_dec(&mmiotrace_enabled); - BUG_ON(is_enabled()); - spin_unlock_irq(&trace_lock); - - clear_trace_list(); /* guarantees: no more kmmio callbacks */ - if (marker_file) { - debugfs_remove(marker_file); - marker_file = NULL; - } - - pr_info(NAME "disabled.\n"); -out: - mutex_unlock(&mmiotrace_mutex); -} diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/kernel/mmiotrace/pf_in.c deleted file mode 100644 index efa1911..0000000 --- a/arch/x86/kernel/mmiotrace/pf_in.c +++ /dev/null @@ -1,489 +0,0 @@ -/* - * Fault Injection Test harness (FI) - * Copyright (C) Intel Crop. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, - * USA. - * - */ - -/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp - * Copyright by Intel Crop., 2002 - * Louis Zhuang (louis.zhuang@intel.com) - * - * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007 - */ - -#include -#include /* struct pt_regs */ -#include "pf_in.h" - -#ifdef __i386__ -/* IA32 Manual 3, 2-1 */ -static unsigned char prefix_codes[] = { - 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, - 0x65, 0x2E, 0x3E, 0x66, 0x67 -}; -/* IA32 Manual 3, 3-432*/ -static unsigned int reg_rop[] = { - 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F -}; -static unsigned int reg_wop[] = { 0x88, 0x89 }; -static unsigned int imm_wop[] = { 0xC6, 0xC7 }; -/* IA32 Manual 3, 3-432*/ -static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; -static unsigned int rw32[] = { - 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F -}; -static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; -static unsigned int mw16[] = { 0xB70F, 0xBF0F }; -static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; -static unsigned int mw64[] = {}; -#else /* not __i386__ */ -static unsigned char prefix_codes[] = { - 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36, - 0xF0, 0xF3, 0xF2, - /* REX Prefixes */ - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f -}; -/* AMD64 Manual 3, Appendix A*/ -static unsigned int reg_rop[] = { - 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F -}; -static unsigned int reg_wop[] = { 0x88, 0x89 }; -static unsigned int imm_wop[] = { 0xC6, 0xC7 }; -static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; -static unsigned int rw32[] = { - 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F -}; -/* 8 bit only */ -static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; -/* 16 bit only */ -static unsigned int mw16[] = { 0xB70F, 0xBF0F }; -/* 16 or 32 bit */ -static unsigned int mw32[] = { 0xC7 }; -/* 16, 32 or 64 bit */ -static unsigned int mw64[] = { 0x89, 0x8B }; -#endif /* not __i386__ */ - -static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, - int *rexr) -{ - int i; - unsigned char *p = addr; - *shorted = 0; - *enlarged = 0; - *rexr = 0; - -restart: - for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { - if (*p == prefix_codes[i]) { - if (*p == 0x66) - *shorted = 1; -#ifdef __amd64__ - if ((*p & 0xf8) == 0x48) - *enlarged = 1; - if ((*p & 0xf4) == 0x44) - *rexr = 1; -#endif - p++; - goto restart; - } - } - - return (p - addr); -} - -static int get_opcode(unsigned char *addr, unsigned int *opcode) -{ - int len; - - if (*addr == 0x0F) { - /* 0x0F is extension instruction */ - *opcode = *(unsigned short *)addr; - len = 2; - } else { - *opcode = *addr; - len = 1; - } - - return len; -} - -#define CHECK_OP_TYPE(opcode, array, type) \ - for (i = 0; i < ARRAY_SIZE(array); i++) { \ - if (array[i] == opcode) { \ - rv = type; \ - goto exit; \ - } \ - } - -enum reason_type get_ins_type(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char *p; - int shorted, enlarged, rexr; - int i; - enum reason_type rv = OTHERS; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); - p += get_opcode(p, &opcode); - - CHECK_OP_TYPE(opcode, reg_rop, REG_READ); - CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE); - CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE); - -exit: - return rv; -} -#undef CHECK_OP_TYPE - -static unsigned int get_ins_reg_width(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char *p; - int i, shorted, enlarged, rexr; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); - p += get_opcode(p, &opcode); - - for (i = 0; i < ARRAY_SIZE(rw8); i++) - if (rw8[i] == opcode) - return 1; - - for (i = 0; i < ARRAY_SIZE(rw32); i++) - if (rw32[i] == opcode) - return (shorted ? 2 : (enlarged ? 8 : 4)); - - printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); - return 0; -} - -unsigned int get_ins_mem_width(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char *p; - int i, shorted, enlarged, rexr; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); - p += get_opcode(p, &opcode); - - for (i = 0; i < ARRAY_SIZE(mw8); i++) - if (mw8[i] == opcode) - return 1; - - for (i = 0; i < ARRAY_SIZE(mw16); i++) - if (mw16[i] == opcode) - return 2; - - for (i = 0; i < ARRAY_SIZE(mw32); i++) - if (mw32[i] == opcode) - return shorted ? 2 : 4; - - for (i = 0; i < ARRAY_SIZE(mw64); i++) - if (mw64[i] == opcode) - return shorted ? 2 : (enlarged ? 8 : 4); - - printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); - return 0; -} - -/* - * Define register ident in mod/rm byte. - * Note: these are NOT the same as in ptrace-abi.h. - */ -enum { - arg_AL = 0, - arg_CL = 1, - arg_DL = 2, - arg_BL = 3, - arg_AH = 4, - arg_CH = 5, - arg_DH = 6, - arg_BH = 7, - - arg_AX = 0, - arg_CX = 1, - arg_DX = 2, - arg_BX = 3, - arg_SP = 4, - arg_BP = 5, - arg_SI = 6, - arg_DI = 7, -#ifdef __amd64__ - arg_R8 = 8, - arg_R9 = 9, - arg_R10 = 10, - arg_R11 = 11, - arg_R12 = 12, - arg_R13 = 13, - arg_R14 = 14, - arg_R15 = 15 -#endif -}; - -static unsigned char *get_reg_w8(int no, struct pt_regs *regs) -{ - unsigned char *rv = NULL; - - switch (no) { - case arg_AL: - rv = (unsigned char *)®s->ax; - break; - case arg_BL: - rv = (unsigned char *)®s->bx; - break; - case arg_CL: - rv = (unsigned char *)®s->cx; - break; - case arg_DL: - rv = (unsigned char *)®s->dx; - break; - case arg_AH: - rv = 1 + (unsigned char *)®s->ax; - break; - case arg_BH: - rv = 1 + (unsigned char *)®s->bx; - break; - case arg_CH: - rv = 1 + (unsigned char *)®s->cx; - break; - case arg_DH: - rv = 1 + (unsigned char *)®s->dx; - break; -#ifdef __amd64__ - case arg_R8: - rv = (unsigned char *)®s->r8; - break; - case arg_R9: - rv = (unsigned char *)®s->r9; - break; - case arg_R10: - rv = (unsigned char *)®s->r10; - break; - case arg_R11: - rv = (unsigned char *)®s->r11; - break; - case arg_R12: - rv = (unsigned char *)®s->r12; - break; - case arg_R13: - rv = (unsigned char *)®s->r13; - break; - case arg_R14: - rv = (unsigned char *)®s->r14; - break; - case arg_R15: - rv = (unsigned char *)®s->r15; - break; -#endif - default: - printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); - break; - } - return rv; -} - -static unsigned long *get_reg_w32(int no, struct pt_regs *regs) -{ - unsigned long *rv = NULL; - - switch (no) { - case arg_AX: - rv = ®s->ax; - break; - case arg_BX: - rv = ®s->bx; - break; - case arg_CX: - rv = ®s->cx; - break; - case arg_DX: - rv = ®s->dx; - break; - case arg_SP: - rv = ®s->sp; - break; - case arg_BP: - rv = ®s->bp; - break; - case arg_SI: - rv = ®s->si; - break; - case arg_DI: - rv = ®s->di; - break; -#ifdef __amd64__ - case arg_R8: - rv = ®s->r8; - break; - case arg_R9: - rv = ®s->r9; - break; - case arg_R10: - rv = ®s->r10; - break; - case arg_R11: - rv = ®s->r11; - break; - case arg_R12: - rv = ®s->r12; - break; - case arg_R13: - rv = ®s->r13; - break; - case arg_R14: - rv = ®s->r14; - break; - case arg_R15: - rv = ®s->r15; - break; -#endif - default: - printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); - } - - return rv; -} - -unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) -{ - unsigned int opcode; - unsigned char mod_rm; - int reg; - unsigned char *p; - int i, shorted, enlarged, rexr; - unsigned long rv; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); - p += get_opcode(p, &opcode); - for (i = 0; i < ARRAY_SIZE(reg_rop); i++) - if (reg_rop[i] == opcode) { - rv = REG_READ; - goto do_work; - } - - for (i = 0; i < ARRAY_SIZE(reg_wop); i++) - if (reg_wop[i] == opcode) { - rv = REG_WRITE; - goto do_work; - } - - printk(KERN_ERR "mmiotrace: Not a register instruction, opcode " - "0x%02x\n", opcode); - goto err; - -do_work: - mod_rm = *p; - reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); - switch (get_ins_reg_width(ins_addr)) { - case 1: - return *get_reg_w8(reg, regs); - - case 2: - return *(unsigned short *)get_reg_w32(reg, regs); - - case 4: - return *(unsigned int *)get_reg_w32(reg, regs); - -#ifdef __amd64__ - case 8: - return *(unsigned long *)get_reg_w32(reg, regs); -#endif - - default: - printk(KERN_ERR "mmiotrace: Error width# %d\n", reg); - } - -err: - return 0; -} - -unsigned long get_ins_imm_val(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char mod_rm; - unsigned char mod; - unsigned char *p; - int i, shorted, enlarged, rexr; - unsigned long rv; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); - p += get_opcode(p, &opcode); - for (i = 0; i < ARRAY_SIZE(imm_wop); i++) - if (imm_wop[i] == opcode) { - rv = IMM_WRITE; - goto do_work; - } - - printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " - "0x%02x\n", opcode); - goto err; - -do_work: - mod_rm = *p; - mod = mod_rm >> 6; - p++; - switch (mod) { - case 0: - /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */ - /* AMD64: XXX Check for address size prefix? */ - if ((mod_rm & 0x7) == 0x5) - p += 4; - break; - - case 1: - p += 1; - break; - - case 2: - p += 4; - break; - - case 3: - default: - printk(KERN_ERR "mmiotrace: not a memory access instruction " - "at 0x%lx, rm_mod=0x%02x\n", - ins_addr, mod_rm); - } - - switch (get_ins_reg_width(ins_addr)) { - case 1: - return *(unsigned char *)p; - - case 2: - return *(unsigned short *)p; - - case 4: - return *(unsigned int *)p; - -#ifdef __amd64__ - case 8: - return *(unsigned long *)p; -#endif - - default: - printk(KERN_ERR "mmiotrace: Error: width.\n"); - } - -err: - return 0; -} diff --git a/arch/x86/kernel/mmiotrace/pf_in.h b/arch/x86/kernel/mmiotrace/pf_in.h deleted file mode 100644 index e05341a..0000000 --- a/arch/x86/kernel/mmiotrace/pf_in.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Fault Injection Test harness (FI) - * Copyright (C) Intel Crop. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, - * USA. - * - */ - -#ifndef __PF_H_ -#define __PF_H_ - -enum reason_type { - NOT_ME, /* page fault is not in regions */ - NOTHING, /* access others point in regions */ - REG_READ, /* read from addr to reg */ - REG_WRITE, /* write from reg to addr */ - IMM_WRITE, /* write from imm to addr */ - OTHERS /* Other instructions can not intercept */ -}; - -enum reason_type get_ins_type(unsigned long ins_addr); -unsigned int get_ins_mem_width(unsigned long ins_addr); -unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs); -unsigned long get_ins_imm_val(unsigned long ins_addr); - -#endif /* __PF_H_ */ diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c deleted file mode 100644 index cfa60b2..0000000 --- a/arch/x86/kernel/mmiotrace/testmmiotrace.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Written by Pekka Paalanen, 2008 - */ -#include -#include - -#define MODULE_NAME "testmmiotrace" - -static unsigned long mmio_address; -module_param(mmio_address, ulong, 0); -MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); - -static void do_write_test(void __iomem *p) -{ - unsigned int i; - for (i = 0; i < 256; i++) - iowrite8(i, p + i); - for (i = 1024; i < (5 * 1024); i += 2) - iowrite16(i * 12 + 7, p + i); - for (i = (5 * 1024); i < (16 * 1024); i += 4) - iowrite32(i * 212371 + 13, p + i); -} - -static void do_read_test(void __iomem *p) -{ - unsigned int i; - for (i = 0; i < 256; i++) - ioread8(p + i); - for (i = 1024; i < (5 * 1024); i += 2) - ioread16(p + i); - for (i = (5 * 1024); i < (16 * 1024); i += 4) - ioread32(p + i); -} - -static void do_test(void) -{ - void __iomem *p = ioremap_nocache(mmio_address, 0x4000); - if (!p) { - pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); - return; - } - do_write_test(p); - do_read_test(p); - iounmap(p); -} - -static int __init init(void) -{ - if (mmio_address == 0) { - pr_err(MODULE_NAME ": you have to use the module argument " - "mmio_address.\n"); - pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" - " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); - return -ENXIO; - } - - pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " - "in PCI address space, and writing " - "rubbish in there.\n", mmio_address); - do_test(); - return 0; -} - -static void __exit cleanup(void) -{ - pr_debug(MODULE_NAME ": unloaded.\n"); -} - -module_init(init); -module_exit(cleanup); -MODULE_LICENSE("GPL"); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index b7b3e4c..07dab50 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -8,6 +8,11 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o +obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o +obj-$(CONFIG_MMIOTRACE) += mmiotrace.o +mmiotrace-y := pf_in.o mmio-mod.o +obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o + ifeq ($(CONFIG_X86_32),y) obj-$(CONFIG_NUMA) += discontig_32.o else diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c new file mode 100644 index 0000000..3ad27b8 --- /dev/null +++ b/arch/x86/mm/kmmio.c @@ -0,0 +1,499 @@ +/* Support for MMIO probes. + * Benfit many code from kprobes + * (C) 2002 Louis Zhuang . + * 2007 Alexander Eichner + * 2008 Pekka Paalanen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define KMMIO_PAGE_HASH_BITS 4 +#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) + +struct kmmio_fault_page { + struct list_head list; + struct kmmio_fault_page *release_next; + unsigned long page; /* location of the fault page */ + + /* + * Number of times this page has been registered as a part + * of a probe. If zero, page is disarmed and this may be freed. + * Used only by writers (RCU). + */ + int count; +}; + +struct kmmio_delayed_release { + struct rcu_head rcu; + struct kmmio_fault_page *release_list; +}; + +struct kmmio_context { + struct kmmio_fault_page *fpage; + struct kmmio_probe *probe; + unsigned long saved_flags; + unsigned long addr; + int active; +}; + +static DEFINE_SPINLOCK(kmmio_lock); + +/* Protected by kmmio_lock */ +unsigned int kmmio_count; + +/* Read-protected by RCU, write-protected by kmmio_lock. */ +static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; +static LIST_HEAD(kmmio_probes); + +static struct list_head *kmmio_page_list(unsigned long page) +{ + return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; +} + +/* Accessed per-cpu */ +static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); + +/* + * this is basically a dynamic stabbing problem: + * Could use the existing prio tree code or + * Possible better implementations: + * The Interval Skip List: A Data Structure for Finding All Intervals That + * Overlap a Point (might be simple) + * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup + */ +/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */ +static struct kmmio_probe *get_kmmio_probe(unsigned long addr) +{ + struct kmmio_probe *p; + list_for_each_entry_rcu(p, &kmmio_probes, list) { + if (addr >= p->addr && addr <= (p->addr + p->len)) + return p; + } + return NULL; +} + +/* You must be holding RCU read lock. */ +static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) +{ + struct list_head *head; + struct kmmio_fault_page *p; + + page &= PAGE_MASK; + head = kmmio_page_list(page); + list_for_each_entry_rcu(p, head, list) { + if (p->page == page) + return p; + } + return NULL; +} + +static void set_page_present(unsigned long addr, bool present, int *pglevel) +{ + pteval_t pteval; + pmdval_t pmdval; + int level; + pmd_t *pmd; + pte_t *pte = lookup_address(addr, &level); + + if (!pte) { + pr_err("kmmio: no pte for page 0x%08lx\n", addr); + return; + } + + if (pglevel) + *pglevel = level; + + switch (level) { + case PG_LEVEL_2M: + pmd = (pmd_t *)pte; + pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; + if (present) + pmdval |= _PAGE_PRESENT; + set_pmd(pmd, __pmd(pmdval)); + break; + + case PG_LEVEL_4K: + pteval = pte_val(*pte) & ~_PAGE_PRESENT; + if (present) + pteval |= _PAGE_PRESENT; + set_pte_atomic(pte, __pte(pteval)); + break; + + default: + pr_err("kmmio: unexpected page level 0x%x.\n", level); + return; + } + + __flush_tlb_one(addr); +} + +/** Mark the given page as not present. Access to it will trigger a fault. */ +static void arm_kmmio_fault_page(unsigned long page, int *page_level) +{ + set_page_present(page & PAGE_MASK, false, page_level); +} + +/** Mark the given page as present. */ +static void disarm_kmmio_fault_page(unsigned long page, int *page_level) +{ + set_page_present(page & PAGE_MASK, true, page_level); +} + +/* + * This is being called from do_page_fault(). + * + * We may be in an interrupt or a critical section. Also prefecthing may + * trigger a page fault. We may be in the middle of process switch. + * We cannot take any locks, because we could be executing especially + * within a kmmio critical section. + * + * Local interrupts are disabled, so preemption cannot happen. + * Do not enable interrupts, do not sleep, and watch out for other CPUs. + */ +/* + * Interrupts are disabled on entry as trap3 is an interrupt gate + * and they remain disabled thorough out this function. + */ +int kmmio_handler(struct pt_regs *regs, unsigned long addr) +{ + struct kmmio_context *ctx; + struct kmmio_fault_page *faultpage; + int ret = 0; /* default to fault not handled */ + + /* + * Preemption is now disabled to prevent process switch during + * single stepping. We can only handle one active kmmio trace + * per cpu, so ensure that we finish it before something else + * gets to run. We also hold the RCU read lock over single + * stepping to avoid looking up the probe and kmmio_fault_page + * again. + */ + preempt_disable(); + rcu_read_lock(); + + faultpage = get_kmmio_fault_page(addr); + if (!faultpage) { + /* + * Either this page fault is not caused by kmmio, or + * another CPU just pulled the kmmio probe from under + * our feet. The latter case should not be possible. + */ + goto no_kmmio; + } + + ctx = &get_cpu_var(kmmio_ctx); + if (ctx->active) { + disarm_kmmio_fault_page(faultpage->page, NULL); + if (addr == ctx->addr) { + /* + * On SMP we sometimes get recursive probe hits on the + * same address. Context is already saved, fall out. + */ + pr_debug("kmmio: duplicate probe hit on CPU %d, for " + "address 0x%08lx.\n", + smp_processor_id(), addr); + ret = 1; + goto no_kmmio_ctx; + } + /* + * Prevent overwriting already in-flight context. + * This should not happen, let's hope disarming at least + * prevents a panic. + */ + pr_emerg("kmmio: recursive probe hit on CPU %d, " + "for address 0x%08lx. Ignoring.\n", + smp_processor_id(), addr); + pr_emerg("kmmio: previous hit was at 0x%08lx.\n", + ctx->addr); + goto no_kmmio_ctx; + } + ctx->active++; + + ctx->fpage = faultpage; + ctx->probe = get_kmmio_probe(addr); + ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); + ctx->addr = addr; + + if (ctx->probe && ctx->probe->pre_handler) + ctx->probe->pre_handler(ctx->probe, regs, addr); + + /* + * Enable single-stepping and disable interrupts for the faulting + * context. Local interrupts must not get enabled during stepping. + */ + regs->flags |= X86_EFLAGS_TF; + regs->flags &= ~X86_EFLAGS_IF; + + /* Now we set present bit in PTE and single step. */ + disarm_kmmio_fault_page(ctx->fpage->page, NULL); + + /* + * If another cpu accesses the same page while we are stepping, + * the access will not be caught. It will simply succeed and the + * only downside is we lose the event. If this becomes a problem, + * the user should drop to single cpu before tracing. + */ + + put_cpu_var(kmmio_ctx); + return 1; /* fault handled */ + +no_kmmio_ctx: + put_cpu_var(kmmio_ctx); +no_kmmio: + rcu_read_unlock(); + preempt_enable_no_resched(); + return ret; +} + +/* + * Interrupts are disabled on entry as trap1 is an interrupt gate + * and they remain disabled thorough out this function. + * This must always get called as the pair to kmmio_handler(). + */ +static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) +{ + int ret = 0; + struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); + + if (!ctx->active) { + pr_debug("kmmio: spurious debug trap on CPU %d.\n", + smp_processor_id()); + goto out; + } + + if (ctx->probe && ctx->probe->post_handler) + ctx->probe->post_handler(ctx->probe, condition, regs); + + arm_kmmio_fault_page(ctx->fpage->page, NULL); + + regs->flags &= ~X86_EFLAGS_TF; + regs->flags |= ctx->saved_flags; + + /* These were acquired in kmmio_handler(). */ + ctx->active--; + BUG_ON(ctx->active); + rcu_read_unlock(); + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, flags + * will have TF set, in which case, continue the remaining processing + * of do_debug, as if this is not a probe hit. + */ + if (!(regs->flags & X86_EFLAGS_TF)) + ret = 1; +out: + put_cpu_var(kmmio_ctx); + return ret; +} + +/* You must be holding kmmio_lock. */ +static int add_kmmio_fault_page(unsigned long page) +{ + struct kmmio_fault_page *f; + + page &= PAGE_MASK; + f = get_kmmio_fault_page(page); + if (f) { + if (!f->count) + arm_kmmio_fault_page(f->page, NULL); + f->count++; + return 0; + } + + f = kmalloc(sizeof(*f), GFP_ATOMIC); + if (!f) + return -1; + + f->count = 1; + f->page = page; + list_add_rcu(&f->list, kmmio_page_list(f->page)); + + arm_kmmio_fault_page(f->page, NULL); + + return 0; +} + +/* You must be holding kmmio_lock. */ +static void release_kmmio_fault_page(unsigned long page, + struct kmmio_fault_page **release_list) +{ + struct kmmio_fault_page *f; + + page &= PAGE_MASK; + f = get_kmmio_fault_page(page); + if (!f) + return; + + f->count--; + BUG_ON(f->count < 0); + if (!f->count) { + disarm_kmmio_fault_page(f->page, NULL); + f->release_next = *release_list; + *release_list = f; + } +} + +int register_kmmio_probe(struct kmmio_probe *p) +{ + unsigned long flags; + int ret = 0; + unsigned long size = 0; + + spin_lock_irqsave(&kmmio_lock, flags); + if (get_kmmio_probe(p->addr)) { + ret = -EEXIST; + goto out; + } + kmmio_count++; + list_add_rcu(&p->list, &kmmio_probes); + while (size < p->len) { + if (add_kmmio_fault_page(p->addr + size)) + pr_err("kmmio: Unable to set page fault.\n"); + size += PAGE_SIZE; + } +out: + spin_unlock_irqrestore(&kmmio_lock, flags); + /* + * XXX: What should I do here? + * Here was a call to global_flush_tlb(), but it does not exist + * anymore. It seems it's not needed after all. + */ + return ret; +} +EXPORT_SYMBOL(register_kmmio_probe); + +static void rcu_free_kmmio_fault_pages(struct rcu_head *head) +{ + struct kmmio_delayed_release *dr = container_of( + head, + struct kmmio_delayed_release, + rcu); + struct kmmio_fault_page *p = dr->release_list; + while (p) { + struct kmmio_fault_page *next = p->release_next; + BUG_ON(p->count); + kfree(p); + p = next; + } + kfree(dr); +} + +static void remove_kmmio_fault_pages(struct rcu_head *head) +{ + struct kmmio_delayed_release *dr = container_of( + head, + struct kmmio_delayed_release, + rcu); + struct kmmio_fault_page *p = dr->release_list; + struct kmmio_fault_page **prevp = &dr->release_list; + unsigned long flags; + spin_lock_irqsave(&kmmio_lock, flags); + while (p) { + if (!p->count) + list_del_rcu(&p->list); + else + *prevp = p->release_next; + prevp = &p->release_next; + p = p->release_next; + } + spin_unlock_irqrestore(&kmmio_lock, flags); + /* This is the real RCU destroy call. */ + call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); +} + +/* + * Remove a kmmio probe. You have to synchronize_rcu() before you can be + * sure that the callbacks will not be called anymore. Only after that + * you may actually release your struct kmmio_probe. + * + * Unregistering a kmmio fault page has three steps: + * 1. release_kmmio_fault_page() + * Disarm the page, wait a grace period to let all faults finish. + * 2. remove_kmmio_fault_pages() + * Remove the pages from kmmio_page_table. + * 3. rcu_free_kmmio_fault_pages() + * Actally free the kmmio_fault_page structs as with RCU. + */ +void unregister_kmmio_probe(struct kmmio_probe *p) +{ + unsigned long flags; + unsigned long size = 0; + struct kmmio_fault_page *release_list = NULL; + struct kmmio_delayed_release *drelease; + + spin_lock_irqsave(&kmmio_lock, flags); + while (size < p->len) { + release_kmmio_fault_page(p->addr + size, &release_list); + size += PAGE_SIZE; + } + list_del_rcu(&p->list); + kmmio_count--; + spin_unlock_irqrestore(&kmmio_lock, flags); + + drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); + if (!drelease) { + pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); + return; + } + drelease->release_list = release_list; + + /* + * This is not really RCU here. We have just disarmed a set of + * pages so that they cannot trigger page faults anymore. However, + * we cannot remove the pages from kmmio_page_table, + * because a probe hit might be in flight on another CPU. The + * pages are collected into a list, and they will be removed from + * kmmio_page_table when it is certain that no probe hit related to + * these pages can be in flight. RCU grace period sounds like a + * good choice. + * + * If we removed the pages too early, kmmio page fault handler might + * not find the respective kmmio_fault_page and determine it's not + * a kmmio fault, when it actually is. This would lead to madness. + */ + call_rcu(&drelease->rcu, remove_kmmio_fault_pages); +} +EXPORT_SYMBOL(unregister_kmmio_probe); + +static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, + void *args) +{ + struct die_args *arg = args; + + if (val == DIE_DEBUG && (arg->err & DR_STEP)) + if (post_kmmio_handler(arg->err, arg->regs) == 1) + return NOTIFY_STOP; + + return NOTIFY_DONE; +} + +static struct notifier_block nb_die = { + .notifier_call = kmmio_die_notifier +}; + +static int __init init_kmmio(void) +{ + int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kmmio_page_table[i]); + return register_die_notifier(&nb_die); +} +fs_initcall(init_kmmio); /* should be before device_initcall() */ diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c new file mode 100644 index 0000000..8256546 --- /dev/null +++ b/arch/x86/mm/mmio-mod.c @@ -0,0 +1,457 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2005 + * Jeff Muizelaar, 2006, 2007 + * Pekka Paalanen, 2008 + * + * Derived from the read-mod example from relay-examples by Tom Zanussi. + */ +#define DEBUG 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for ISA_START_ADDRESS */ +#include +#include + +#include "pf_in.h" + +#define NAME "mmiotrace: " + +struct trap_reason { + unsigned long addr; + unsigned long ip; + enum reason_type type; + int active_traces; +}; + +struct remap_trace { + struct list_head list; + struct kmmio_probe probe; + unsigned long phys; + unsigned long id; +}; + +/* Accessed per-cpu. */ +static DEFINE_PER_CPU(struct trap_reason, pf_reason); +static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); + +#if 0 /* XXX: no way gather this info anymore */ +/* Access to this is not per-cpu. */ +static DEFINE_PER_CPU(atomic_t, dropped); +#endif + +static struct dentry *marker_file; + +static DEFINE_MUTEX(mmiotrace_mutex); +static DEFINE_SPINLOCK(trace_lock); +static atomic_t mmiotrace_enabled; +static LIST_HEAD(trace_list); /* struct remap_trace */ + +/* + * Locking in this file: + * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections. + * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex + * and trace_lock. + * - Routines depending on is_enabled() must take trace_lock. + * - trace_list users must hold trace_lock. + * - is_enabled() guarantees that mmio_trace_record is allowed. + * - pre/post callbacks assume the effect of is_enabled() being true. + */ + +/* module parameters */ +static unsigned long filter_offset; +static int nommiotrace; +static int ISA_trace; +static int trace_pc; + +module_param(filter_offset, ulong, 0); +module_param(nommiotrace, bool, 0); +module_param(ISA_trace, bool, 0); +module_param(trace_pc, bool, 0); + +MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); +MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); +MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); +MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); + +static bool is_enabled(void) +{ + return atomic_read(&mmiotrace_enabled); +} + +#if 0 /* XXX: needs rewrite */ +/* + * Write callback for the debugfs entry: + * Read a marker and write it to the mmio trace log + */ +static ssize_t write_marker(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + char *event = NULL; + struct mm_io_header *headp; + ssize_t len = (count > 65535) ? 65535 : count; + + event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); + if (!event) + return -ENOMEM; + + headp = (struct mm_io_header *)event; + headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); + headp->data_len = len; + + if (copy_from_user(event + sizeof(*headp), buffer, len)) { + kfree(event); + return -EFAULT; + } + + spin_lock_irq(&trace_lock); +#if 0 /* XXX: convert this to use tracing */ + if (is_enabled()) + relay_write(chan, event, sizeof(*headp) + len); + else +#endif + len = -EINVAL; + spin_unlock_irq(&trace_lock); + kfree(event); + return len; +} +#endif + +static void print_pte(unsigned long address) +{ + int level; + pte_t *pte = lookup_address(address, &level); + + if (!pte) { + pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", + __func__, address); + return; + } + + if (level == PG_LEVEL_2M) { + pr_emerg(NAME "4MB pages are not currently supported: " + "0x%08lx\n", address); + BUG(); + } + pr_info(NAME "pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), + pte_val(*pte) & _PAGE_PRESENT); +} + +/* + * For some reason the pre/post pairs have been called in an + * unmatched order. Report and die. + */ +static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) +{ + const struct trap_reason *my_reason = &get_cpu_var(pf_reason); + pr_emerg(NAME "unexpected fault for address: 0x%08lx, " + "last fault for address: 0x%08lx\n", + addr, my_reason->addr); + print_pte(addr); + print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); + print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); +#ifdef __i386__ + pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->ax, regs->bx, regs->cx, regs->dx); + pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->si, regs->di, regs->bp, regs->sp); +#else + pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", + regs->ax, regs->cx, regs->dx); + pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", + regs->si, regs->di, regs->bp, regs->sp); +#endif + put_cpu_var(pf_reason); + BUG(); +} + +static void pre(struct kmmio_probe *p, struct pt_regs *regs, + unsigned long addr) +{ + struct trap_reason *my_reason = &get_cpu_var(pf_reason); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); + const unsigned long instptr = instruction_pointer(regs); + const enum reason_type type = get_ins_type(instptr); + struct remap_trace *trace = p->user_data; + + /* it doesn't make sense to have more than one active trace per cpu */ + if (my_reason->active_traces) + die_kmmio_nesting_error(regs, addr); + else + my_reason->active_traces++; + + my_reason->type = type; + my_reason->addr = addr; + my_reason->ip = instptr; + + my_trace->phys = addr - trace->probe.addr + trace->phys; + my_trace->map_id = trace->id; + + /* + * Only record the program counter when requested. + * It may taint clean-room reverse engineering. + */ + if (trace_pc) + my_trace->pc = instptr; + else + my_trace->pc = 0; + + /* + * XXX: the timestamp recorded will be *after* the tracing has been + * done, not at the time we hit the instruction. SMP implications + * on event ordering? + */ + + switch (type) { + case REG_READ: + my_trace->opcode = MMIO_READ; + my_trace->width = get_ins_mem_width(instptr); + break; + case REG_WRITE: + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_reg_val(instptr, regs); + break; + case IMM_WRITE: + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_imm_val(instptr); + break; + default: + { + unsigned char *ip = (unsigned char *)instptr; + my_trace->opcode = MMIO_UNKNOWN_OP; + my_trace->width = 0; + my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | + *(ip + 2); + } + } + put_cpu_var(cpu_trace); + put_cpu_var(pf_reason); +} + +static void post(struct kmmio_probe *p, unsigned long condition, + struct pt_regs *regs) +{ + struct trap_reason *my_reason = &get_cpu_var(pf_reason); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); + + /* this should always return the active_trace count to 0 */ + my_reason->active_traces--; + if (my_reason->active_traces) { + pr_emerg(NAME "unexpected post handler"); + BUG(); + } + + switch (my_reason->type) { + case REG_READ: + my_trace->value = get_ins_reg_val(my_reason->ip, regs); + break; + default: + break; + } + + mmio_trace_rw(my_trace); + put_cpu_var(cpu_trace); + put_cpu_var(pf_reason); +} + +static void ioremap_trace_core(unsigned long offset, unsigned long size, + void __iomem *addr) +{ + static atomic_t next_id; + struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); + struct mmiotrace_map map = { + .phys = offset, + .virt = (unsigned long)addr, + .len = size, + .opcode = MMIO_PROBE + }; + + if (!trace) { + pr_err(NAME "kmalloc failed in ioremap\n"); + return; + } + + *trace = (struct remap_trace) { + .probe = { + .addr = (unsigned long)addr, + .len = size, + .pre_handler = pre, + .post_handler = post, + .user_data = trace + }, + .phys = offset, + .id = atomic_inc_return(&next_id) + }; + map.map_id = trace->id; + + spin_lock_irq(&trace_lock); + if (!is_enabled()) + goto not_enabled; + + mmio_trace_mapping(&map); + list_add_tail(&trace->list, &trace_list); + if (!nommiotrace) + register_kmmio_probe(&trace->probe); + +not_enabled: + spin_unlock_irq(&trace_lock); +} + +void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +{ + if (!is_enabled()) /* recheck and proper locking in *_core() */ + return; + + pr_debug(NAME "ioremap_*(0x%lx, 0x%lx) = %p\n", offset, size, addr); + if ((filter_offset) && (offset != filter_offset)) + return; + ioremap_trace_core(offset, size, addr); +} + +static void iounmap_trace_core(volatile void __iomem *addr) +{ + struct mmiotrace_map map = { + .phys = 0, + .virt = (unsigned long)addr, + .len = 0, + .opcode = MMIO_UNPROBE + }; + struct remap_trace *trace; + struct remap_trace *tmp; + struct remap_trace *found_trace = NULL; + + pr_debug(NAME "Unmapping %p.\n", addr); + + spin_lock_irq(&trace_lock); + if (!is_enabled()) + goto not_enabled; + + list_for_each_entry_safe(trace, tmp, &trace_list, list) { + if ((unsigned long)addr == trace->probe.addr) { + if (!nommiotrace) + unregister_kmmio_probe(&trace->probe); + list_del(&trace->list); + found_trace = trace; + break; + } + } + map.map_id = (found_trace) ? found_trace->id : -1; + mmio_trace_mapping(&map); + +not_enabled: + spin_unlock_irq(&trace_lock); + if (found_trace) { + synchronize_rcu(); /* unregister_kmmio_probe() requirement */ + kfree(found_trace); + } +} + +void mmiotrace_iounmap(volatile void __iomem *addr) +{ + might_sleep(); + if (is_enabled()) /* recheck and proper locking in *_core() */ + iounmap_trace_core(addr); +} + +static void clear_trace_list(void) +{ + struct remap_trace *trace; + struct remap_trace *tmp; + + /* + * No locking required, because the caller ensures we are in a + * critical section via mutex, and is_enabled() is false, + * i.e. nothing can traverse or modify this list. + * Caller also ensures is_enabled() cannot change. + */ + list_for_each_entry(trace, &trace_list, list) { + pr_notice(NAME "purging non-iounmapped " + "trace @0x%08lx, size 0x%lx.\n", + trace->probe.addr, trace->probe.len); + if (!nommiotrace) + unregister_kmmio_probe(&trace->probe); + } + synchronize_rcu(); /* unregister_kmmio_probe() requirement */ + + list_for_each_entry_safe(trace, tmp, &trace_list, list) { + list_del(&trace->list); + kfree(trace); + } +} + +#if 0 /* XXX: out of order */ +static struct file_operations fops_marker = { + .owner = THIS_MODULE, + .write = write_marker +}; +#endif + +void enable_mmiotrace(void) +{ + mutex_lock(&mmiotrace_mutex); + if (is_enabled()) + goto out; + +#if 0 /* XXX: tracing does not support text entries */ + marker_file = debugfs_create_file("marker", 0660, dir, NULL, + &fops_marker); + if (!marker_file) + pr_err(NAME "marker file creation failed.\n"); +#endif + + if (nommiotrace) + pr_info(NAME "MMIO tracing disabled.\n"); + if (ISA_trace) + pr_warning(NAME "Warning! low ISA range will be traced.\n"); + spin_lock_irq(&trace_lock); + atomic_inc(&mmiotrace_enabled); + spin_unlock_irq(&trace_lock); + pr_info(NAME "enabled.\n"); +out: + mutex_unlock(&mmiotrace_mutex); +} + +void disable_mmiotrace(void) +{ + mutex_lock(&mmiotrace_mutex); + if (!is_enabled()) + goto out; + + spin_lock_irq(&trace_lock); + atomic_dec(&mmiotrace_enabled); + BUG_ON(is_enabled()); + spin_unlock_irq(&trace_lock); + + clear_trace_list(); /* guarantees: no more kmmio callbacks */ + if (marker_file) { + debugfs_remove(marker_file); + marker_file = NULL; + } + + pr_info(NAME "disabled.\n"); +out: + mutex_unlock(&mmiotrace_mutex); +} diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c new file mode 100644 index 0000000..efa1911 --- /dev/null +++ b/arch/x86/mm/pf_in.c @@ -0,0 +1,489 @@ +/* + * Fault Injection Test harness (FI) + * Copyright (C) Intel Crop. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + * + */ + +/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp + * Copyright by Intel Crop., 2002 + * Louis Zhuang (louis.zhuang@intel.com) + * + * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007 + */ + +#include +#include /* struct pt_regs */ +#include "pf_in.h" + +#ifdef __i386__ +/* IA32 Manual 3, 2-1 */ +static unsigned char prefix_codes[] = { + 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, + 0x65, 0x2E, 0x3E, 0x66, 0x67 +}; +/* IA32 Manual 3, 3-432*/ +static unsigned int reg_rop[] = { + 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int imm_wop[] = { 0xC6, 0xC7 }; +/* IA32 Manual 3, 3-432*/ +static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; +static unsigned int rw32[] = { + 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; +static unsigned int mw16[] = { 0xB70F, 0xBF0F }; +static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; +static unsigned int mw64[] = {}; +#else /* not __i386__ */ +static unsigned char prefix_codes[] = { + 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36, + 0xF0, 0xF3, 0xF2, + /* REX Prefixes */ + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f +}; +/* AMD64 Manual 3, Appendix A*/ +static unsigned int reg_rop[] = { + 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int imm_wop[] = { 0xC6, 0xC7 }; +static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; +static unsigned int rw32[] = { + 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +/* 8 bit only */ +static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; +/* 16 bit only */ +static unsigned int mw16[] = { 0xB70F, 0xBF0F }; +/* 16 or 32 bit */ +static unsigned int mw32[] = { 0xC7 }; +/* 16, 32 or 64 bit */ +static unsigned int mw64[] = { 0x89, 0x8B }; +#endif /* not __i386__ */ + +static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, + int *rexr) +{ + int i; + unsigned char *p = addr; + *shorted = 0; + *enlarged = 0; + *rexr = 0; + +restart: + for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { + if (*p == prefix_codes[i]) { + if (*p == 0x66) + *shorted = 1; +#ifdef __amd64__ + if ((*p & 0xf8) == 0x48) + *enlarged = 1; + if ((*p & 0xf4) == 0x44) + *rexr = 1; +#endif + p++; + goto restart; + } + } + + return (p - addr); +} + +static int get_opcode(unsigned char *addr, unsigned int *opcode) +{ + int len; + + if (*addr == 0x0F) { + /* 0x0F is extension instruction */ + *opcode = *(unsigned short *)addr; + len = 2; + } else { + *opcode = *addr; + len = 1; + } + + return len; +} + +#define CHECK_OP_TYPE(opcode, array, type) \ + for (i = 0; i < ARRAY_SIZE(array); i++) { \ + if (array[i] == opcode) { \ + rv = type; \ + goto exit; \ + } \ + } + +enum reason_type get_ins_type(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int shorted, enlarged, rexr; + int i; + enum reason_type rv = OTHERS; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + CHECK_OP_TYPE(opcode, reg_rop, REG_READ); + CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE); + CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE); + +exit: + return rv; +} +#undef CHECK_OP_TYPE + +static unsigned int get_ins_reg_width(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int i, shorted, enlarged, rexr; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + for (i = 0; i < ARRAY_SIZE(rw8); i++) + if (rw8[i] == opcode) + return 1; + + for (i = 0; i < ARRAY_SIZE(rw32); i++) + if (rw32[i] == opcode) + return (shorted ? 2 : (enlarged ? 8 : 4)); + + printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); + return 0; +} + +unsigned int get_ins_mem_width(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int i, shorted, enlarged, rexr; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + for (i = 0; i < ARRAY_SIZE(mw8); i++) + if (mw8[i] == opcode) + return 1; + + for (i = 0; i < ARRAY_SIZE(mw16); i++) + if (mw16[i] == opcode) + return 2; + + for (i = 0; i < ARRAY_SIZE(mw32); i++) + if (mw32[i] == opcode) + return shorted ? 2 : 4; + + for (i = 0; i < ARRAY_SIZE(mw64); i++) + if (mw64[i] == opcode) + return shorted ? 2 : (enlarged ? 8 : 4); + + printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); + return 0; +} + +/* + * Define register ident in mod/rm byte. + * Note: these are NOT the same as in ptrace-abi.h. + */ +enum { + arg_AL = 0, + arg_CL = 1, + arg_DL = 2, + arg_BL = 3, + arg_AH = 4, + arg_CH = 5, + arg_DH = 6, + arg_BH = 7, + + arg_AX = 0, + arg_CX = 1, + arg_DX = 2, + arg_BX = 3, + arg_SP = 4, + arg_BP = 5, + arg_SI = 6, + arg_DI = 7, +#ifdef __amd64__ + arg_R8 = 8, + arg_R9 = 9, + arg_R10 = 10, + arg_R11 = 11, + arg_R12 = 12, + arg_R13 = 13, + arg_R14 = 14, + arg_R15 = 15 +#endif +}; + +static unsigned char *get_reg_w8(int no, struct pt_regs *regs) +{ + unsigned char *rv = NULL; + + switch (no) { + case arg_AL: + rv = (unsigned char *)®s->ax; + break; + case arg_BL: + rv = (unsigned char *)®s->bx; + break; + case arg_CL: + rv = (unsigned char *)®s->cx; + break; + case arg_DL: + rv = (unsigned char *)®s->dx; + break; + case arg_AH: + rv = 1 + (unsigned char *)®s->ax; + break; + case arg_BH: + rv = 1 + (unsigned char *)®s->bx; + break; + case arg_CH: + rv = 1 + (unsigned char *)®s->cx; + break; + case arg_DH: + rv = 1 + (unsigned char *)®s->dx; + break; +#ifdef __amd64__ + case arg_R8: + rv = (unsigned char *)®s->r8; + break; + case arg_R9: + rv = (unsigned char *)®s->r9; + break; + case arg_R10: + rv = (unsigned char *)®s->r10; + break; + case arg_R11: + rv = (unsigned char *)®s->r11; + break; + case arg_R12: + rv = (unsigned char *)®s->r12; + break; + case arg_R13: + rv = (unsigned char *)®s->r13; + break; + case arg_R14: + rv = (unsigned char *)®s->r14; + break; + case arg_R15: + rv = (unsigned char *)®s->r15; + break; +#endif + default: + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + break; + } + return rv; +} + +static unsigned long *get_reg_w32(int no, struct pt_regs *regs) +{ + unsigned long *rv = NULL; + + switch (no) { + case arg_AX: + rv = ®s->ax; + break; + case arg_BX: + rv = ®s->bx; + break; + case arg_CX: + rv = ®s->cx; + break; + case arg_DX: + rv = ®s->dx; + break; + case arg_SP: + rv = ®s->sp; + break; + case arg_BP: + rv = ®s->bp; + break; + case arg_SI: + rv = ®s->si; + break; + case arg_DI: + rv = ®s->di; + break; +#ifdef __amd64__ + case arg_R8: + rv = ®s->r8; + break; + case arg_R9: + rv = ®s->r9; + break; + case arg_R10: + rv = ®s->r10; + break; + case arg_R11: + rv = ®s->r11; + break; + case arg_R12: + rv = ®s->r12; + break; + case arg_R13: + rv = ®s->r13; + break; + case arg_R14: + rv = ®s->r14; + break; + case arg_R15: + rv = ®s->r15; + break; +#endif + default: + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + } + + return rv; +} + +unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) +{ + unsigned int opcode; + unsigned char mod_rm; + int reg; + unsigned char *p; + int i, shorted, enlarged, rexr; + unsigned long rv; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + for (i = 0; i < ARRAY_SIZE(reg_rop); i++) + if (reg_rop[i] == opcode) { + rv = REG_READ; + goto do_work; + } + + for (i = 0; i < ARRAY_SIZE(reg_wop); i++) + if (reg_wop[i] == opcode) { + rv = REG_WRITE; + goto do_work; + } + + printk(KERN_ERR "mmiotrace: Not a register instruction, opcode " + "0x%02x\n", opcode); + goto err; + +do_work: + mod_rm = *p; + reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); + switch (get_ins_reg_width(ins_addr)) { + case 1: + return *get_reg_w8(reg, regs); + + case 2: + return *(unsigned short *)get_reg_w32(reg, regs); + + case 4: + return *(unsigned int *)get_reg_w32(reg, regs); + +#ifdef __amd64__ + case 8: + return *(unsigned long *)get_reg_w32(reg, regs); +#endif + + default: + printk(KERN_ERR "mmiotrace: Error width# %d\n", reg); + } + +err: + return 0; +} + +unsigned long get_ins_imm_val(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char mod_rm; + unsigned char mod; + unsigned char *p; + int i, shorted, enlarged, rexr; + unsigned long rv; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + for (i = 0; i < ARRAY_SIZE(imm_wop); i++) + if (imm_wop[i] == opcode) { + rv = IMM_WRITE; + goto do_work; + } + + printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " + "0x%02x\n", opcode); + goto err; + +do_work: + mod_rm = *p; + mod = mod_rm >> 6; + p++; + switch (mod) { + case 0: + /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */ + /* AMD64: XXX Check for address size prefix? */ + if ((mod_rm & 0x7) == 0x5) + p += 4; + break; + + case 1: + p += 1; + break; + + case 2: + p += 4; + break; + + case 3: + default: + printk(KERN_ERR "mmiotrace: not a memory access instruction " + "at 0x%lx, rm_mod=0x%02x\n", + ins_addr, mod_rm); + } + + switch (get_ins_reg_width(ins_addr)) { + case 1: + return *(unsigned char *)p; + + case 2: + return *(unsigned short *)p; + + case 4: + return *(unsigned int *)p; + +#ifdef __amd64__ + case 8: + return *(unsigned long *)p; +#endif + + default: + printk(KERN_ERR "mmiotrace: Error: width.\n"); + } + +err: + return 0; +} diff --git a/arch/x86/mm/pf_in.h b/arch/x86/mm/pf_in.h new file mode 100644 index 0000000..e05341a --- /dev/null +++ b/arch/x86/mm/pf_in.h @@ -0,0 +1,39 @@ +/* + * Fault Injection Test harness (FI) + * Copyright (C) Intel Crop. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + * + */ + +#ifndef __PF_H_ +#define __PF_H_ + +enum reason_type { + NOT_ME, /* page fault is not in regions */ + NOTHING, /* access others point in regions */ + REG_READ, /* read from addr to reg */ + REG_WRITE, /* write from reg to addr */ + IMM_WRITE, /* write from imm to addr */ + OTHERS /* Other instructions can not intercept */ +}; + +enum reason_type get_ins_type(unsigned long ins_addr); +unsigned int get_ins_mem_width(unsigned long ins_addr); +unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs); +unsigned long get_ins_imm_val(unsigned long ins_addr); + +#endif /* __PF_H_ */ diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c new file mode 100644 index 0000000..cfa60b2 --- /dev/null +++ b/arch/x86/mm/testmmiotrace.c @@ -0,0 +1,71 @@ +/* + * Written by Pekka Paalanen, 2008 + */ +#include +#include + +#define MODULE_NAME "testmmiotrace" + +static unsigned long mmio_address; +module_param(mmio_address, ulong, 0); +MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); + +static void do_write_test(void __iomem *p) +{ + unsigned int i; + for (i = 0; i < 256; i++) + iowrite8(i, p + i); + for (i = 1024; i < (5 * 1024); i += 2) + iowrite16(i * 12 + 7, p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) + iowrite32(i * 212371 + 13, p + i); +} + +static void do_read_test(void __iomem *p) +{ + unsigned int i; + for (i = 0; i < 256; i++) + ioread8(p + i); + for (i = 1024; i < (5 * 1024); i += 2) + ioread16(p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) + ioread32(p + i); +} + +static void do_test(void) +{ + void __iomem *p = ioremap_nocache(mmio_address, 0x4000); + if (!p) { + pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); + return; + } + do_write_test(p); + do_read_test(p); + iounmap(p); +} + +static int __init init(void) +{ + if (mmio_address == 0) { + pr_err(MODULE_NAME ": you have to use the module argument " + "mmio_address.\n"); + pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" + " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); + return -ENXIO; + } + + pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " + "in PCI address space, and writing " + "rubbish in there.\n", mmio_address); + do_test(); + return 0; +} + +static void __exit cleanup(void) +{ + pr_debug(MODULE_NAME ": unloaded.\n"); +} + +module_init(init); +module_exit(cleanup); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From e4b37ee68609037ffcaa2fcfae47cd31a605bb9e Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:59 +0200 Subject: x86 mmiotrace: remove ISA_trace parameter. This had become a no-op. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 8256546..ab2bb77 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -81,17 +81,14 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ /* module parameters */ static unsigned long filter_offset; static int nommiotrace; -static int ISA_trace; static int trace_pc; module_param(filter_offset, ulong, 0); module_param(nommiotrace, bool, 0); -module_param(ISA_trace, bool, 0); module_param(trace_pc, bool, 0); MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); -MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); static bool is_enabled(void) @@ -424,8 +421,6 @@ void enable_mmiotrace(void) if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); - if (ISA_trace) - pr_warning(NAME "Warning! low ISA range will be traced.\n"); spin_lock_irq(&trace_lock); atomic_inc(&mmiotrace_enabled); spin_unlock_irq(&trace_lock); -- cgit v0.10.2 From 736ca61fa81874b3fee205a593251b1869d0bcf1 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:59 +0200 Subject: x86 mmiotrace: Do not print bogus pid Non-zero pid indicates the MMIO access originated in user space. We do not catch that kind of accesses yet, so always print zero for now. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 361472b..79be4a1 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -117,20 +117,20 @@ static int mmio_print_rw(struct trace_iterator *iter) ret = trace_seq_printf(s, "R %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", rw->width, secs, usec_rem, rw->map_id, rw->phys, - rw->value, rw->pc, entry->pid); + rw->value, rw->pc, 0); break; case MMIO_WRITE: ret = trace_seq_printf(s, "W %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", rw->width, secs, usec_rem, rw->map_id, rw->phys, - rw->value, rw->pc, entry->pid); + rw->value, rw->pc, 0); break; case MMIO_UNKNOWN_OP: ret = trace_seq_printf(s, "UNKNOWN %lu.%06lu %d 0x%lx %02x,%02x,%02x 0x%lx %d\n", secs, usec_rem, rw->map_id, rw->phys, (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, - (rw->value >> 0) & 0xff, rw->pc, entry->pid); + (rw->value >> 0) & 0xff, rw->pc, 0); break; default: ret = trace_seq_printf(s, "rw what?\n"); -- cgit v0.10.2 From c6c67c1afcce71335b18ed8769b1165c468bfb03 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:59 +0200 Subject: mmiotrace: add user documentation Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt new file mode 100644 index 0000000..84246f7 --- /dev/null +++ b/Documentation/tracers/mmiotrace.txt @@ -0,0 +1,153 @@ + In-kernel memory-mapped I/O tracing + + +Home page and links to optional user space tools: + + http://nouveau.freedesktop.org/wiki/MmioTrace + +MMIO tracing was originally developed by Intel around 2003 for their Fault +Injection Test Harness. In Dec 2006 - Jan 2007, using the code from Intel, +Jeff Muizelaar created a tool for tracing MMIO accesses with the Nouveau +project in mind. Since then many people have contributed. + +Mmiotrace was built for reverse engineering any memory-mapped IO device with +the Nouveau project as the first real user. Only x86 and x86_64 architectures +are supported. + +Out-of-tree mmiotrace was originally modified for mainline inclusion and +ftrace framework by Pekka Paalanen . + + +Preparation +----------- + +Mmiotrace feature is compiled in by the CONFIG_MMIOTRACE option. Tracing is +disabled by default, so it is safe to have this set to yes. SMP systems are +supported, but tracing is unreliable and may miss events if more than one CPU +is on-line, therefore mmiotrace takes all but one CPU off-line during run-time +activation [not implemented]. + + +Usage Quick Reference +--------------------- + +$ mount -t debugfs debugfs /debug +$ echo mmiotrace > /debug/tracing/current_tracer +$ cat /debug/tracing/trace_pipe > mydump.txt & +Start X or whatever. +$ echo "X is up" > /debug/tracing/marker +$ echo none > /debug/tracing/current_tracer +Check kernel log. + + +Usage +----- + +Make sure debugfs is mounted to /debug. If not, (requires root privileges) +$ mount -t debugfs debugfs /debug + +Check that the driver you are about to trace is not loaded. + +Activate mmiotrace (requires root privileges): +$ echo mmiotrace > /debug/tracing/current_tracer + +Start storing the trace: +$ cat /debug/tracing/trace_pipe > mydump.txt & +The 'cat' process should stay running (sleeping) in the background. + +Load the driver you want to trace and use it. Mmiotrace will only catch MMIO +accesses to areas that are ioremapped while mmiotrace is active. + +[Unimplemented feature:] +During tracing you can place comments (markers) into the trace by +$ echo "X is up" > /debug/tracing/marker +This makes it easier to see which part of the (huge) trace corresponds to +which action. It is recommended to place descriptive markers about what you +do. + +Shut down mmiotrace (requires root privileges): +$ echo none > /debug/tracing/current_tracer +The 'cat' process exits. If it does not, kill it by 'fg' and pressing ctrl+c. + +[This feature is not implemented yet!] +Check your kernel log. If there are messages about mmiotrace losing events, +this is due to buffer overrun, and the trace is incomplete. You should enlarge +the buffers and try again. [How?] + +If you are doing a trace for a driver project, e.g. Nouveau, you should also +do the following before sending your results: +$ lspci -vvv > lspci.txt +$ dmesg > dmesg.txt +$ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt +and then send the .tar.gz file. The trace compresses considerably. Replace +"pciid" and "nick" with the PCI ID or model name of your piece of hardware +under investigation and your nick name. + + +How Mmiotrace Works +------------------- + +Access to hardware IO-memory is gained by mapping addresses from PCI bus by +calling one of the ioremap_*() functions. Mmiotrace is hooked into the +__ioremap() function and gets called whenever a mapping is created. Mapping is +an event that is recorded into the trace log. Note, that ISA range mappings +are not caught, since the mapping always exists and is returned directly. + +MMIO accesses are recorded via page faults. Just before __ioremap() returns, +the mapped pages are marked as not present. Any access to the pages causes a +fault. The page fault handler calls mmiotrace to handle the fault. Mmiotrace +marks the page present, sets TF flag to achieve single stepping and exits the +fault handler. The instruction that faulted is executed and debug trap is +entered. Here mmiotrace again marks the page as not present. The instruction +is decoded to get the type of operation (read/write), data width and the value +read or written. These are stored to the trace log. + +Setting the page present in the page fault handler has a race condition on SMP +machines. During the single stepping other CPUs may run freely on that page +and events can be missed without a notice. Re-enabling other CPUs during +tracing is discouraged. + + +Trace Log Format +---------------- + +The raw log is text and easily filtered with e.g. grep and awk. One record is +one line in the log. A record starts with a keyword, followed by keyword +dependant arguments. Arguments are separated by a space, or continue until the +end of line. The format for version 20070824 is as follows: + +Explanation Keyword Space separated arguments +--------------------------------------------------------------------------- + +read event R width, timestamp, map id, physical, value, PC, PID +write event W width, timestamp, map id, physical, value, PC, PID +ioremap event MAP timestamp, map id, physical, virtual, length, PC, PID +iounmap event UNMAP timestamp, map id, PC, PID +marker MARK timestamp, text +version VERSION the string "20070824" +info for reader LSPCI one line from lspci -v +PCI address map PCIDEV space separated /proc/bus/pci/devices data +unk. opcode UNKNOWN timestamp, map id, physical, data, PC, PID + +Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual +is a kernel virtual address. Width is the data width in bytes and value is the +data value. Map id is an arbitrary id number identifying the mapping that was +used in an operation. PC is the program counter and PID is process id. PC is +zero if it is not recorded. PID is always zero as tracing MMIO accesses +originating in user space memory is not yet supported. + +For instance, the following awk filter will pass all 32-bit writes that target +physical addresses in the range [0xfb73ce40, 0xfb800000[ + +$ awk '/W 4 / { adr=strtonum($5); if (adr >= 0xfb73ce40 && +adr < 0xfb800000) print; }' + + +Tools for Developers +-------------------- + +The user space tools include utilities for: +- replacing numeric addresses and values with hardware register names +- replaying MMIO logs, i.e., re-executing the recorded writes + + diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 1d6de0d..b28ace2 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -180,12 +180,10 @@ config MMIOTRACE help Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. It is called from the ioremap - implementation and works via page faults. A user space program is - required to collect the MMIO data from debugfs files. - Tracing is disabled by default and can be enabled from a debugfs - file. + implementation and works via page faults. Tracing is disabled by + default and can be enabled run-time. - See http://nouveau.freedesktop.org/wiki/MmioTrace + See Documentation/tracers/mmiotrace.txt. If you are not helping to develop drivers, say N. config MMIOTRACE_TEST -- cgit v0.10.2 From 0663bb6cd9a457fbd8ca95c627bb762d07321a39 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 12 May 2008 21:20:59 +0200 Subject: mmiotrace: fix printk format Fix gcc printk format warnings: next-20080415/arch/x86/mm/mmio-mod.c: In function 'print_pte': next-20080415/arch/x86/mm/mmio-mod.c:154: warning: format '%lx' expects type 'long unsigned int', but argument 3 has type 'pteval_t' next-20080415/arch/x86/mm/mmio-mod.c:154: warning: format '%lx' expects type 'long unsigned int', but argument 4 has type 'pteval_t' next-20080415/arch/x86/mm/mmio-mod.c: At top level: next-20080415/arch/x86/mm/mmio-mod.c:403: warning: 'downed_cpus' defined but not used Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index ab2bb77..6d6cac8 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -150,8 +150,9 @@ static void print_pte(unsigned long address) "0x%08lx\n", address); BUG(); } - pr_info(NAME "pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), - pte_val(*pte) & _PAGE_PRESENT); + pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address, + (unsigned long long)pte_val(*pte), + (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); } /* -- cgit v0.10.2 From 37b3619257d3190f47f233d7ed626d4b9916462c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 12 May 2008 21:20:59 +0200 Subject: x86/mmiotrace: uses/depends on PCI Don't try to build mmiotrace when CONFIG_PCI=n. next-20080416/kernel/trace/trace_mmiotrace.c: In function 'mmio_print_pcidev': next-20080416/kernel/trace/trace_mmiotrace.c:62: error: implicit declaration of function 'pci_dev_driver' Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index b28ace2..1e53df0 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -173,7 +173,7 @@ config MMIOTRACE_HOOKS config MMIOTRACE bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && PCI select TRACING select MMIOTRACE_HOOKS default y @@ -181,7 +181,7 @@ config MMIOTRACE Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. It is called from the ioremap implementation and works via page faults. Tracing is disabled by - default and can be enabled run-time. + default and can be enabled at run-time. See Documentation/tracers/mmiotrace.txt. If you are not helping to develop drivers, say N. -- cgit v0.10.2 From 7423d1115f18627666d475fccc7c62394406ff8c Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:02 +0200 Subject: x86 mmiotrace: dynamically disable non-boot CPUs From 8979ee55cb6a429c4edd72ebec2244b849f6a79a Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sat, 12 Apr 2008 00:18:57 +0300 Mmiotrace is not reliable with multiple CPUs and may miss events. Drop to single CPU when mmiotrace is activated. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 6d6cac8..1f77d85 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -32,6 +32,7 @@ #include /* for ISA_START_ADDRESS */ #include #include +#include #include "pf_in.h" @@ -400,6 +401,64 @@ static void clear_trace_list(void) } } +#ifdef CONFIG_HOTPLUG_CPU +static cpumask_t downed_cpus; + +static void enter_uniprocessor(void) +{ + int cpu; + int err; + + get_online_cpus(); + downed_cpus = cpu_online_map; + cpu_clear(first_cpu(cpu_online_map), downed_cpus); + if (num_online_cpus() > 1) + pr_notice(NAME "Disabling non-boot CPUs...\n"); + put_online_cpus(); + + for_each_cpu_mask(cpu, downed_cpus) { + err = cpu_down(cpu); + if (!err) { + pr_info(NAME "CPU%d is down.\n", cpu); + } else { + pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); + } + } + if (num_online_cpus() > 1) + pr_warning(NAME "multiple CPUs still online, " + "may miss events.\n"); +} + +static void leave_uniprocessor(void) +{ + int cpu; + int err; + + if (cpus_weight(downed_cpus) == 0) + return; + pr_notice(NAME "Re-enabling CPUs...\n"); + for_each_cpu_mask(cpu, downed_cpus) { + err = cpu_up(cpu); + if (!err) + pr_info(NAME "enabled CPU%d.\n", cpu); + else + pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err); + } +} + +#else /* !CONFIG_HOTPLUG_CPU */ +static void enter_uniprocessor(void) +{ + if (num_online_cpus() > 1) + pr_warning(NAME "multiple CPUs are online, may miss events. " + "Suggest booting with maxcpus=1 kernel argument.\n"); +} + +static void leave_uniprocessor(void) +{ +} +#endif + #if 0 /* XXX: out of order */ static struct file_operations fops_marker = { .owner = THIS_MODULE, @@ -422,6 +481,7 @@ void enable_mmiotrace(void) if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); + enter_uniprocessor(); spin_lock_irq(&trace_lock); atomic_inc(&mmiotrace_enabled); spin_unlock_irq(&trace_lock); @@ -442,6 +502,7 @@ void disable_mmiotrace(void) spin_unlock_irq(&trace_lock); clear_trace_list(); /* guarantees: no more kmmio callbacks */ + leave_uniprocessor(); if (marker_file) { debugfs_remove(marker_file); marker_file = NULL; -- cgit v0.10.2 From d0a7e8ca5b996d36219e6fc002907291c8ee677b Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:02 +0200 Subject: mmiotrace: print header using the read hook. Now the header is printed only for `trace_pipe' file. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 79be4a1..6d2edbd 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -12,6 +12,10 @@ #include "trace.h" +struct header_iter { + struct pci_dev *dev; +}; + static struct trace_array *mmio_trace_array; static void mmio_reset_data(struct trace_array *tr) @@ -89,17 +93,57 @@ static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) return ret; } -/* XXX: This is not called for trace_pipe file! */ -static void mmio_print_header(struct trace_iterator *iter) +static void destroy_header_iter(struct header_iter *hiter) +{ + if (!hiter) + return; + pci_dev_put(hiter->dev); + kfree(hiter); +} + +static void mmio_pipe_open(struct trace_iterator *iter) { + struct header_iter *hiter; struct trace_seq *s = &iter->seq; - struct pci_dev *dev = NULL; trace_seq_printf(s, "VERSION 20070824\n"); - for_each_pci_dev(dev) - mmio_print_pcidev(s, dev); - /* XXX: return value? What if header is very long? */ + hiter = kzalloc(sizeof(*hiter), GFP_KERNEL); + if (!hiter) + return; + + hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL); + iter->private = hiter; +} + +/* XXX: This is not called when the pipe is closed! */ +static void mmio_close(struct trace_iterator *iter) +{ + struct header_iter *hiter = iter->private; + destroy_header_iter(hiter); + iter->private = NULL; +} + +static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp, + char __user *ubuf, size_t cnt, loff_t *ppos) +{ + ssize_t ret; + struct header_iter *hiter = iter->private; + struct trace_seq *s = &iter->seq; + + if (!hiter) + return 0; + + mmio_print_pcidev(s, hiter->dev); + hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, hiter->dev); + + if (!hiter->dev) { + destroy_header_iter(hiter); + iter->private = NULL; + } + + ret = trace_seq_to_user(s, ubuf, cnt); + return (ret == -EBUSY) ? 0 : ret; } static int mmio_print_rw(struct trace_iterator *iter) @@ -190,7 +234,9 @@ static struct tracer mmio_tracer __read_mostly = .name = "mmiotrace", .init = mmio_trace_init, .reset = mmio_trace_reset, - .open = mmio_print_header, + .pipe_open = mmio_pipe_open, + .close = mmio_close, + .read = mmio_read, .ctrl_update = mmio_trace_ctrl_update, .print_line = mmio_print_line, }; -- cgit v0.10.2 From 2039238b79b51a50f8477f53f33750e1c3fc146a Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:02 +0200 Subject: mmiotrace: print overrun counts Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 6d2edbd..d0f649a 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -17,11 +17,13 @@ struct header_iter { }; static struct trace_array *mmio_trace_array; +static bool overrun_detected; static void mmio_reset_data(struct trace_array *tr) { int cpu; + overrun_detected = false; tr->time_start = ftrace_now(tr->cpu); for_each_online_cpu(cpu) @@ -124,12 +126,34 @@ static void mmio_close(struct trace_iterator *iter) iter->private = NULL; } +static unsigned long count_overruns(struct trace_iterator *iter) +{ + int cpu; + unsigned long cnt = 0; + for_each_online_cpu(cpu) { + cnt += iter->overrun[cpu]; + iter->overrun[cpu] = 0; + } + return cnt; +} + static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { ssize_t ret; struct header_iter *hiter = iter->private; struct trace_seq *s = &iter->seq; + unsigned long n; + + n = count_overruns(iter); + if (n) { + /* XXX: This is later than where events were lost. */ + trace_seq_printf(s, "MARK 0.000000 Lost %lu events.\n", n); + if (!overrun_detected) + pr_warning("mmiotrace has lost events.\n"); + overrun_detected = true; + goto print_out; + } if (!hiter) return 0; @@ -142,6 +166,7 @@ static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp, iter->private = NULL; } +print_out: ret = trace_seq_to_user(s, ubuf, cnt); return (ret == -EBUSY) ? 0 : ret; } -- cgit v0.10.2 From e0fd5c2fa188311667267c02a702ae699a9fc2bd Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:02 +0200 Subject: mmiotrace: do not print bogus pid for maps either Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index d0f649a..3c1dacd 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -225,12 +225,12 @@ static int mmio_print_map(struct trace_iterator *iter) ret = trace_seq_printf(s, "MAP %lu.%06lu %d 0x%lx 0x%lx 0x%lx 0x%lx %d\n", secs, usec_rem, m->map_id, m->phys, m->virt, m->len, - 0UL, entry->pid); + 0UL, 0); break; case MMIO_UNPROBE: ret = trace_seq_printf(s, "UNMAP %lu.%06lu %d 0x%lx %d\n", - secs, usec_rem, m->map_id, 0UL, entry->pid); + secs, usec_rem, m->map_id, 0UL, 0); break; default: ret = trace_seq_printf(s, "map what?\n"); -- cgit v0.10.2 From 6f6f394d9ca61fcc73ecfd2f2bf58e92dc1ab269 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: doc: update mmiotrace doc to current status Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt index 84246f7..a4afb56 100644 --- a/Documentation/tracers/mmiotrace.txt +++ b/Documentation/tracers/mmiotrace.txt @@ -25,7 +25,8 @@ Mmiotrace feature is compiled in by the CONFIG_MMIOTRACE option. Tracing is disabled by default, so it is safe to have this set to yes. SMP systems are supported, but tracing is unreliable and may miss events if more than one CPU is on-line, therefore mmiotrace takes all but one CPU off-line during run-time -activation [not implemented]. +activation. You can re-enable CPUs by hand, but you have been warned, there +is no way to automatically detect if you are losing events due to CPUs racing. Usage Quick Reference @@ -37,7 +38,7 @@ $ cat /debug/tracing/trace_pipe > mydump.txt & Start X or whatever. $ echo "X is up" > /debug/tracing/marker $ echo none > /debug/tracing/current_tracer -Check kernel log. +Check for lost events. Usage @@ -67,12 +68,22 @@ do. Shut down mmiotrace (requires root privileges): $ echo none > /debug/tracing/current_tracer -The 'cat' process exits. If it does not, kill it by 'fg' and pressing ctrl+c. - -[This feature is not implemented yet!] -Check your kernel log. If there are messages about mmiotrace losing events, -this is due to buffer overrun, and the trace is incomplete. You should enlarge -the buffers and try again. [How?] +The 'cat' process exits. If it does not, kill it by issuing 'fg' command and +pressing ctrl+c. + +Check that mmiotrace did not lose events due to a buffer filling up. Either +$ grep -i lost mydump.txt +which tells you exactly how many events were lost, or use +$ dmesg +to view your kernel log and look for "mmiotrace has lost events" warning. If +events were lost, the trace is incomplete. You should enlarge the buffers and +try again. Buffers are enlarged by first seeing how large the current buffers +are: +$ cat /debug/tracing/trace_entries +gives you a number. Approximately double this number and write it back, for +instance: +$ echo 128000 > /debug/tracing/trace_entries +Then start again from the top. If you are doing a trace for a driver project, e.g. Nouveau, you should also do the following before sending your results: -- cgit v0.10.2 From 970e6fa03885f32cc43e42cb08c73a5f54cd8bd9 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: mmiotrace: code style cleanups From c2da03771e29159627c5c7b9509ec70bce9f91ee Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 28 Apr 2008 21:25:22 +0300 Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 3ad27b8..6a92d91 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -17,10 +17,10 @@ #include #include #include -#include +#include #include #include -#include +#include #include #include diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 1f77d85..a8d2a00 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -418,11 +418,10 @@ static void enter_uniprocessor(void) for_each_cpu_mask(cpu, downed_cpus) { err = cpu_down(cpu); - if (!err) { + if (!err) pr_info(NAME "CPU%d is down.\n", cpu); - } else { + else pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); - } } if (num_online_cpus() > 1) pr_warning(NAME "multiple CPUs still online, " diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index cfa60b2..d877c5b 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -2,7 +2,7 @@ * Written by Pekka Paalanen, 2008 */ #include -#include +#include #define MODULE_NAME "testmmiotrace" diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index dd6b64b..de8e912 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -1,9 +1,7 @@ #ifndef MMIOTRACE_H #define MMIOTRACE_H -#include - -#ifdef __KERNEL__ +#include #include @@ -84,6 +82,4 @@ extern void disable_mmiotrace(void); extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); -#endif /* __KERNEL__ */ - #endif /* MMIOTRACE_H */ -- cgit v0.10.2 From 87e547fe41a8b57d6d80afc67a0031fbe477eb0d Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: x86 mmiotrace: fix page-unaligned ioremaps mmiotrace_ioremap() expects to receive the original unaligned map phys address and size. Also fix {un,}register_kmmio_probe() to deal properly with unaligned size. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8927c87..a7c80a6 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -123,6 +123,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, { unsigned long pfn, offset, vaddr; resource_size_t last_addr; + const resource_size_t unaligned_phys_addr = phys_addr; + const unsigned long unaligned_size = size; struct vm_struct *area; unsigned long new_prot_val; pgprot_t prot; @@ -236,7 +238,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, } ret_addr = (void __iomem *) (vaddr + offset); - mmiotrace_ioremap(phys_addr, size, ret_addr); + mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); return ret_addr; } diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 6a92d91..93b1797 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -351,11 +351,19 @@ static void release_kmmio_fault_page(unsigned long page, } } +/* + * With page-unaligned ioremaps, one or two armed pages may contain + * addresses from outside the intended mapping. Events for these addresses + * are currently silently dropped. The events may result only from programming + * mistakes by accessing addresses before the beginning or past the end of a + * mapping. + */ int register_kmmio_probe(struct kmmio_probe *p) { unsigned long flags; int ret = 0; unsigned long size = 0; + const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); spin_lock_irqsave(&kmmio_lock, flags); if (get_kmmio_probe(p->addr)) { @@ -364,7 +372,7 @@ int register_kmmio_probe(struct kmmio_probe *p) } kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); - while (size < p->len) { + while (size < size_lim) { if (add_kmmio_fault_page(p->addr + size)) pr_err("kmmio: Unable to set page fault.\n"); size += PAGE_SIZE; @@ -436,11 +444,12 @@ void unregister_kmmio_probe(struct kmmio_probe *p) { unsigned long flags; unsigned long size = 0; + const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; spin_lock_irqsave(&kmmio_lock, flags); - while (size < p->len) { + while (size < size_lim) { release_kmmio_fault_page(p->addr + size, &release_list); size += PAGE_SIZE; } diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index a8d2a00..278998c 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -280,6 +280,7 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size, { static atomic_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); + /* These are page-unaligned. */ struct mmiotrace_map map = { .phys = offset, .virt = (unsigned long)addr, -- cgit v0.10.2 From dee310d0adf41019aca476052ac3085ff286d9be Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: x86 mmiotrace: use resource_size_t for phys addresses Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 278998c..3b04a01 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -48,7 +48,7 @@ struct trap_reason { struct remap_trace { struct list_head list; struct kmmio_probe probe; - unsigned long phys; + resource_size_t phys; unsigned long id; }; @@ -275,7 +275,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, put_cpu_var(pf_reason); } -static void ioremap_trace_core(unsigned long offset, unsigned long size, +static void ioremap_trace_core(resource_size_t offset, unsigned long size, void __iomem *addr) { static atomic_t next_id; @@ -319,13 +319,14 @@ not_enabled: spin_unlock_irq(&trace_lock); } -void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +void mmiotrace_ioremap(resource_size_t offset, unsigned long size, + void __iomem *addr) { if (!is_enabled()) /* recheck and proper locking in *_core() */ return; - pr_debug(NAME "ioremap_*(0x%lx, 0x%lx) = %p\n", offset, size, addr); + pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", + (unsigned long long)offset, size, addr); if ((filter_offset) && (offset != filter_offset)) return; ioremap_trace_core(offset, size, addr); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index de8e912..5cbbc37 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -2,7 +2,6 @@ #define MMIOTRACE_H #include - #include struct kmmio_probe; @@ -37,14 +36,15 @@ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); /* Called from ioremap.c */ #ifdef CONFIG_MMIOTRACE -extern void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, + void __iomem *addr); extern void mmiotrace_iounmap(volatile void __iomem *addr); #else -static inline void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +static inline void mmiotrace_ioremap(resource_size_t offset, + unsigned long size, void __iomem *addr) { } + static inline void mmiotrace_iounmap(volatile void __iomem *addr) { } @@ -60,7 +60,7 @@ enum mm_io_opcode { }; struct mmiotrace_rw { - unsigned long phys; /* PCI address of register */ + resource_size_t phys; /* PCI address of register */ unsigned long value; unsigned long pc; /* optional program counter */ int map_id; @@ -69,7 +69,7 @@ struct mmiotrace_rw { }; struct mmiotrace_map { - unsigned long phys; /* base address in PCI space */ + resource_size_t phys; /* base address in PCI space */ unsigned long virt; /* base virtual address */ unsigned long len; /* mapping size */ int map_id; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 3c1dacd..b13dc19 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -184,20 +184,23 @@ static int mmio_print_rw(struct trace_iterator *iter) switch (entry->mmiorw.opcode) { case MMIO_READ: ret = trace_seq_printf(s, - "R %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", - rw->width, secs, usec_rem, rw->map_id, rw->phys, + "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, + (unsigned long long)rw->phys, rw->value, rw->pc, 0); break; case MMIO_WRITE: ret = trace_seq_printf(s, - "W %d %lu.%06lu %d 0x%lx 0x%lx 0x%lx %d\n", - rw->width, secs, usec_rem, rw->map_id, rw->phys, + "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, + (unsigned long long)rw->phys, rw->value, rw->pc, 0); break; case MMIO_UNKNOWN_OP: ret = trace_seq_printf(s, - "UNKNOWN %lu.%06lu %d 0x%lx %02x,%02x,%02x 0x%lx %d\n", - secs, usec_rem, rw->map_id, rw->phys, + "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n", + secs, usec_rem, rw->map_id, + (unsigned long long)rw->phys, (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, (rw->value >> 0) & 0xff, rw->pc, 0); break; @@ -223,8 +226,9 @@ static int mmio_print_map(struct trace_iterator *iter) switch (entry->mmiorw.opcode) { case MMIO_PROBE: ret = trace_seq_printf(s, - "MAP %lu.%06lu %d 0x%lx 0x%lx 0x%lx 0x%lx %d\n", - secs, usec_rem, m->map_id, m->phys, m->virt, m->len, + "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", + secs, usec_rem, m->map_id, + (unsigned long long)m->phys, m->virt, m->len, 0UL, 0); break; case MMIO_UNPROBE: -- cgit v0.10.2 From a50445d76c22a34ae149704ea5adaef171c8acb7 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: mmiotrace: rename kmmio_probe::user_data to :private. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 3b04a01..ed0e0e9 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -191,7 +191,7 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); const unsigned long instptr = instruction_pointer(regs); const enum reason_type type = get_ins_type(instptr); - struct remap_trace *trace = p->user_data; + struct remap_trace *trace = p->private; /* it doesn't make sense to have more than one active trace per cpu */ if (my_reason->active_traces) @@ -299,7 +299,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, .len = size, .pre_handler = pre, .post_handler = post, - .user_data = trace + .private = trace }, .phys = offset, .id = atomic_inc_return(&next_id) diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 5cbbc37..61d19e1 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -18,7 +18,7 @@ struct kmmio_probe { unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ - void *user_data; + void *private; }; /* kmmio is active by some kmmio_probes? */ -- cgit v0.10.2 From 790e2a290b499b0400254e6870ec27969065d122 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:14 +0200 Subject: x86 mmiotrace: page level is unsigned Fixes some sparse warnings. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 93b1797..b65871e 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -104,11 +104,12 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static void set_page_present(unsigned long addr, bool present, int *pglevel) +static void set_page_present(unsigned long addr, bool present, + unsigned int *pglevel) { pteval_t pteval; pmdval_t pmdval; - int level; + unsigned int level; pmd_t *pmd; pte_t *pte = lookup_address(addr, &level); @@ -145,15 +146,15 @@ static void set_page_present(unsigned long addr, bool present, int *pglevel) } /** Mark the given page as not present. Access to it will trigger a fault. */ -static void arm_kmmio_fault_page(unsigned long page, int *page_level) +static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, false, page_level); + set_page_present(page & PAGE_MASK, false, pglevel); } /** Mark the given page as present. */ -static void disarm_kmmio_fault_page(unsigned long page, int *page_level) +static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, true, page_level); + set_page_present(page & PAGE_MASK, true, pglevel); } /* diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index ed0e0e9..e7397e1 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -137,7 +137,7 @@ static ssize_t write_marker(struct file *file, const char __user *buffer, static void print_pte(unsigned long address) { - int level; + unsigned int level; pte_t *pte = lookup_address(address, &level); if (!pte) { -- cgit v0.10.2 From 4d2df795f0c3eb91f97a666f47716121a2f166ed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 15:00:46 +0200 Subject: sysprof: make it depend on X86 Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e101c9a..263e9e6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -77,7 +77,7 @@ config PREEMPT_TRACER config SYSPROF_TRACER bool "Sysprof Tracer" - depends on DEBUG_KERNEL + depends on X86 select TRACING help This tracer provides the trace needed by the 'Sysprof' userspace -- cgit v0.10.2 From 40bd21740012132afb62d78ac3e6b82372b2fbc2 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 21 May 2008 11:44:02 +0200 Subject: x86: automatical unification of i8259.c Make conversion of i8259 very mechanical -- i8259 was generated by diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3..f71a76e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -18,7 +18,7 @@ CFLAGS_tsc_64.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o -obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o +obj-y += setup_$(BITS).o i8259.o i8259_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c new file mode 100644 index 0000000..2decba6 --- /dev/null +++ b/arch/x86/kernel/i8259.c @@ -0,0 +1,368 @@ +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#ifndef CONFIG_X86_64 +#include +#else /* CONFIG_X86_64 */ +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#ifndef CONFIG_X86_64 +#include +#endif /* ! CONFIG_X86_64 */ +#include + +/* + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes. + * plus some generic x86 specific things if generic specifics makes + * any sense at all. + */ + +static int i8259A_auto_eoi; +DEFINE_SPINLOCK(i8259A_lock); +static void mask_and_ack_8259A(unsigned int); + +struct irq_chip i8259A_chip = { + .name = "XT-PIC", + .mask = disable_8259A_irq, + .disable = disable_8259A_irq, + .unmask = enable_8259A_irq, + .mask_ack = mask_and_ack_8259A, +}; + +/* + * 8259A PIC functions to handle ISA devices: + */ + +/* + * This contains the irq mask for both 8259A irq controllers, + */ +unsigned int cached_irq_mask = 0xffff; + +/* + * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) + * boards the timer interrupt is not really connected to any IO-APIC pin, + * it's fed to the master 8259A's IR0 line only. + * + * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. + * this 'mixed mode' IRQ handling costs nothing because it's only used + * at IRQ setup time. + */ +unsigned long io_apic_irqs; + +void disable_8259A_irq(unsigned int irq) +{ + unsigned int mask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask |= mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); + else + outb(cached_master_mask, PIC_MASTER_IMR); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void enable_8259A_irq(unsigned int irq) +{ + unsigned int mask = ~(1 << irq); + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask &= mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); + else + outb(cached_master_mask, PIC_MASTER_IMR); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +int i8259A_irq_pending(unsigned int irq) +{ + unsigned int mask = 1<> 8); + spin_unlock_irqrestore(&i8259A_lock, flags); + + return ret; +} + +void make_8259A_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + io_apic_irqs &= ~(1<> 8); + outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ + return value; +} + +/* + * Careful! The 8259A is a fragile beast, it pretty + * much _has_ to be done exactly like this (mask it + * first, _then_ send the EOI, and the order of EOI + * to the two 8259s is important! + */ +static void mask_and_ack_8259A(unsigned int irq) +{ + unsigned int irqmask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + /* + * Lightweight spurious IRQ detection. We do not want + * to overdo spurious IRQ handling - it's usually a sign + * of hardware problems, so we only do the checks we can + * do without slowing down good hardware unnecessarily. + * + * Note that IRQ7 and IRQ15 (the two spurious IRQs + * usually resulting from the 8259A-1|2 PICs) occur + * even if the IRQ is masked in the 8259A. Thus we + * can check spurious 8259A IRQs without doing the + * quite slow i8259A_irq_real() call for every IRQ. + * This does not cover 100% of spurious interrupts, + * but should be enough to warn the user that there + * is something bad going on ... + */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; + cached_irq_mask |= irqmask; + +handle_real_irq: + if (irq & 8) { + inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ + outb(cached_slave_mask, PIC_SLAVE_IMR); +#ifndef CONFIG_X86_64 + outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ + outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ +#else /* CONFIG_X86_64 */ + /* 'Specific EOI' to slave */ + outb(0x60+(irq&7),PIC_SLAVE_CMD); + /* 'Specific EOI' to master-IRQ2 */ + outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); +#endif /* CONFIG_X86_64 */ + } else { + inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ + outb(cached_master_mask, PIC_MASTER_IMR); +#ifndef CONFIG_X86_64 + outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ +#else /* CONFIG_X86_64 */ + /* 'Specific EOI' to master */ + outb(0x60+irq,PIC_MASTER_CMD); +#endif /* CONFIG_X86_64 */ + } + spin_unlock_irqrestore(&i8259A_lock, flags); + return; + +spurious_8259A_irq: + /* + * this is the slow path - should happen rarely. + */ + if (i8259A_irq_real(irq)) + /* + * oops, the IRQ _is_ in service according to the + * 8259A - not spurious, go handle it. + */ + goto handle_real_irq; + + { + static int spurious_irq_mask; + /* + * At this point we can be sure the IRQ is spurious, + * lets ACK and report it. [once per IRQ] + */ + if (!(spurious_irq_mask & irqmask)) { +#ifndef CONFIG_X86_64 + printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); +#else /* CONFIG_X86_64 */ + printk(KERN_DEBUG + "spurious 8259A interrupt: IRQ%d.\n", irq); +#endif /* CONFIG_X86_64 */ + spurious_irq_mask |= irqmask; + } + atomic_inc(&irq_err_count); + /* + * Theoretically we do not have to handle this IRQ, + * but in Linux this does not cause problems and is + * simpler for us. + */ + goto handle_real_irq; + } +} + +static char irq_trigger[2]; +/** + * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ + */ +static void restore_ELCR(char *trigger) +{ + outb(trigger[0], 0x4d0); + outb(trigger[1], 0x4d1); +} + +static void save_ELCR(char *trigger) +{ + /* IRQ 0,1,2,8,13 are marked as reserved */ + trigger[0] = inb(0x4d0) & 0xF8; + trigger[1] = inb(0x4d1) & 0xDE; +} + +static int i8259A_resume(struct sys_device *dev) +{ + init_8259A(i8259A_auto_eoi); + restore_ELCR(irq_trigger); + return 0; +} + +static int i8259A_suspend(struct sys_device *dev, pm_message_t state) +{ + save_ELCR(irq_trigger); + return 0; +} + +static int i8259A_shutdown(struct sys_device *dev) +{ + /* Put the i8259A into a quiescent state that + * the kernel initialization code can get it + * out of. + */ + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ + return 0; +} + +static struct sysdev_class i8259_sysdev_class = { + .name = "i8259", + .suspend = i8259A_suspend, + .resume = i8259A_resume, + .shutdown = i8259A_shutdown, +}; + +static struct sys_device device_i8259A = { + .id = 0, + .cls = &i8259_sysdev_class, +}; + +static int __init i8259A_init_sysfs(void) +{ + int error = sysdev_class_register(&i8259_sysdev_class); + if (!error) + error = sysdev_register(&device_i8259A); + return error; +} + +device_initcall(i8259A_init_sysfs); + +void init_8259A(int auto_eoi) +{ + unsigned long flags; + + i8259A_auto_eoi = auto_eoi; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + /* + * outb_pic - this has to work on a wide range of PC hardware. + */ + outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ +#ifndef CONFIG_X86_64 + outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ + outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ +#else /* CONFIG_X86_64 */ + /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ + outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + /* 8259A-1 (the master) has a slave on IR2 */ + outb_pic(0x04, PIC_MASTER_IMR); +#endif /* CONFIG_X86_64 */ + if (auto_eoi) /* master does Auto EOI */ + outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); + else /* master expects normal EOI */ + outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); + + outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ +#ifndef CONFIG_X86_64 + outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ + outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ + outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ +#else /* CONFIG_X86_64 */ + /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ + outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* 8259A-2 is a slave on master's IR2 */ + outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); + /* (slave's support for AEOI in flat mode is to be investigated) */ + outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); + +#endif /* CONFIG_X86_64 */ + if (auto_eoi) + /* + * In AEOI mode we just have to mask the interrupt + * when acking. + */ + i8259A_chip.mask_ack = disable_8259A_irq; + else + i8259A_chip.mask_ack = mask_and_ack_8259A; + + udelay(100); /* wait for 8259A to initialize */ + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c index fe631967..d669142 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259_32.c @@ -21,302 +21,7 @@ #include #include -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - */ - -static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); -static void mask_and_ack_8259A(unsigned int); - -static struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .mask = disable_8259A_irq, - .disable = disable_8259A_irq, - .unmask = enable_8259A_irq, - .mask_ack = mask_and_ack_8259A, -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. - */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static int i8259A_resume(struct sys_device *dev) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); - return 0; -} - -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) -{ - save_ELCR(irq_trigger); - return 0; -} - -static int i8259A_shutdown(struct sys_device *dev) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; -} - -static struct sysdev_class i8259_sysdev_class = { - .name = "i8259", - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_pic - this has to work on a wide range of PC hardware. - */ - outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ - outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} /* * Note that on a 486, we don't want to do a SIGFPE on an irq13 diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index fa57a15..a95d2bd 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -87,315 +87,6 @@ static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = #undef IRQ #undef IRQLIST_16 -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - * this file should become arch/i386/kernel/irq.c when the old irq.c - * moves to arch independent land - */ - -static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); -static void mask_and_ack_8259A(unsigned int); - -static struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .mask = disable_8259A_irq, - .disable = disable_8259A_irq, - .unmask = enable_8259A_irq, - .mask_ack = mask_and_ack_8259A, -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. - */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; - -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - /* 'Specific EOI' to slave */ - outb(0x60+(irq&7),PIC_SLAVE_CMD); - /* 'Specific EOI' to master-IRQ2 */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - /* 'Specific EOI' to master */ - outb(0x60+irq,PIC_MASTER_CMD); - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG - "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static int i8259A_resume(struct sys_device *dev) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); - return 0; -} - -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) -{ - save_ELCR(irq_trigger); - return 0; -} - -static int i8259A_shutdown(struct sys_device *dev) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; -} - -static struct sysdev_class i8259_sysdev_class = { - .name = "i8259", - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_pic - this has to work on a wide range of PC hardware. - */ - outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); - /* 8259A-1 (the master) has a slave on IR2 */ - outb_pic(0x04, PIC_MASTER_IMR); - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ - outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); - /* 8259A-2 is a slave on master's IR2 */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); - /* (slave's support for AEOI in flat mode is to be investigated) */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); - - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h index 45d4df3..2f98df9 100644 --- a/include/asm-x86/i8259.h +++ b/include/asm-x86/i8259.h @@ -55,4 +55,6 @@ static inline void outb_pic(unsigned char value, unsigned int port) udelay(2); } +extern struct irq_chip i8259A_chip; + #endif /* __ASM_I8259_H__ */ -- cgit v0.10.2 From 6b4d3afbe722ee71b8da346c2c4393938440c471 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 21 May 2008 11:47:24 +0200 Subject: x86: i8259.c: remove #ifdefs around includes Remove #ifdefs around includes; including too much should be always safe. Signed-off-by: Pavel Machek Cc: macro@ds2.pg.gda.pl Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 2decba6..8b7eb0c 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -1,14 +1,10 @@ -#ifdef CONFIG_X86_64 #include -#endif /* CONFIG_X86_64 */ #include #include #include #include #include -#ifdef CONFIG_X86_64 #include -#endif /* CONFIG_X86_64 */ #include #include #include @@ -16,24 +12,17 @@ #include #include -#ifdef CONFIG_X86_64 #include -#endif /* CONFIG_X86_64 */ #include #include #include -#ifndef CONFIG_X86_64 #include -#else /* CONFIG_X86_64 */ #include -#endif /* CONFIG_X86_64 */ #include #include #include #include -#ifndef CONFIG_X86_64 #include -#endif /* ! CONFIG_X86_64 */ #include /* -- cgit v0.10.2 From 4f6f3bac18c80ff6cf072d90796755c69cc4c748 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 21 May 2008 11:52:52 +0200 Subject: x86: i8259.c: remove trivial ifdefs Remove #ifdefs where the only difference is formatting of comments. Signed-off-by: Pavel Machek Cc: macro@ds2.pg.gda.pl Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 8b7eb0c..433e49e 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -175,24 +175,14 @@ handle_real_irq: if (irq & 8) { inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ outb(cached_slave_mask, PIC_SLAVE_IMR); -#ifndef CONFIG_X86_64 - outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ -#else /* CONFIG_X86_64 */ /* 'Specific EOI' to slave */ - outb(0x60+(irq&7),PIC_SLAVE_CMD); + outb(0x60+(irq&7), PIC_SLAVE_CMD); /* 'Specific EOI' to master-IRQ2 */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); -#endif /* CONFIG_X86_64 */ + outb(0x60+PIC_CASCADE_IR, PIC_MASTER_CMD); } else { inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ outb(cached_master_mask, PIC_MASTER_IMR); -#ifndef CONFIG_X86_64 - outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ -#else /* CONFIG_X86_64 */ - /* 'Specific EOI' to master */ - outb(0x60+irq,PIC_MASTER_CMD); -#endif /* CONFIG_X86_64 */ + outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ } spin_unlock_irqrestore(&i8259A_lock, flags); return; @@ -215,12 +205,8 @@ spurious_8259A_irq: * lets ACK and report it. [once per IRQ] */ if (!(spurious_irq_mask & irqmask)) { -#ifndef CONFIG_X86_64 - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); -#else /* CONFIG_X86_64 */ printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); -#endif /* CONFIG_X86_64 */ spurious_irq_mask |= irqmask; } atomic_inc(&irq_err_count); -- cgit v0.10.2 From 4d9a6e6128dd8ada0d2feed2a9bb6506043e4655 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 21 May 2008 11:57:52 +0200 Subject: x86: i8259: cleanup codingstyle Signed-off-by: Pavel Machek Cc: macro@ds2.pg.gda.pl Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 433e49e..7a0fda8 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -129,14 +129,14 @@ static inline int i8259A_irq_real(unsigned int irq) int irqmask = 1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ + outb(0x0A, PIC_SLAVE_CMD); /* back to the IRR register */ return value; } -- cgit v0.10.2 From f20b11e716936f85850654c49e06a1f955b9e5fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 15:59:58 +0200 Subject: x86: rename the i8259_32/64.c leftovers to initirq_32/64.c The leftovers of the i8259 unification have nothing to do with i8259 at all. They contain interrupt init code and the i8259_xx name is just misleading now. Rename them to initirq_32/64.c Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f71a76e..2a53ad2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -18,7 +18,7 @@ CFLAGS_tsc_64.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o -obj-y += setup_$(BITS).o i8259.o i8259_$(BITS).o setup.o +obj-y += setup_$(BITS).o i8259.o irqinit_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c deleted file mode 100644 index d669142..0000000 --- a/arch/x86/kernel/i8259_32.c +++ /dev/null @@ -1,114 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - - -/* - * Note that on a 486, we don't want to do a SIGFPE on an irq13 - * as the irq is unreliable, and exception 16 works correctly - * (ie as explained in the intel literature). On a 386, you - * can't use exception 16 due to bad IBM design, so we have to - * rely on the less exact irq13. - * - * Careful.. Not only is IRQ13 unreliable, but it is also - * leads to races. IBM designers who came up with it should - * be shot. - */ - - -static irqreturn_t math_error_irq(int cpl, void *dev_id) -{ - extern void math_error(void __user *); - outb(0,0xF0); - if (ignore_fpu_irq || !boot_cpu_data.hard_math) - return IRQ_NONE; - math_error((void __user *)get_irq_regs()->ip); - return IRQ_HANDLED; -} - -/* - * New motherboards sometimes make IRQ 13 be a PCI interrupt, - * so allow interrupt sharing. - */ -static struct irqaction fpu_irq = { - .handler = math_error_irq, - .mask = CPU_MASK_NONE, - .name = "fpu", -}; - -void __init init_ISA_irqs (void) -{ - int i; - -#ifdef CONFIG_X86_LOCAL_APIC - init_bsp_APIC(); -#endif - init_8259A(0); - - /* - * 16 old-style INTA-cycle interrupts: - */ - for (i = 0; i < 16; i++) { - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } -} - -/* Overridden in paravirt.c */ -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -void __init native_init_IRQ(void) -{ - int i; - - /* all the set up before the call gates are initialised */ - pre_intr_init_hook(); - - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (i >= NR_IRQS) - break; - /* SYSCALL_VECTOR was reserved in trap_init. */ - if (!test_bit(vector, used_vectors)) - set_intr_gate(vector, interrupt[i]); - } - - /* setup after call gates are initialised (usually add in - * the architecture specific gates) - */ - intr_init_hook(); - - /* - * External FPU? Set up irq13 if so, for - * original braindamaged IBM FERR coupling. - */ - if (boot_cpu_data.hard_math && !cpu_has_fpu) - setup_irq(FPU_IRQ, &fpu_irq); - - irq_ctx_init(smp_processor_id()); -} diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c deleted file mode 100644 index a95d2bd..0000000 --- a/arch/x86/kernel/i8259_64.c +++ /dev/null @@ -1,203 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Common place to define all x86 IRQ vectors - * - * This builds up the IRQ handler stubs using some ugly macros in irq.h - * - * These macros create the low-level assembly IRQ routines that save - * register context and call do_IRQ(). do_IRQ() then does all the - * operations that are needed to keep the AT (or SMP IOAPIC) - * interrupt-controller happy. - */ - -#define BI(x,y) \ - BUILD_IRQ(x##y) - -#define BUILD_16_IRQS(x) \ - BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ - BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ - BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ - BI(x,c) BI(x,d) BI(x,e) BI(x,f) - -/* - * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: - * (these are usually mapped to vectors 0x30-0x3f) - */ - -/* - * The IO-APIC gives us many more interrupt sources. Most of these - * are unused but an SMP system is supposed to have enough memory ... - * sometimes (mostly wrt. hw bugs) we get corrupted vectors all - * across the spectrum, so we really want to be prepared to get all - * of these. Plus, more powerful systems might have more than 64 - * IO-APIC registers. - * - * (these are usually mapped into the 0x30-0xff vector range) - */ - BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) -BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) -BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) -BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) - -#undef BUILD_16_IRQS -#undef BI - - -#define IRQ(x,y) \ - IRQ##x##y##_interrupt - -#define IRQLIST_16(x) \ - IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ - IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ - IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ - IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) - -/* for the irq vectors */ -static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { - IRQLIST_16(0x2), IRQLIST_16(0x3), - IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), - IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), - IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) -}; - -#undef IRQ -#undef IRQLIST_16 - - - - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ - -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; -DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... IRQ0_VECTOR - 1] = -1, - [IRQ0_VECTOR] = 0, - [IRQ1_VECTOR] = 1, - [IRQ2_VECTOR] = 2, - [IRQ3_VECTOR] = 3, - [IRQ4_VECTOR] = 4, - [IRQ5_VECTOR] = 5, - [IRQ6_VECTOR] = 6, - [IRQ7_VECTOR] = 7, - [IRQ8_VECTOR] = 8, - [IRQ9_VECTOR] = 9, - [IRQ10_VECTOR] = 10, - [IRQ11_VECTOR] = 11, - [IRQ12_VECTOR] = 12, - [IRQ13_VECTOR] = 13, - [IRQ14_VECTOR] = 14, - [IRQ15_VECTOR] = 15, - [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 -}; - -void __init init_ISA_irqs (void) -{ - int i; - - init_bsp_APIC(); - init_8259A(0); - - for (i = 0; i < NR_IRQS; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = NULL; - irq_desc[i].depth = 1; - - if (i < 16) { - /* - * 16 old-style INTA-cycle interrupts: - */ - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } else { - /* - * 'high' PCI IRQs filled in on demand - */ - irq_desc[i].chip = &no_irq_chip; - } - } -} - -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -void __init native_init_IRQ(void) -{ - int i; - - init_ISA_irqs(); - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (vector != IA32_SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); - } - -#ifdef CONFIG_SMP - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPIs for invalidation */ - set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); - set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); - - /* IPI for generic function call */ - set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* Low priority IPI to cleanup after moving an irq */ - set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); -#endif - set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); - set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); - - /* self generated IPI for local APIC timer */ - set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - if (!acpi_ioapic) - setup_irq(2, &irq2); -} diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c new file mode 100644 index 0000000..d669142 --- /dev/null +++ b/arch/x86/kernel/irqinit_32.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +/* + * Note that on a 486, we don't want to do a SIGFPE on an irq13 + * as the irq is unreliable, and exception 16 works correctly + * (ie as explained in the intel literature). On a 386, you + * can't use exception 16 due to bad IBM design, so we have to + * rely on the less exact irq13. + * + * Careful.. Not only is IRQ13 unreliable, but it is also + * leads to races. IBM designers who came up with it should + * be shot. + */ + + +static irqreturn_t math_error_irq(int cpl, void *dev_id) +{ + extern void math_error(void __user *); + outb(0,0xF0); + if (ignore_fpu_irq || !boot_cpu_data.hard_math) + return IRQ_NONE; + math_error((void __user *)get_irq_regs()->ip); + return IRQ_HANDLED; +} + +/* + * New motherboards sometimes make IRQ 13 be a PCI interrupt, + * so allow interrupt sharing. + */ +static struct irqaction fpu_irq = { + .handler = math_error_irq, + .mask = CPU_MASK_NONE, + .name = "fpu", +}; + +void __init init_ISA_irqs (void) +{ + int i; + +#ifdef CONFIG_X86_LOCAL_APIC + init_bsp_APIC(); +#endif + init_8259A(0); + + /* + * 16 old-style INTA-cycle interrupts: + */ + for (i = 0; i < 16; i++) { + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); + } +} + +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) +{ + int i; + + /* all the set up before the call gates are initialised */ + pre_intr_init_hook(); + + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (i >= NR_IRQS) + break; + /* SYSCALL_VECTOR was reserved in trap_init. */ + if (!test_bit(vector, used_vectors)) + set_intr_gate(vector, interrupt[i]); + } + + /* setup after call gates are initialised (usually add in + * the architecture specific gates) + */ + intr_init_hook(); + + /* + * External FPU? Set up irq13 if so, for + * original braindamaged IBM FERR coupling. + */ + if (boot_cpu_data.hard_math && !cpu_has_fpu) + setup_irq(FPU_IRQ, &fpu_irq); + + irq_ctx_init(smp_processor_id()); +} diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c new file mode 100644 index 0000000..a95d2bd --- /dev/null +++ b/arch/x86/kernel/irqinit_64.c @@ -0,0 +1,203 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Common place to define all x86 IRQ vectors + * + * This builds up the IRQ handler stubs using some ugly macros in irq.h + * + * These macros create the low-level assembly IRQ routines that save + * register context and call do_IRQ(). do_IRQ() then does all the + * operations that are needed to keep the AT (or SMP IOAPIC) + * interrupt-controller happy. + */ + +#define BI(x,y) \ + BUILD_IRQ(x##y) + +#define BUILD_16_IRQS(x) \ + BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ + BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ + BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ + BI(x,c) BI(x,d) BI(x,e) BI(x,f) + +/* + * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: + * (these are usually mapped to vectors 0x30-0x3f) + */ + +/* + * The IO-APIC gives us many more interrupt sources. Most of these + * are unused but an SMP system is supposed to have enough memory ... + * sometimes (mostly wrt. hw bugs) we get corrupted vectors all + * across the spectrum, so we really want to be prepared to get all + * of these. Plus, more powerful systems might have more than 64 + * IO-APIC registers. + * + * (these are usually mapped into the 0x30-0xff vector range) + */ + BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) +BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) +BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) +BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) + +#undef BUILD_16_IRQS +#undef BI + + +#define IRQ(x,y) \ + IRQ##x##y##_interrupt + +#define IRQLIST_16(x) \ + IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ + IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ + IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ + IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) + +/* for the irq vectors */ +static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { + IRQLIST_16(0x2), IRQLIST_16(0x3), + IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), + IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), + IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) +}; + +#undef IRQ +#undef IRQLIST_16 + + + + +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ + +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; +DEFINE_PER_CPU(vector_irq_t, vector_irq) = { + [0 ... IRQ0_VECTOR - 1] = -1, + [IRQ0_VECTOR] = 0, + [IRQ1_VECTOR] = 1, + [IRQ2_VECTOR] = 2, + [IRQ3_VECTOR] = 3, + [IRQ4_VECTOR] = 4, + [IRQ5_VECTOR] = 5, + [IRQ6_VECTOR] = 6, + [IRQ7_VECTOR] = 7, + [IRQ8_VECTOR] = 8, + [IRQ9_VECTOR] = 9, + [IRQ10_VECTOR] = 10, + [IRQ11_VECTOR] = 11, + [IRQ12_VECTOR] = 12, + [IRQ13_VECTOR] = 13, + [IRQ14_VECTOR] = 14, + [IRQ15_VECTOR] = 15, + [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 +}; + +void __init init_ISA_irqs (void) +{ + int i; + + init_bsp_APIC(); + init_8259A(0); + + for (i = 0; i < NR_IRQS; i++) { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = NULL; + irq_desc[i].depth = 1; + + if (i < 16) { + /* + * 16 old-style INTA-cycle interrupts: + */ + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); + } else { + /* + * 'high' PCI IRQs filled in on demand + */ + irq_desc[i].chip = &no_irq_chip; + } + } +} + +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) +{ + int i; + + init_ISA_irqs(); + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (vector != IA32_SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); + } + +#ifdef CONFIG_SMP + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPIs for invalidation */ + set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); + + /* IPI for generic function call */ + set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* Low priority IPI to cleanup after moving an irq */ + set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); +#endif + set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); + + /* self generated IPI for local APIC timer */ + set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + + if (!acpi_ioapic) + setup_irq(2, &irq2); +} -- cgit v0.10.2 From fce39665abb71d01d74ac74eb13dd69a799dfc2f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:36 +0200 Subject: x86: make init_ISA_irqs() static Moved to i8259 branch to avoid conflicts. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index a95d2bd..64bc0f1 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -120,7 +120,7 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; -void __init init_ISA_irqs (void) +static void __init init_ISA_irqs (void) { int i; -- cgit v0.10.2 From 21fd5132b223a10bdf17713dd0bf321cbd6471d2 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 21 May 2008 11:44:02 +0200 Subject: x86: automatical unification of i8259.c Make conversion of i8259 very mechanical -- i8259 was generated by diff -D, with too different parts left in i8259_32 and i8259_64.c. Only "by hand" changes were removal of #ifdef from middle of the comment (prevented compilation) and removal of one static to allow splitting into files. Of course, it will need some cleanups now, and those will follow. Signed-of-by: Pavel Machek diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3..f71a76e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -18,7 +18,7 @@ CFLAGS_tsc_64.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o -obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o +obj-y += setup_$(BITS).o i8259.o i8259_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c new file mode 100644 index 0000000..2decba6 --- /dev/null +++ b/arch/x86/kernel/i8259.c @@ -0,0 +1,368 @@ +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#ifndef CONFIG_X86_64 +#include +#else /* CONFIG_X86_64 */ +#include +#endif /* CONFIG_X86_64 */ +#include +#include +#include +#include +#ifndef CONFIG_X86_64 +#include +#endif /* ! CONFIG_X86_64 */ +#include + +/* + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes. + * plus some generic x86 specific things if generic specifics makes + * any sense at all. + */ + +static int i8259A_auto_eoi; +DEFINE_SPINLOCK(i8259A_lock); +static void mask_and_ack_8259A(unsigned int); + +struct irq_chip i8259A_chip = { + .name = "XT-PIC", + .mask = disable_8259A_irq, + .disable = disable_8259A_irq, + .unmask = enable_8259A_irq, + .mask_ack = mask_and_ack_8259A, +}; + +/* + * 8259A PIC functions to handle ISA devices: + */ + +/* + * This contains the irq mask for both 8259A irq controllers, + */ +unsigned int cached_irq_mask = 0xffff; + +/* + * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) + * boards the timer interrupt is not really connected to any IO-APIC pin, + * it's fed to the master 8259A's IR0 line only. + * + * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. + * this 'mixed mode' IRQ handling costs nothing because it's only used + * at IRQ setup time. + */ +unsigned long io_apic_irqs; + +void disable_8259A_irq(unsigned int irq) +{ + unsigned int mask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask |= mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); + else + outb(cached_master_mask, PIC_MASTER_IMR); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void enable_8259A_irq(unsigned int irq) +{ + unsigned int mask = ~(1 << irq); + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask &= mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); + else + outb(cached_master_mask, PIC_MASTER_IMR); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +int i8259A_irq_pending(unsigned int irq) +{ + unsigned int mask = 1<> 8); + spin_unlock_irqrestore(&i8259A_lock, flags); + + return ret; +} + +void make_8259A_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + io_apic_irqs &= ~(1<> 8); + outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ + return value; +} + +/* + * Careful! The 8259A is a fragile beast, it pretty + * much _has_ to be done exactly like this (mask it + * first, _then_ send the EOI, and the order of EOI + * to the two 8259s is important! + */ +static void mask_and_ack_8259A(unsigned int irq) +{ + unsigned int irqmask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + /* + * Lightweight spurious IRQ detection. We do not want + * to overdo spurious IRQ handling - it's usually a sign + * of hardware problems, so we only do the checks we can + * do without slowing down good hardware unnecessarily. + * + * Note that IRQ7 and IRQ15 (the two spurious IRQs + * usually resulting from the 8259A-1|2 PICs) occur + * even if the IRQ is masked in the 8259A. Thus we + * can check spurious 8259A IRQs without doing the + * quite slow i8259A_irq_real() call for every IRQ. + * This does not cover 100% of spurious interrupts, + * but should be enough to warn the user that there + * is something bad going on ... + */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; + cached_irq_mask |= irqmask; + +handle_real_irq: + if (irq & 8) { + inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ + outb(cached_slave_mask, PIC_SLAVE_IMR); +#ifndef CONFIG_X86_64 + outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ + outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ +#else /* CONFIG_X86_64 */ + /* 'Specific EOI' to slave */ + outb(0x60+(irq&7),PIC_SLAVE_CMD); + /* 'Specific EOI' to master-IRQ2 */ + outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); +#endif /* CONFIG_X86_64 */ + } else { + inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ + outb(cached_master_mask, PIC_MASTER_IMR); +#ifndef CONFIG_X86_64 + outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ +#else /* CONFIG_X86_64 */ + /* 'Specific EOI' to master */ + outb(0x60+irq,PIC_MASTER_CMD); +#endif /* CONFIG_X86_64 */ + } + spin_unlock_irqrestore(&i8259A_lock, flags); + return; + +spurious_8259A_irq: + /* + * this is the slow path - should happen rarely. + */ + if (i8259A_irq_real(irq)) + /* + * oops, the IRQ _is_ in service according to the + * 8259A - not spurious, go handle it. + */ + goto handle_real_irq; + + { + static int spurious_irq_mask; + /* + * At this point we can be sure the IRQ is spurious, + * lets ACK and report it. [once per IRQ] + */ + if (!(spurious_irq_mask & irqmask)) { +#ifndef CONFIG_X86_64 + printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); +#else /* CONFIG_X86_64 */ + printk(KERN_DEBUG + "spurious 8259A interrupt: IRQ%d.\n", irq); +#endif /* CONFIG_X86_64 */ + spurious_irq_mask |= irqmask; + } + atomic_inc(&irq_err_count); + /* + * Theoretically we do not have to handle this IRQ, + * but in Linux this does not cause problems and is + * simpler for us. + */ + goto handle_real_irq; + } +} + +static char irq_trigger[2]; +/** + * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ + */ +static void restore_ELCR(char *trigger) +{ + outb(trigger[0], 0x4d0); + outb(trigger[1], 0x4d1); +} + +static void save_ELCR(char *trigger) +{ + /* IRQ 0,1,2,8,13 are marked as reserved */ + trigger[0] = inb(0x4d0) & 0xF8; + trigger[1] = inb(0x4d1) & 0xDE; +} + +static int i8259A_resume(struct sys_device *dev) +{ + init_8259A(i8259A_auto_eoi); + restore_ELCR(irq_trigger); + return 0; +} + +static int i8259A_suspend(struct sys_device *dev, pm_message_t state) +{ + save_ELCR(irq_trigger); + return 0; +} + +static int i8259A_shutdown(struct sys_device *dev) +{ + /* Put the i8259A into a quiescent state that + * the kernel initialization code can get it + * out of. + */ + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ + return 0; +} + +static struct sysdev_class i8259_sysdev_class = { + .name = "i8259", + .suspend = i8259A_suspend, + .resume = i8259A_resume, + .shutdown = i8259A_shutdown, +}; + +static struct sys_device device_i8259A = { + .id = 0, + .cls = &i8259_sysdev_class, +}; + +static int __init i8259A_init_sysfs(void) +{ + int error = sysdev_class_register(&i8259_sysdev_class); + if (!error) + error = sysdev_register(&device_i8259A); + return error; +} + +device_initcall(i8259A_init_sysfs); + +void init_8259A(int auto_eoi) +{ + unsigned long flags; + + i8259A_auto_eoi = auto_eoi; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + /* + * outb_pic - this has to work on a wide range of PC hardware. + */ + outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ +#ifndef CONFIG_X86_64 + outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ + outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ +#else /* CONFIG_X86_64 */ + /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ + outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + /* 8259A-1 (the master) has a slave on IR2 */ + outb_pic(0x04, PIC_MASTER_IMR); +#endif /* CONFIG_X86_64 */ + if (auto_eoi) /* master does Auto EOI */ + outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); + else /* master expects normal EOI */ + outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); + + outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ +#ifndef CONFIG_X86_64 + outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ + outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ + outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ +#else /* CONFIG_X86_64 */ + /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ + outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); + /* 8259A-2 is a slave on master's IR2 */ + outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); + /* (slave's support for AEOI in flat mode is to be investigated) */ + outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); + +#endif /* CONFIG_X86_64 */ + if (auto_eoi) + /* + * In AEOI mode we just have to mask the interrupt + * when acking. + */ + i8259A_chip.mask_ack = disable_8259A_irq; + else + i8259A_chip.mask_ack = mask_and_ack_8259A; + + udelay(100); /* wait for 8259A to initialize */ + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c index fe631967..d669142 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259_32.c @@ -21,302 +21,7 @@ #include #include -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - */ - -static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); -static void mask_and_ack_8259A(unsigned int); - -static struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .mask = disable_8259A_irq, - .disable = disable_8259A_irq, - .unmask = enable_8259A_irq, - .mask_ack = mask_and_ack_8259A, -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. - */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static int i8259A_resume(struct sys_device *dev) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); - return 0; -} - -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) -{ - save_ELCR(irq_trigger); - return 0; -} - -static int i8259A_shutdown(struct sys_device *dev) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; -} - -static struct sysdev_class i8259_sysdev_class = { - .name = "i8259", - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_pic - this has to work on a wide range of PC hardware. - */ - outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ - outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} /* * Note that on a 486, we don't want to do a SIGFPE on an irq13 diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index 1870e0e..b44095e 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -101,315 +101,6 @@ static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = #undef IRQ #undef IRQLIST_16 -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - * this file should become arch/i386/kernel/irq.c when the old irq.c - * moves to arch independent land - */ - -static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); -static void mask_and_ack_8259A(unsigned int); - -static struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .mask = disable_8259A_irq, - .disable = disable_8259A_irq, - .unmask = enable_8259A_irq, - .mask_ack = mask_and_ack_8259A, -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. - */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; - -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - /* 'Specific EOI' to slave */ - outb(0x60+(irq&7),PIC_SLAVE_CMD); - /* 'Specific EOI' to master-IRQ2 */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - /* 'Specific EOI' to master */ - outb(0x60+irq,PIC_MASTER_CMD); - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG - "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static int i8259A_resume(struct sys_device *dev) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); - return 0; -} - -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) -{ - save_ELCR(irq_trigger); - return 0; -} - -static int i8259A_shutdown(struct sys_device *dev) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; -} - -static struct sysdev_class i8259_sysdev_class = { - .name = "i8259", - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_pic - this has to work on a wide range of PC hardware. - */ - outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ - outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); - /* 8259A-1 (the master) has a slave on IR2 */ - outb_pic(0x04, PIC_MASTER_IMR); - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ - outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); - /* 8259A-2 is a slave on master's IR2 */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); - /* (slave's support for AEOI in flat mode is to be investigated) */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); - - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h index 45d4df3..2f98df9 100644 --- a/include/asm-x86/i8259.h +++ b/include/asm-x86/i8259.h @@ -55,4 +55,6 @@ static inline void outb_pic(unsigned char value, unsigned int port) udelay(2); } +extern struct irq_chip i8259A_chip; + #endif /* __ASM_I8259_H__ */ -- cgit v0.10.2 From 15d613cb25efd978dd55592d011a6ffc487b3432 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 21 May 2008 11:47:24 +0200 Subject: x86: i8259.c: remove #ifdefs around includes Remove #ifdefs around includes; including too much should be always safe. Signed-off-by: Pavel Machek Cc: macro@ds2.pg.gda.pl Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 2decba6..8b7eb0c 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -1,14 +1,10 @@ -#ifdef CONFIG_X86_64 #include -#endif /* CONFIG_X86_64 */ #include #include #include #include #include -#ifdef CONFIG_X86_64 #include -#endif /* CONFIG_X86_64 */ #include #include #include @@ -16,24 +12,17 @@ #include #include -#ifdef CONFIG_X86_64 #include -#endif /* CONFIG_X86_64 */ #include #include #include -#ifndef CONFIG_X86_64 #include -#else /* CONFIG_X86_64 */ #include -#endif /* CONFIG_X86_64 */ #include #include #include #include -#ifndef CONFIG_X86_64 #include -#endif /* ! CONFIG_X86_64 */ #include /* -- cgit v0.10.2 From 3e8631d27088f394b6788829e238a60bf07d47ab Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 21 May 2008 11:52:52 +0200 Subject: x86: i8259.c: remove trivial ifdefs Remove #ifdefs where the only difference is formatting of comments. Signed-off-by: Pavel Machek Cc: macro@ds2.pg.gda.pl Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 8b7eb0c..433e49e 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -175,24 +175,14 @@ handle_real_irq: if (irq & 8) { inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ outb(cached_slave_mask, PIC_SLAVE_IMR); -#ifndef CONFIG_X86_64 - outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ -#else /* CONFIG_X86_64 */ /* 'Specific EOI' to slave */ - outb(0x60+(irq&7),PIC_SLAVE_CMD); + outb(0x60+(irq&7), PIC_SLAVE_CMD); /* 'Specific EOI' to master-IRQ2 */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); -#endif /* CONFIG_X86_64 */ + outb(0x60+PIC_CASCADE_IR, PIC_MASTER_CMD); } else { inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ outb(cached_master_mask, PIC_MASTER_IMR); -#ifndef CONFIG_X86_64 - outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ -#else /* CONFIG_X86_64 */ - /* 'Specific EOI' to master */ - outb(0x60+irq,PIC_MASTER_CMD); -#endif /* CONFIG_X86_64 */ + outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ } spin_unlock_irqrestore(&i8259A_lock, flags); return; @@ -215,12 +205,8 @@ spurious_8259A_irq: * lets ACK and report it. [once per IRQ] */ if (!(spurious_irq_mask & irqmask)) { -#ifndef CONFIG_X86_64 - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); -#else /* CONFIG_X86_64 */ printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); -#endif /* CONFIG_X86_64 */ spurious_irq_mask |= irqmask; } atomic_inc(&irq_err_count); -- cgit v0.10.2 From 680afbf989d697b9ba1382017a73c8cfa53b3f89 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 21 May 2008 11:57:52 +0200 Subject: x86: i8259: cleanup codingstyle Signed-off-by: Pavel Machek Cc: macro@ds2.pg.gda.pl Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 433e49e..7a0fda8 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -129,14 +129,14 @@ static inline int i8259A_irq_real(unsigned int irq) int irqmask = 1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ + outb(0x0A, PIC_SLAVE_CMD); /* back to the IRR register */ return value; } -- cgit v0.10.2 From d23b200a75f78e62744c3c25d078855eec8a689b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:36 +0200 Subject: x86: make init_ISA_irqs() static Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index b44095e..31f49e8 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -134,7 +134,7 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; -void __init init_ISA_irqs (void) +static void __init init_ISA_irqs (void) { int i; -- cgit v0.10.2 From ec42418f1973e1f77da1fcca3be59c0acb878a9d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 15:59:58 +0200 Subject: x86: rename the i8259_32/64.c leftovers to irqinit_32/64.c The leftovers of the i8259 unification have nothing to do with i8259 at all. They contain interrupt init code and the i8259_xx name is just misleading now. Rename them to irqinit_32/64.c Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f71a76e..2a53ad2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -18,7 +18,7 @@ CFLAGS_tsc_64.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o -obj-y += setup_$(BITS).o i8259.o i8259_$(BITS).o setup.o +obj-y += setup_$(BITS).o i8259.o irqinit_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c deleted file mode 100644 index d669142..0000000 --- a/arch/x86/kernel/i8259_32.c +++ /dev/null @@ -1,114 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - - -/* - * Note that on a 486, we don't want to do a SIGFPE on an irq13 - * as the irq is unreliable, and exception 16 works correctly - * (ie as explained in the intel literature). On a 386, you - * can't use exception 16 due to bad IBM design, so we have to - * rely on the less exact irq13. - * - * Careful.. Not only is IRQ13 unreliable, but it is also - * leads to races. IBM designers who came up with it should - * be shot. - */ - - -static irqreturn_t math_error_irq(int cpl, void *dev_id) -{ - extern void math_error(void __user *); - outb(0,0xF0); - if (ignore_fpu_irq || !boot_cpu_data.hard_math) - return IRQ_NONE; - math_error((void __user *)get_irq_regs()->ip); - return IRQ_HANDLED; -} - -/* - * New motherboards sometimes make IRQ 13 be a PCI interrupt, - * so allow interrupt sharing. - */ -static struct irqaction fpu_irq = { - .handler = math_error_irq, - .mask = CPU_MASK_NONE, - .name = "fpu", -}; - -void __init init_ISA_irqs (void) -{ - int i; - -#ifdef CONFIG_X86_LOCAL_APIC - init_bsp_APIC(); -#endif - init_8259A(0); - - /* - * 16 old-style INTA-cycle interrupts: - */ - for (i = 0; i < 16; i++) { - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } -} - -/* Overridden in paravirt.c */ -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -void __init native_init_IRQ(void) -{ - int i; - - /* all the set up before the call gates are initialised */ - pre_intr_init_hook(); - - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (i >= NR_IRQS) - break; - /* SYSCALL_VECTOR was reserved in trap_init. */ - if (!test_bit(vector, used_vectors)) - set_intr_gate(vector, interrupt[i]); - } - - /* setup after call gates are initialised (usually add in - * the architecture specific gates) - */ - intr_init_hook(); - - /* - * External FPU? Set up irq13 if so, for - * original braindamaged IBM FERR coupling. - */ - if (boot_cpu_data.hard_math && !cpu_has_fpu) - setup_irq(FPU_IRQ, &fpu_irq); - - irq_ctx_init(smp_processor_id()); -} diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c deleted file mode 100644 index 31f49e8..0000000 --- a/arch/x86/kernel/i8259_64.c +++ /dev/null @@ -1,217 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Common place to define all x86 IRQ vectors - * - * This builds up the IRQ handler stubs using some ugly macros in irq.h - * - * These macros create the low-level assembly IRQ routines that save - * register context and call do_IRQ(). do_IRQ() then does all the - * operations that are needed to keep the AT (or SMP IOAPIC) - * interrupt-controller happy. - */ - -#define IRQ_NAME2(nr) nr##_interrupt(void) -#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - -/* - * SMP has a few special interrupts for IPI messages - */ - -#define BUILD_IRQ(nr) \ - asmlinkage void IRQ_NAME(nr); \ - asm("\n.p2align\n" \ - "IRQ" #nr "_interrupt:\n\t" \ - "push $~(" #nr ") ; " \ - "jmp common_interrupt"); - -#define BI(x,y) \ - BUILD_IRQ(x##y) - -#define BUILD_16_IRQS(x) \ - BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ - BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ - BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ - BI(x,c) BI(x,d) BI(x,e) BI(x,f) - -/* - * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: - * (these are usually mapped to vectors 0x30-0x3f) - */ - -/* - * The IO-APIC gives us many more interrupt sources. Most of these - * are unused but an SMP system is supposed to have enough memory ... - * sometimes (mostly wrt. hw bugs) we get corrupted vectors all - * across the spectrum, so we really want to be prepared to get all - * of these. Plus, more powerful systems might have more than 64 - * IO-APIC registers. - * - * (these are usually mapped into the 0x30-0xff vector range) - */ - BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) -BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) -BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) -BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) - -#undef BUILD_16_IRQS -#undef BI - - -#define IRQ(x,y) \ - IRQ##x##y##_interrupt - -#define IRQLIST_16(x) \ - IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ - IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ - IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ - IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) - -/* for the irq vectors */ -static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { - IRQLIST_16(0x2), IRQLIST_16(0x3), - IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), - IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), - IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) -}; - -#undef IRQ -#undef IRQLIST_16 - - - - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ - -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; -DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... IRQ0_VECTOR - 1] = -1, - [IRQ0_VECTOR] = 0, - [IRQ1_VECTOR] = 1, - [IRQ2_VECTOR] = 2, - [IRQ3_VECTOR] = 3, - [IRQ4_VECTOR] = 4, - [IRQ5_VECTOR] = 5, - [IRQ6_VECTOR] = 6, - [IRQ7_VECTOR] = 7, - [IRQ8_VECTOR] = 8, - [IRQ9_VECTOR] = 9, - [IRQ10_VECTOR] = 10, - [IRQ11_VECTOR] = 11, - [IRQ12_VECTOR] = 12, - [IRQ13_VECTOR] = 13, - [IRQ14_VECTOR] = 14, - [IRQ15_VECTOR] = 15, - [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 -}; - -static void __init init_ISA_irqs (void) -{ - int i; - - init_bsp_APIC(); - init_8259A(0); - - for (i = 0; i < NR_IRQS; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = NULL; - irq_desc[i].depth = 1; - - if (i < 16) { - /* - * 16 old-style INTA-cycle interrupts: - */ - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } else { - /* - * 'high' PCI IRQs filled in on demand - */ - irq_desc[i].chip = &no_irq_chip; - } - } -} - -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -void __init native_init_IRQ(void) -{ - int i; - - init_ISA_irqs(); - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (vector != IA32_SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); - } - -#ifdef CONFIG_SMP - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPIs for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* Low priority IPI to cleanup after moving an irq */ - set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); -#endif - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); - alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); - - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - if (!acpi_ioapic) - setup_irq(2, &irq2); -} diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c new file mode 100644 index 0000000..d669142 --- /dev/null +++ b/arch/x86/kernel/irqinit_32.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +/* + * Note that on a 486, we don't want to do a SIGFPE on an irq13 + * as the irq is unreliable, and exception 16 works correctly + * (ie as explained in the intel literature). On a 386, you + * can't use exception 16 due to bad IBM design, so we have to + * rely on the less exact irq13. + * + * Careful.. Not only is IRQ13 unreliable, but it is also + * leads to races. IBM designers who came up with it should + * be shot. + */ + + +static irqreturn_t math_error_irq(int cpl, void *dev_id) +{ + extern void math_error(void __user *); + outb(0,0xF0); + if (ignore_fpu_irq || !boot_cpu_data.hard_math) + return IRQ_NONE; + math_error((void __user *)get_irq_regs()->ip); + return IRQ_HANDLED; +} + +/* + * New motherboards sometimes make IRQ 13 be a PCI interrupt, + * so allow interrupt sharing. + */ +static struct irqaction fpu_irq = { + .handler = math_error_irq, + .mask = CPU_MASK_NONE, + .name = "fpu", +}; + +void __init init_ISA_irqs (void) +{ + int i; + +#ifdef CONFIG_X86_LOCAL_APIC + init_bsp_APIC(); +#endif + init_8259A(0); + + /* + * 16 old-style INTA-cycle interrupts: + */ + for (i = 0; i < 16; i++) { + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); + } +} + +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) +{ + int i; + + /* all the set up before the call gates are initialised */ + pre_intr_init_hook(); + + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (i >= NR_IRQS) + break; + /* SYSCALL_VECTOR was reserved in trap_init. */ + if (!test_bit(vector, used_vectors)) + set_intr_gate(vector, interrupt[i]); + } + + /* setup after call gates are initialised (usually add in + * the architecture specific gates) + */ + intr_init_hook(); + + /* + * External FPU? Set up irq13 if so, for + * original braindamaged IBM FERR coupling. + */ + if (boot_cpu_data.hard_math && !cpu_has_fpu) + setup_irq(FPU_IRQ, &fpu_irq); + + irq_ctx_init(smp_processor_id()); +} diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c new file mode 100644 index 0000000..31f49e8 --- /dev/null +++ b/arch/x86/kernel/irqinit_64.c @@ -0,0 +1,217 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Common place to define all x86 IRQ vectors + * + * This builds up the IRQ handler stubs using some ugly macros in irq.h + * + * These macros create the low-level assembly IRQ routines that save + * register context and call do_IRQ(). do_IRQ() then does all the + * operations that are needed to keep the AT (or SMP IOAPIC) + * interrupt-controller happy. + */ + +#define IRQ_NAME2(nr) nr##_interrupt(void) +#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) + +/* + * SMP has a few special interrupts for IPI messages + */ + +#define BUILD_IRQ(nr) \ + asmlinkage void IRQ_NAME(nr); \ + asm("\n.p2align\n" \ + "IRQ" #nr "_interrupt:\n\t" \ + "push $~(" #nr ") ; " \ + "jmp common_interrupt"); + +#define BI(x,y) \ + BUILD_IRQ(x##y) + +#define BUILD_16_IRQS(x) \ + BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ + BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ + BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ + BI(x,c) BI(x,d) BI(x,e) BI(x,f) + +/* + * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: + * (these are usually mapped to vectors 0x30-0x3f) + */ + +/* + * The IO-APIC gives us many more interrupt sources. Most of these + * are unused but an SMP system is supposed to have enough memory ... + * sometimes (mostly wrt. hw bugs) we get corrupted vectors all + * across the spectrum, so we really want to be prepared to get all + * of these. Plus, more powerful systems might have more than 64 + * IO-APIC registers. + * + * (these are usually mapped into the 0x30-0xff vector range) + */ + BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) +BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) +BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) +BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) + +#undef BUILD_16_IRQS +#undef BI + + +#define IRQ(x,y) \ + IRQ##x##y##_interrupt + +#define IRQLIST_16(x) \ + IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ + IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ + IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ + IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) + +/* for the irq vectors */ +static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { + IRQLIST_16(0x2), IRQLIST_16(0x3), + IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), + IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), + IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) +}; + +#undef IRQ +#undef IRQLIST_16 + + + + +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ + +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; +DEFINE_PER_CPU(vector_irq_t, vector_irq) = { + [0 ... IRQ0_VECTOR - 1] = -1, + [IRQ0_VECTOR] = 0, + [IRQ1_VECTOR] = 1, + [IRQ2_VECTOR] = 2, + [IRQ3_VECTOR] = 3, + [IRQ4_VECTOR] = 4, + [IRQ5_VECTOR] = 5, + [IRQ6_VECTOR] = 6, + [IRQ7_VECTOR] = 7, + [IRQ8_VECTOR] = 8, + [IRQ9_VECTOR] = 9, + [IRQ10_VECTOR] = 10, + [IRQ11_VECTOR] = 11, + [IRQ12_VECTOR] = 12, + [IRQ13_VECTOR] = 13, + [IRQ14_VECTOR] = 14, + [IRQ15_VECTOR] = 15, + [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 +}; + +static void __init init_ISA_irqs (void) +{ + int i; + + init_bsp_APIC(); + init_8259A(0); + + for (i = 0; i < NR_IRQS; i++) { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = NULL; + irq_desc[i].depth = 1; + + if (i < 16) { + /* + * 16 old-style INTA-cycle interrupts: + */ + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); + } else { + /* + * 'high' PCI IRQs filled in on demand + */ + irq_desc[i].chip = &no_irq_chip; + } + } +} + +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) +{ + int i; + + init_ISA_irqs(); + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (vector != IA32_SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); + } + +#ifdef CONFIG_SMP + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPIs for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* Low priority IPI to cleanup after moving an irq */ + set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); +#endif + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); + + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + + if (!acpi_ioapic) + setup_irq(2, &irq2); +} -- cgit v0.10.2 From 3711ccb07b7f0a13f4f1aa16a8fdca9c930f21ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 17:24:34 +0200 Subject: x86: fixup the fallout of the bitops changes Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 25d7105..895339d 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -245,7 +245,7 @@ static inline void set_restore_sigmask(void) { struct thread_info *ti = current_thread_info(); ti->status |= TS_RESTORE_SIGMASK; - set_bit(TIF_SIGPENDING, &ti->flags); + set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags); } #endif /* !__ASSEMBLY__ */ -- cgit v0.10.2 From 81d50bb254ed53a0da45a65988e4e1fa08e8a541 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 15 May 2008 19:42:49 -0700 Subject: posix-timers: print RT watchdog message It's useful to detect which process is killed by RT watchdog. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index f1525ad..c42a03a 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1037,6 +1037,9 @@ static void check_thread_timers(struct task_struct *tsk, sig->rlim[RLIMIT_RTTIME].rlim_cur += USEC_PER_SEC; } + printk(KERN_INFO + "RT Watchdog Timeout: %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); } } -- cgit v0.10.2 From 7f6f3a39d258adf51f0fb1fe0dab52272a1c61a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 23:12:18 +0200 Subject: namespacecheck: fix kernel printk.c Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/printk.c b/kernel/printk.c index 8fb01c3..b620e3d 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -231,7 +231,7 @@ static inline void boot_delay_msec(void) /* * Return the number of unread characters in the log buffer. */ -int log_buf_get_len(void) +static int log_buf_get_len(void) { return logged_chars; } @@ -270,7 +270,7 @@ int log_buf_copy(char *dest, int idx, int len) /* * Extract a single character from the log buffer. */ -int log_buf_read(int idx) +static int log_buf_read(int idx) { char ret; -- cgit v0.10.2 From 42fdfa238a23643226910acf922ea930b3286032 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 23:14:51 +0200 Subject: namespacecheck: more kernel/printk.c fixes [ Stephen Rothwell : build fix ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 792bf0a..f2a668c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -184,9 +184,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) __attribute__ ((format (printf, 1, 0))); asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern int log_buf_get_len(void); -extern int log_buf_read(int idx); -extern int log_buf_copy(char *dest, int idx, int len); extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; @@ -202,9 +199,6 @@ static inline int vprintk(const char *s, va_list args) { return 0; } static inline int printk(const char *s, ...) __attribute__ ((format (printf, 1, 2))); static inline int __cold printk(const char *s, ...) { return 0; } -static inline int log_buf_get_len(void) { return 0; } -static inline int log_buf_read(int idx) { return 0; } -static inline int log_buf_copy(char *dest, int idx, int len) { return 0; } static inline int printk_ratelimit(void) { return 0; } static inline int __printk_ratelimit(int ratelimit_jiffies, \ int ratelimit_burst) { return 0; } diff --git a/kernel/printk.c b/kernel/printk.c index b620e3d..55d16e5 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -268,19 +268,6 @@ int log_buf_copy(char *dest, int idx, int len) } /* - * Extract a single character from the log buffer. - */ -static int log_buf_read(int idx) -{ - char ret; - - if (log_buf_copy(&ret, idx, 1) == 1) - return ret; - else - return -1; -} - -/* * Commands to do_syslog: * * 0 -- Close the log. Currently a NOP. -- cgit v0.10.2 From 3b8945e8d40645eecdb7d2357ca531f9b4dd9f71 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 12 May 2008 21:21:04 +0200 Subject: printk: clean up recursion check related static variables Make printk_recursion_bug_msg static and drop printk prefix from recursion variables. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/printk.c b/kernel/printk.c index 55d16e5..8b42f87 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -652,16 +652,14 @@ static int acquire_console_semaphore_for_printk(unsigned int cpu) spin_unlock(&logbuf_lock); return retval; } - -const char printk_recursion_bug_msg [] = - KERN_CRIT "BUG: recent printk recursion!\n"; -static int printk_recursion_bug; +static const char recursion_bug_msg [] = + KERN_CRIT "BUG: recent printk recursion!\n"; +static int recursion_bug; +static int log_level_unknown = 1; +static char printk_buf[1024]; asmlinkage int vprintk(const char *fmt, va_list args) { - static int log_level_unknown = 1; - static char printk_buf[1024]; - unsigned long flags; int printed_len = 0; int this_cpu; @@ -686,7 +684,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) * it can be printed at the next appropriate moment: */ if (!oops_in_progress) { - printk_recursion_bug = 1; + recursion_bug = 1; goto out_restore_irqs; } zap_locks(); @@ -696,10 +694,10 @@ asmlinkage int vprintk(const char *fmt, va_list args) spin_lock(&logbuf_lock); printk_cpu = this_cpu; - if (printk_recursion_bug) { - printk_recursion_bug = 0; - strcpy(printk_buf, printk_recursion_bug_msg); - printed_len = sizeof(printk_recursion_bug_msg); + if (recursion_bug) { + recursion_bug = 0; + strcpy(printk_buf, recursion_bug_msg); + printed_len = sizeof(recursion_bug_msg); } /* Emit the output into the temporary buffer */ printed_len += vscnprintf(printk_buf + printed_len, -- cgit v0.10.2 From cd3a1b8562d28490b334a61d5eb05df3d722d91e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 12 May 2008 21:21:04 +0200 Subject: printk: don't prefer unsuited consoles on registration console election: If some console happens to be registered first which does not provide a tty binding (!console->device), it prevents that more suited consoles which are registered later on can enter the candidate pool for console_device(). This is observable with KGDB's console which may already be registered (and exploited!) during early debugger connections, that is before any regular console registration. This patch fixes the issue by postponing the final, automated preferred_console selection until someone with a non-NULL device handler comes around. Signed-off-by: Jan Kiszka Cc: Jason Wessel Cc: Gerd Hoffmann Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/printk.c b/kernel/printk.c index 8b42f87..7d55561 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1157,8 +1157,11 @@ void register_console(struct console *console) console->index = 0; if (console->setup == NULL || console->setup(console, NULL) == 0) { - console->flags |= CON_ENABLED | CON_CONSDEV; - preferred_console = 0; + console->flags |= CON_ENABLED; + if (console->device) { + console->flags |= CON_CONSDEV; + preferred_console = 0; + } } } -- cgit v0.10.2 From ac60ad7413ca8208094609a3b88ed9b1ed012fbc Mon Sep 17 00:00:00 2001 From: Nick Andrew Date: Mon, 12 May 2008 21:21:04 +0200 Subject: printk: refactor processing of line severity tokens Restructure the logic of vprintk() so the processing of the leading 3 characters of each input line is in one place, regardless whether printk_time is enabled. This makes the code smaller and easier to understand. size reduction in kernel/printk.o: text data bss dec hex filename 6157 397 1049804 1056358 101e66 printk.o.before 6117 397 1049804 1056318 101e3e printk.o.after and some style uncleanlinesses removed as well as a side-effect: Before: total: 19 errors, 22 warnings, 1340 lines checked After: total: 17 errors, 22 warnings, 1333 lines checked Signed-off-by: Nick Andrew Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/printk.c b/kernel/printk.c index 7d55561..98ca1b7 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -655,13 +655,13 @@ static int acquire_console_semaphore_for_printk(unsigned int cpu) static const char recursion_bug_msg [] = KERN_CRIT "BUG: recent printk recursion!\n"; static int recursion_bug; -static int log_level_unknown = 1; + static int new_text_line = 1; static char printk_buf[1024]; asmlinkage int vprintk(const char *fmt, va_list args) { - unsigned long flags; int printed_len = 0; + unsigned long flags; int this_cpu; char *p; @@ -703,61 +703,54 @@ asmlinkage int vprintk(const char *fmt, va_list args) printed_len += vscnprintf(printk_buf + printed_len, sizeof(printk_buf) - printed_len, fmt, args); + /* * Copy the output into log_buf. If the caller didn't provide * appropriate log level tags, we insert them here */ for (p = printk_buf; *p; p++) { - if (log_level_unknown) { - /* log_level_unknown signals the start of a new line */ + if (new_text_line) { + int current_log_level = default_message_loglevel; + /* If a token, set current_log_level and skip over */ + if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' && + p[2] == '>') { + current_log_level = p[1] - '0'; + p += 3; + printed_len -= 3; + } + + /* Always output the token */ + emit_log_char('<'); + emit_log_char(current_log_level + '0'); + emit_log_char('>'); + printed_len += 3; + new_text_line = 0; + if (printk_time) { - int loglev_char; + /* Follow the token with the time */ char tbuf[50], *tp; unsigned tlen; unsigned long long t; unsigned long nanosec_rem; - /* - * force the log level token to be - * before the time output. - */ - if (p[0] == '<' && p[1] >='0' && - p[1] <= '7' && p[2] == '>') { - loglev_char = p[1]; - p += 3; - printed_len -= 3; - } else { - loglev_char = default_message_loglevel - + '0'; - } t = cpu_clock(printk_cpu); nanosec_rem = do_div(t, 1000000000); - tlen = sprintf(tbuf, - "<%c>[%5lu.%06lu] ", - loglev_char, - (unsigned long)t, - nanosec_rem/1000); + tlen = sprintf(tbuf, "[%5lu.%06lu] ", + (unsigned long) t, + nanosec_rem / 1000); for (tp = tbuf; tp < tbuf + tlen; tp++) emit_log_char(*tp); printed_len += tlen; - } else { - if (p[0] != '<' || p[1] < '0' || - p[1] > '7' || p[2] != '>') { - emit_log_char('<'); - emit_log_char(default_message_loglevel - + '0'); - emit_log_char('>'); - printed_len += 3; - } } - log_level_unknown = 0; + if (!*p) break; } + emit_log_char(*p); if (*p == '\n') - log_level_unknown = 1; + new_text_line = 1; } /* -- cgit v0.10.2 From 091593080533a752ce66d12858d8c4105c8e1793 Mon Sep 17 00:00:00 2001 From: Nick Andrew Date: Mon, 12 May 2008 21:21:04 +0200 Subject: printk: remember the message level for multi-line output printk(KERN_ALERT "Danger Will Robinson!\nAlien Approaching!\n"); At present this will result in one message at ALERT level and one at the current default message loglevel (e.g. WARNING). This is non-intuitive. Modify vprintk() to remember the message loglevel each time it is specified and use it for subsequent lines of output which do not specify one, within the same call to printk. Signed-off-by: Nick Andrew Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/printk.c b/kernel/printk.c index 98ca1b7..475fc22 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -661,6 +661,7 @@ static char printk_buf[1024]; asmlinkage int vprintk(const char *fmt, va_list args) { int printed_len = 0; + int current_log_level = default_message_loglevel; unsigned long flags; int this_cpu; char *p; @@ -710,7 +711,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) */ for (p = printk_buf; *p; p++) { if (new_text_line) { - int current_log_level = default_message_loglevel; /* If a token, set current_log_level and skip over */ if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' && p[2] == '>') { -- cgit v0.10.2 From 6360b1fbb4a939efd34fc770c2ebd927c55506e0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:41 +0200 Subject: move BUG_TABLE into RODATA Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index 481cfd4..bc932c9 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -93,8 +93,6 @@ SECTIONS __stop___ex_table = .; } - BUG_TABLE - RODATA . = ALIGN(THREAD_SIZE); diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 50b4a3a..ff7d4ff 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -66,7 +66,6 @@ SECTIONS _etext = .; RODATA - BUG_TABLE /* writeable */ /* Make sure this is page aligned so diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 0c3000b..53d57d1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -64,8 +64,6 @@ SECTIONS NOTES - BUG_TABLE - /* * Init sections discarded at runtime */ diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index b460715..76c1e60 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -40,7 +40,6 @@ SECTIONS _etext = .; /* End of text section */ NOTES :text :note - BUG_TABLE :text RODATA diff --git a/arch/sh/kernel/vmlinux_32.lds.S b/arch/sh/kernel/vmlinux_32.lds.S index c711378..7b4b82b 100644 --- a/arch/sh/kernel/vmlinux_32.lds.S +++ b/arch/sh/kernel/vmlinux_32.lds.S @@ -44,7 +44,6 @@ SECTIONS _etext = .; /* End of text section */ - BUG_TABLE NOTES RO_DATA(PAGE_SIZE) diff --git a/arch/sh/kernel/vmlinux_64.lds.S b/arch/sh/kernel/vmlinux_64.lds.S index d1e1770..33fa464 100644 --- a/arch/sh/kernel/vmlinux_64.lds.S +++ b/arch/sh/kernel/vmlinux_64.lds.S @@ -65,7 +65,6 @@ SECTIONS _etext = .; /* End of text section */ - BUG_TABLE NOTES RO_DATA(PAGE_SIZE) diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed08..aa08554 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -49,16 +49,14 @@ SECTIONS _etext = .; /* End of text section */ } :text = 0x9090 + NOTES :text :note + . = ALIGN(16); /* Exception table */ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; *(__ex_table) __stop___ex_table = .; - } - - NOTES :text :note - - BUG_TABLE :text + } :text = 0x9090 . = ALIGN(4); .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index fad3674..d123747 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -19,7 +19,7 @@ PHDRS { data PT_LOAD FLAGS(7); /* RWE */ user PT_LOAD FLAGS(7); /* RWE */ data.init PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(4); /* R__ */ + note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS { @@ -40,16 +40,14 @@ SECTIONS _etext = .; /* End of text section */ } :text = 0x9090 + NOTES :text :note + . = ALIGN(16); /* Exception table */ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; *(__ex_table) __stop___ex_table = .; - } - - NOTES :text :note - - BUG_TABLE :text + } :text = 0x9090 RODATA diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f054778..dd2cc81 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -67,6 +67,8 @@ *(.rodata1) \ } \ \ + BUG_TABLE \ + \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -310,6 +312,7 @@ .stab.indexstr 0 : { *(.stab.indexstr) } \ .comment 0 : { *(.comment) } +#ifdef CONFIG_GENERIC_BUG #define BUG_TABLE \ . = ALIGN(8); \ __bug_table : AT(ADDR(__bug_table) - LOAD_OFFSET) { \ @@ -317,6 +320,9 @@ *(__bug_table) \ __stop___bug_table = .; \ } +#else +#define BUG_TABLE +#endif #define NOTES \ .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ -- cgit v0.10.2 From 63687a528c39a67c1a213cdffa09feb0e6af9dbe Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:41 +0200 Subject: x86: move tracedata to RODATA .. allowing it to be write-protected just as other read-only data under CONFIG_DEBUG_RODATA. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed08..2674f57 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -60,13 +60,6 @@ SECTIONS BUG_TABLE :text - . = ALIGN(4); - .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { - __tracedata_start = .; - *(.tracedata) - __tracedata_end = .; - } - RODATA /* writeable */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index fad3674..687041b 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -53,13 +53,6 @@ SECTIONS RODATA - . = ALIGN(4); - .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { - __tracedata_start = .; - *(.tracedata) - __tracedata_end = .; - } - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 2b4b392..87a7f1d 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -153,7 +153,7 @@ EXPORT_SYMBOL(set_trace_device); * it's not any guarantee, but it's a high _likelihood_ that * the match is valid). */ -void generate_resume_trace(void *tracedata, unsigned int user) +void generate_resume_trace(const void *tracedata, unsigned int user) { unsigned short lineno = *(unsigned short *)tracedata; const char *file = *(const char **)(tracedata + 2); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f054778..f1992dc 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -93,6 +93,8 @@ VMLINUX_SYMBOL(__end_rio_route_ops) = .; \ } \ \ + TRACEDATA \ + \ /* Kernel symbol table: Normal symbols */ \ __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab) = .; \ @@ -318,6 +320,18 @@ __stop___bug_table = .; \ } +#ifdef CONFIG_PM_TRACE +#define TRACEDATA \ + . = ALIGN(4); \ + .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { \ + __tracedata_start = .; \ + *(.tracedata) \ + __tracedata_end = .; \ + } +#else +#define TRACEDATA +#endif + #define NOTES \ .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_notes) = .; \ diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h index 2557514..8d9f0b4 100644 --- a/include/asm-x86/resume-trace.h +++ b/include/asm-x86/resume-trace.h @@ -6,7 +6,7 @@ #define TRACE_RESUME(user) \ do { \ if (pm_trace_enabled) { \ - void *tracedata; \ + const void *tracedata; \ asm volatile(_ASM_MOV_UL " $1f,%0\n" \ ".section .tracedata,\"a\"\n" \ "1:\t.word %c1\n\t" \ diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index f3f4f28..c9ba2fd 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -8,7 +8,7 @@ extern int pm_trace_enabled; struct device; extern void set_trace_device(struct device *); -extern void generate_resume_trace(void *tracedata, unsigned int user); +extern void generate_resume_trace(const void *tracedata, unsigned int user); #define TRACE_DEVICE(dev) do { \ if (pm_trace_enabled) \ -- cgit v0.10.2 From a2eddfa95919a730e0e5ed17e9c303fe5ba249cd Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:41 +0200 Subject: x86: make /proc/stat account for all interrupts LAPIC interrupts, which don't go through the generic interrupt handling code, aren't accounted for in /proc/stat. Hence this patch adds a mechanism architectures can use to accordingly adjust the statistics. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 147352d..468acd0 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -313,16 +313,20 @@ skip: per_cpu(irq_stat,j).irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif +#ifdef CONFIG_X86_MCE seq_printf(p, "TRM: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); +#endif +#ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "SPU: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); +#endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); @@ -331,6 +335,40 @@ skip: return 0; } +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = nmi_count(cpu); + +#ifdef CONFIG_X86_LOCAL_APIC + sum += per_cpu(irq_stat, cpu).apic_timer_irqs; +#endif +#ifdef CONFIG_SMP + sum += per_cpu(irq_stat, cpu).irq_resched_count; + sum += per_cpu(irq_stat, cpu).irq_call_count; + sum += per_cpu(irq_stat, cpu).irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += per_cpu(irq_stat, cpu).irq_thermal_count; +#endif +#ifdef CONFIG_X86_LOCAL_APIC + sum += per_cpu(irq_stat, cpu).irq_spurious_count; +#endif + return sum; +} + +u64 arch_irq_stat(void) +{ + u64 sum = atomic_read(&irq_err_count); + +#ifdef CONFIG_X86_IO_APIC + sum += atomic_read(&irq_mis_count); +#endif + return sum; +} + #ifdef CONFIG_HOTPLUG_CPU #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 3aac154..1f78b23 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -135,6 +135,7 @@ skip: seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif +#ifdef CONFIG_X86_MCE seq_printf(p, "TRM: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); @@ -143,6 +144,7 @@ skip: for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); +#endif seq_printf(p, "SPU: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); @@ -153,6 +155,32 @@ skip: } /* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = cpu_pda(cpu)->__nmi_count; + + sum += cpu_pda(cpu)->apic_timer_irqs; +#ifdef CONFIG_SMP + sum += cpu_pda(cpu)->irq_resched_count; + sum += cpu_pda(cpu)->irq_call_count; + sum += cpu_pda(cpu)->irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += cpu_pda(cpu)->irq_thermal_count; + sum += cpu_pda(cpu)->irq_threshold_count; +#endif + sum += cpu_pda(cpu)->irq_spurious_count; + return sum; +} + +u64 arch_irq_stat(void) +{ + return atomic_read(&irq_err_count); +} + +/* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific * handlers). diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 74a323d..903e617 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -472,6 +472,13 @@ static const struct file_operations proc_vmalloc_operations = { }; #endif +#ifndef arch_irq_stat_cpu +#define arch_irq_stat_cpu(cpu) 0 +#endif +#ifndef arch_irq_stat +#define arch_irq_stat() 0 +#endif + static int show_stat(struct seq_file *p, void *v) { int i; @@ -509,7 +516,9 @@ static int show_stat(struct seq_file *p, void *v) sum += temp; per_irq_sum[j] += temp; } + sum += arch_irq_stat_cpu(i); } + sum += arch_irq_stat(); seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", (unsigned long long)cputime64_to_clock_t(user), diff --git a/include/asm-x86/hardirq.h b/include/asm-x86/hardirq.h index 314434d..000787d 100644 --- a/include/asm-x86/hardirq.h +++ b/include/asm-x86/hardirq.h @@ -3,3 +3,9 @@ #else # include "hardirq_64.h" #endif + +extern u64 arch_irq_stat_cpu(unsigned int cpu); +#define arch_irq_stat_cpu arch_irq_stat_cpu + +extern u64 arch_irq_stat(void); +#define arch_irq_stat arch_irq_stat -- cgit v0.10.2 From 962cf36c5bf6d2840b8d66ee9a606fae2f540bbd Mon Sep 17 00:00:00 2001 From: "Carlos R. Mafra" Date: Thu, 15 May 2008 11:15:37 -0300 Subject: Remove argument from open_softirq which is always NULL As git-grep shows, open_softirq() is always called with the last argument being NULL block/blk-core.c: open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); kernel/hrtimer.c: open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); kernel/rcuclassic.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/rcupreempt.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/sched.c: open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); kernel/softirq.c: open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); kernel/softirq.c: open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); kernel/timer.c: open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); net/core/dev.c: open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); net/core/dev.c: open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); This observation has already been made by Matthew Wilcox in June 2002 (http://www.cs.helsinki.fi/linux/linux-kernel/2002-25/0687.html) "I notice that none of the current softirq routines use the data element passed to them." and the situation hasn't changed since them. So it appears we can safely remove that extra argument to save 128 (54) bytes of kernel data (text). Signed-off-by: Carlos R. Mafra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/block/blk-core.c b/block/blk-core.c index 6a9cc0d..75fdc65 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2048,7 +2048,7 @@ int __init blk_dev_init(void) for_each_possible_cpu(i) INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); - open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); + open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); register_hotcpu_notifier(&blk_cpu_notifier); return 0; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f1fc747..a86186d 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -285,12 +285,11 @@ enum struct softirq_action { void (*action)(struct softirq_action *); - void *data; }; asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); -extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); +extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) extern void raise_softirq_irqoff(unsigned int nr); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 421be5f..861b408 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1669,7 +1669,7 @@ void __init hrtimers_init(void) (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); #ifdef CONFIG_HIGH_RES_TIMERS - open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); #endif } diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index f4ffbd0..f6e01f3 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -529,7 +529,7 @@ static void __cpuinit rcu_online_cpu(int cpu) rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp); rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp); - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } static int __cpuinit rcu_cpu_notify(struct notifier_block *self, diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index e1cdf19..9dd827d 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -1125,7 +1125,7 @@ void __init __rcu_init(void) for_each_online_cpu(cpu) rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu); - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } /* diff --git a/kernel/sched.c b/kernel/sched.c index cfa222a..56ea3a2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8154,7 +8154,7 @@ void __init sched_init(void) #endif #ifdef CONFIG_SMP - open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); + open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); #endif #ifdef CONFIG_RT_MUTEXES diff --git a/kernel/softirq.c b/kernel/softirq.c index 36e0617..0592568 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -347,9 +347,8 @@ void raise_softirq(unsigned int nr) local_irq_restore(flags); } -void open_softirq(int nr, void (*action)(struct softirq_action*), void *data) +void open_softirq(int nr, void (*action)(struct softirq_action *)) { - softirq_vec[nr].data = data; softirq_vec[nr].action = action; } @@ -503,8 +502,8 @@ void __init softirq_init(void) &per_cpu(tasklet_hi_vec, cpu).head; } - open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); - open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); + open_softirq(TASKLET_SOFTIRQ, tasklet_action); + open_softirq(HI_SOFTIRQ, tasklet_hi_action); } static int ksoftirqd(void * __bind_cpu) diff --git a/kernel/timer.c b/kernel/timer.c index ceacc66..b4da888 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1502,7 +1502,7 @@ void __init init_timers(void) BUG_ON(err == NOTIFY_BAD); register_cpu_notifier(&timers_nb); - open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); + open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } /** diff --git a/net/core/dev.c b/net/core/dev.c index 5829630..cf0e167 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4563,8 +4563,8 @@ static int __init net_dev_init(void) dev_boot_phase = 0; - open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); - open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); + open_softirq(NET_TX_SOFTIRQ, net_tx_action); + open_softirq(NET_RX_SOFTIRQ, net_rx_action); hotcpu_notifier(dev_cpu_callback, 0); dst_init(); -- cgit v0.10.2 From c2c14fb7afcc67b48724571506e095fccc65a670 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:36 +0200 Subject: x86: tsc_64.c make constant UL arch/x86/kernel/tsc_64.c:245:13: warning: constant 0x100000000 is so big it is long Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index fcc16e5..c19b0e2 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -242,7 +242,7 @@ void __init tsc_calibrate(void) if (hpet) { printk(KERN_INFO "TSC calibrated against HPET\n"); if (hpet2 < hpet1) - hpet2 += 0x100000000; + hpet2 += 0x100000000UL; hpet2 -= hpet1; tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000; } else { -- cgit v0.10.2 From e6b0edef3453677b13e175a104a83eb36d062dd3 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Mon, 12 May 2008 15:43:38 +0200 Subject: x86: clean up vdso_enabled type on x86_64 This fixes type of "vdso_enabled" on X86_64 to match extern in asm/elf.h. Signed-off-by: OGAWA Hirofumi Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 3fdd514..19a6cfa 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -16,7 +16,7 @@ #include "vextern.h" /* Just for VMAGIC. */ #undef VEXTERN -int vdso_enabled = 1; +unsigned int __read_mostly vdso_enabled = 1; extern char vdso_start[], vdso_end[]; extern unsigned short vdso_sync_cpuid; -- cgit v0.10.2 From 46dd98a5c0b907f94742579ab605be1933a37abd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 May 2008 16:10:39 -0700 Subject: arch/x86/mm/pat.c: use boot_cpu_has() arch/x86/mm/pat.c: In function 'phys_mem_access_prot_allowed': arch/x86/mm/pat.c:526: warning: passing argument 2 of 'constant_test_bit' from incompatible pointer type arch/x86/mm/pat.c:526: warning: passing argument 2 of 'variable_test_bit' from incompatible pointer type arch/x86/mm/pat.c:527: warning: passing argument 2 of 'constant_test_bit' from incompatible pointer type arch/x86/mm/pat.c:527: warning: passing argument 2 of 'variable_test_bit' from incompatible pointer type arch/x86/mm/pat.c:528: warning: passing argument 2 of 'constant_test_bit' from incompatible pointer type arch/x86/mm/pat.c:528: warning: passing argument 2 of 'variable_test_bit' from incompatible pointer type arch/x86/mm/pat.c:529: warning: passing argument 2 of 'constant_test_bit' from incompatible pointer type arch/x86/mm/pat.c:529: warning: passing argument 2 of 'variable_test_bit' from incompatible pointer type Don't open-code test_bit() on a __u32 Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index a1cbea0..e83b770 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -536,10 +536,10 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, * we maintain the tradition of paranoia in this code. */ if (!pat_wc_enabled && - ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) && + ! ( boot_cpu_has(X86_FEATURE_MTRR) || + boot_cpu_has(X86_FEATURE_K6_MTRR) || + boot_cpu_has(X86_FEATURE_CYRIX_ARR) || + boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) && (pfn << PAGE_SHIFT) >= __pa(high_memory)) { flags = _PAGE_CACHE_UC; } -- cgit v0.10.2 From 5136dea5734cfddbc6d7ccb7ead85a3ac7ce3de2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 May 2008 16:10:41 -0700 Subject: x86: bitops take an unsigned long * All (or most) other architectures do this. So should x86. Fix. Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index ee4b3ea..7d2494b 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -43,7 +43,7 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(int nr, volatile void *addr) +static inline void set_bit(int nr, volatile unsigned long *addr) { asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } @@ -57,7 +57,7 @@ static inline void set_bit(int nr, volatile void *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __set_bit(int nr, volatile void *addr) +static inline void __set_bit(int nr, volatile unsigned long *addr) { asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } @@ -72,7 +72,7 @@ static inline void __set_bit(int nr, volatile void *addr) * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ -static inline void clear_bit(int nr, volatile void *addr) +static inline void clear_bit(int nr, volatile unsigned long *addr) { asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr)); } @@ -85,13 +85,13 @@ static inline void clear_bit(int nr, volatile void *addr) * clear_bit() is atomic and implies release semantics before the memory * operation. It can be used for an unlock. */ -static inline void clear_bit_unlock(unsigned nr, volatile void *addr) +static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr) { barrier(); clear_bit(nr, addr); } -static inline void __clear_bit(int nr, volatile void *addr) +static inline void __clear_bit(int nr, volatile unsigned long *addr) { asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } @@ -108,7 +108,7 @@ static inline void __clear_bit(int nr, volatile void *addr) * No memory barrier is required here, because x86 cannot reorder stores past * older loads. Same principle as spin_unlock. */ -static inline void __clear_bit_unlock(unsigned nr, volatile void *addr) +static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr) { barrier(); __clear_bit(nr, addr); @@ -126,7 +126,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile void *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __change_bit(int nr, volatile void *addr) +static inline void __change_bit(int nr, volatile unsigned long *addr) { asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); } @@ -140,7 +140,7 @@ static inline void __change_bit(int nr, volatile void *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void change_bit(int nr, volatile void *addr) +static inline void change_bit(int nr, volatile unsigned long *addr) { asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); } @@ -153,7 +153,7 @@ static inline void change_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_set_bit(int nr, volatile void *addr) +static inline int test_and_set_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -170,7 +170,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) * * This is the same as test_and_set_bit on x86. */ -static inline int test_and_set_bit_lock(int nr, volatile void *addr) +static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } @@ -184,7 +184,7 @@ static inline int test_and_set_bit_lock(int nr, volatile void *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_set_bit(int nr, volatile void *addr) +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -203,7 +203,7 @@ static inline int __test_and_set_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_clear_bit(int nr, volatile void *addr) +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -223,7 +223,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_clear_bit(int nr, volatile void *addr) +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -235,7 +235,7 @@ static inline int __test_and_clear_bit(int nr, volatile void *addr) } /* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(int nr, volatile void *addr) +static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -255,7 +255,7 @@ static inline int __test_and_change_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_change_bit(int nr, volatile void *addr) +static inline int test_and_change_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -266,13 +266,13 @@ static inline int test_and_change_bit(int nr, volatile void *addr) return oldbit; } -static inline int constant_test_bit(int nr, const volatile void *addr) +static inline int constant_test_bit(int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; } -static inline int variable_test_bit(int nr, volatile const void *addr) +static inline int variable_test_bit(int nr, volatile const unsigned long *addr) { int oldbit; -- cgit v0.10.2 From eef8f871d84b5df1a24902a4e4700188be1aff2c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:34 +0200 Subject: x86: vsmp_64 add missing includes sparse mutters: arch/x86/kernel/vsmp_64.c:126:5: warning: symbol 'is_vsmp_box' was not declared. Should it be static? arch/x86/kernel/vsmp_64.c:145:13: warning: symbol 'vsmp_init' was not declared. Should it be static? Include the appropriate headers. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index ba8c0b7..0c029e8 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -15,9 +15,12 @@ #include #include #include + +#include #include #include #include +#include #if defined CONFIG_PCI && defined CONFIG_PARAVIRT /* -- cgit v0.10.2 From 535694f361419ca195fd915dd5038c926334e1be Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:35 +0200 Subject: x86: boot/printfc use NULL instead 0 Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c index c1d00c02..50e47cd 100644 --- a/arch/x86/boot/printf.c +++ b/arch/x86/boot/printf.c @@ -56,7 +56,7 @@ static char *number(char *str, long num, int base, int size, int precision, if (type & LEFT) type &= ~ZEROPAD; if (base < 2 || base > 36) - return 0; + return NULL; c = (type & ZEROPAD) ? '0' : ' '; sign = 0; if (type & SIGN) { -- cgit v0.10.2 From 635ee418381566f03819408e1303ef21fcf2d41c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:35 +0200 Subject: x86: create prototype for (un)map_devmem Global functions need a prototype. Add it. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index dc936dd..ed16509 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -51,8 +51,15 @@ #ifndef __ASSEMBLY__ +typedef struct { pgdval_t pgd; } pgd_t; +typedef struct { pgprotval_t pgprot; } pgprot_t; + extern int page_is_ram(unsigned long pagenr); extern int devmem_is_allowed(unsigned long pagenr); +extern void map_devmem(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); +extern void unmap_devmem(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); extern unsigned long max_pfn_mapped; @@ -74,9 +81,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE -typedef struct { pgdval_t pgd; } pgd_t; -typedef struct { pgprotval_t pgprot; } pgprot_t; - static inline pgd_t native_make_pgd(pgdval_t val) { return (pgd_t) { val }; -- cgit v0.10.2 From 0eafe234a2571f51ef9f7e8baddb58c828750771 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:35 +0200 Subject: x86: k8topology add missing header k8_scan_nodes is global and needs a prototype. Add the header file which contains it. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 1f476e4..4847119 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -22,6 +22,7 @@ #include #include #include +#include static __init int find_northbridge(void) { -- cgit v0.10.2 From d34c08958fa36c7a8c3f8d9c0ebe6ec1ab744a68 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:35 +0200 Subject: x86: k8topology fix shadow variable sparse mutters: arch/x86/mm/k8topology_64.c:108:7: warning: symbol 'nodeid' shadows an earlier one Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 4847119..36c6c4a 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -76,10 +76,10 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) { unsigned long prevbase; struct bootnode nodes[8]; - int nodeid, i, nb; + int i, nb; unsigned char nodeids[8]; int found = 0; - u32 reg; + u32 nodeid, reg; unsigned numnodes; unsigned cores; unsigned bits; @@ -106,7 +106,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) prevbase = 0; for (i = 0; i < 8; i++) { unsigned long base, limit; - u32 nodeid; base = read_pci_config(0, nb, 1, 0x40 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8); -- cgit v0.10.2 From 55d4f22abc9f011c5db982f83377cdd38bb9a2a3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:35 +0200 Subject: x86: k8topology cleanup variable declarations Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 36c6c4a..0ea66b5 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -74,17 +74,12 @@ static __init void early_get_boot_cpu_id(void) int __init k8_scan_nodes(unsigned long start, unsigned long end) { + unsigned numnodes, cores, bits, apicid_base; unsigned long prevbase; struct bootnode nodes[8]; - int i, nb; unsigned char nodeids[8]; - int found = 0; + int i, j, nb, found = 0; u32 nodeid, reg; - unsigned numnodes; - unsigned cores; - unsigned bits; - int j; - unsigned apicid_base; if (!early_pci_allowed()) return -1; -- cgit v0.10.2 From d1097635deed7196c2a859f285c29267779ca45a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 23:42:01 +0200 Subject: x86: move mmconfig declarations to header arch/x86/kernel/mmconf-fam10h_64.c is missing the prototypes, which are decalred in arch/x86/kernel/setup_64.c. Move the prototypes and the inline stubs to the appropriate header file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index edc5fbf..fdfdc55 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "../pci/pci.h" diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6dff128..341230d 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #ifdef CONFIG_PARAVIRT @@ -293,18 +294,6 @@ static void __init parse_setup_data(void) } } -#ifdef CONFIG_PCI_MMCONFIG -extern void __cpuinit fam10h_check_enable_mmcfg(void); -extern void __init check_enable_amd_mmconf_dmi(void); -#else -void __cpuinit fam10h_check_enable_mmcfg(void) -{ -} -void __init check_enable_amd_mmconf_dmi(void) -{ -} -#endif - /* * setup_arch - architecture-specific boot-time initializations * diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h new file mode 100644 index 0000000..95beda0 --- /dev/null +++ b/include/asm-x86/mmconfig.h @@ -0,0 +1,12 @@ +#ifndef _ASM_MMCONFIG_H +#define _ASM_MMCONFIG_H + +#ifdef CONFIG_PCI_MMCONFIG +extern void __cpuinit fam10h_check_enable_mmcfg(void); +extern void __init check_enable_amd_mmconf_dmi(void); +#else +static inline void fam10h_check_enable_mmcfg(void) { } +static inline void check_enable_amd_mmconf_dmi(void) { } +#endif + +#endif -- cgit v0.10.2 From 11034d55975be6d8b955a6eb11e87fc67ec086c2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:36 +0200 Subject: x86: init64.c include initrd.h free_initrd_mem needs a prototype, which is in linux/initrd.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 32ba13b..1812309 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From e0b32d768cad17af07af3aced67498bde98296c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:37 +0200 Subject: x86: make command_line static in setup_64.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 341230d..78c1be6 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -119,7 +119,7 @@ EXPORT_SYMBOL_GPL(edid_info); extern int root_mountflags; -char __initdata command_line[COMMAND_LINE_SIZE]; +static char __initdata command_line[COMMAND_LINE_SIZE]; static struct resource standard_io_resources[] = { { .name = "dma1", .start = 0x00, .end = 0x1f, -- cgit v0.10.2 From 968cbfad1a727b5689ae620f4d970abf6ce73340 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:37 +0200 Subject: x86: make __pci_mmcfg_init static in mmconfig-shared.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 0cfebec..23faaa8 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -374,7 +374,7 @@ reject: static int __initdata known_bridge; -void __init __pci_mmcfg_init(int early) +static void __init __pci_mmcfg_init(int early) { /* MMCONFIG disabled */ if ((pci_probe & PCI_PROBE_MMCONF) == 0) -- cgit v0.10.2 From 311f83494830fec17f086401a5b43984bb447cd2 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 12 May 2008 15:43:37 +0200 Subject: x86: kernel/pci-dma.c cleanups This patch contains the following cleanups: - make the following needlessly global code static: - dma_alloc_pages() Signed-off-by: Adrian Bunk Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index c5ef1af..5bc29ca 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -357,7 +357,7 @@ int dma_supported(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_supported); /* Allocate DMA memory on node near device */ -noinline struct page * +static noinline struct page * dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) { int node; -- cgit v0.10.2 From cca7c0850fa0381639bf84dd06ee900e16346373 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 12 May 2008 15:43:37 +0200 Subject: x86_64: fix mm.txt documentation Commit 85eb69a16aab5a394ce043c2131319eae35e6493 introduced 512 MiB sized kernel and 1.5 GiB sized module space but omitted to change documentation properly. Fix that. [Wasn't the hole intentional protection hole?] Signed-off-by: Jiri Slaby Cc: Andi Kleen Cc: hpa@zytor.com Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/Documentation/x86_64/mm.txt b/Documentation/x86_64/mm.txt index b89b6d2..efce750 100644 --- a/Documentation/x86_64/mm.txt +++ b/Documentation/x86_64/mm.txt @@ -11,9 +11,8 @@ ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... -ffffffff80000000 - ffffffff82800000 (=40 MB) kernel text mapping, from phys 0 -... unused hole ... -ffffffff88000000 - fffffffffff00000 (=1919 MB) module mapping space +ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 +ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space The direct mapping covers all memory in the system up to the highest memory address (this means in some cases it can also include PCI memory -- cgit v0.10.2 From 6a1673ae2201931ce34bf7ba9f90928c864d8334 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:38 +0200 Subject: x86: make memory_add_physaddr_to_nid depend on MEMORY_HOTPLUG memory_add_physaddr_to_nid() is only used in the CONFIG_MEMORY_HOTPLUG_SPARSE || CONFIG_ACPI_HOTPLUG_MEMORY case. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 3890234..f6d0584 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -522,6 +522,7 @@ int __node_distance(int a, int b) EXPORT_SYMBOL(__node_distance); +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) int memory_add_physaddr_to_nid(u64 start) { int i, ret = 0; @@ -533,4 +534,4 @@ int memory_add_physaddr_to_nid(u64 start) return ret; } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); - +#endif -- cgit v0.10.2 From 4e50e62ce52f39b5f45e82f68b18254458b429bb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:43:38 +0200 Subject: x86: eliminate duplicate consistency checks in init_32.c Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10..4834c0f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -571,17 +571,6 @@ void __init mem_init(void) #endif bad_ppro = ppro_with_ram_bug(); -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { - printk(KERN_ERR - "fixmap and kmap areas overlap - this will crash\n"); - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE, - FIXADDR_START); - BUG(); - } -#endif /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); @@ -614,7 +603,6 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); -#if 1 /* double-sanity-check paranoia */ printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM @@ -655,7 +643,6 @@ void __init mem_init(void) #endif BUG_ON(VMALLOC_START > VMALLOC_END); BUG_ON((unsigned long)high_memory > VMALLOC_START); -#endif /* double-sanity-check paranoia */ if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); -- cgit v0.10.2 From 4c8ab98249fa3cead320ec0ee4cde9960b951989 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:43:38 +0200 Subject: i386: move FIX_ACPI_* into non-permanent range .. as they are used at early boot time only. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h index 4b96148..f0df7ee 100644 --- a/include/asm-x86/fixmap_32.h +++ b/include/asm-x86/fixmap_32.h @@ -79,10 +79,6 @@ enum fixed_addresses { FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif -#ifdef CONFIG_ACPI - FIX_ACPI_BEGIN, - FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, -#endif #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif @@ -103,6 +99,10 @@ enum fixed_addresses { (__end_of_permanent_fixed_addresses & 511), FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, FIX_WP_TEST, +#ifdef CONFIG_ACPI + FIX_ACPI_BEGIN, + FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +#endif #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif -- cgit v0.10.2 From ebdd561a19d7917ac49fff9c355aa4833c504bf1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:43:38 +0200 Subject: x86: constify data in reboot.c Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f6be7d5..f8a6216 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -27,7 +27,7 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -static long no_idt[3]; +static const struct desc_ptr no_idt = {}; static int reboot_mode; enum reboot_type reboot_type = BOOT_KBD; int reboot_force; @@ -201,15 +201,15 @@ core_initcall(reboot_init); controller to pulse the CPU reset line, which is more thorough, but doesn't work with at least one type of 486 motherboard. It is easy to stop this code working; hence the copious comments. */ -static unsigned long long +static const unsigned long long real_mode_gdt_entries [3] = { 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ + 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ + 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; -static struct desc_ptr +static const struct desc_ptr real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, real_mode_idt = { 0x3ff, 0 }; @@ -231,7 +231,7 @@ real_mode_idt = { 0x3ff, 0 }; More could be done here to set up the registers as if a CPU reset had occurred; hopefully real BIOSs don't assume much. */ -static unsigned char real_mode_switch [] = +static const unsigned char real_mode_switch [] = { 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ @@ -245,7 +245,7 @@ static unsigned char real_mode_switch [] = 0x24, 0x10, /* f: andb $0x10,al */ 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ }; -static unsigned char jump_to_bios [] = +static const unsigned char jump_to_bios [] = { 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ }; @@ -255,7 +255,7 @@ static unsigned char jump_to_bios [] = * specified by the code and length parameters. * We assume that length will aways be less that 100! */ -void machine_real_restart(unsigned char *code, int length) +void machine_real_restart(const unsigned char *code, int length) { local_irq_disable(); @@ -368,7 +368,7 @@ static void native_machine_emergency_restart(void) } case BOOT_TRIPLE: - load_idt((const struct desc_ptr *)&no_idt); + load_idt(&no_idt); __asm__ __volatile__("int3"); reboot_type = BOOT_KBD; diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index dec0b5e..61a8377 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -49,7 +49,7 @@ struct device_fixup { void (*reboot_fixup)(struct pci_dev *); }; -static struct device_fixup fixups_table[] = { +static const struct device_fixup fixups_table[] = { { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, @@ -64,7 +64,7 @@ static struct device_fixup fixups_table[] = { */ void mach_reboot_fixups(void) { - struct device_fixup *cur; + const struct device_fixup *cur; struct pci_dev *dev; int i; diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h index e63741f..206f355 100644 --- a/include/asm-x86/reboot.h +++ b/include/asm-x86/reboot.h @@ -14,8 +14,8 @@ struct machine_ops { extern struct machine_ops machine_ops; -void machine_real_restart(unsigned char *code, int length); void native_machine_crash_shutdown(struct pt_regs *regs); void native_machine_shutdown(void); +void machine_real_restart(const unsigned char *code, int length); #endif /* _ASM_REBOOT_H */ -- cgit v0.10.2 From 9831bfb201a8e00415d0f9e5b5a50d902a90b2db Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 12 May 2008 15:43:38 +0200 Subject: x86 - hide X86_VM_MASK from userland programs v3 X86_VM_MASK is kernel specific flags so hide it from userland programs. It should be defined *before* ptrace.h inclusion because of circular link between these files Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h index 074b357..cbf4a0e 100644 --- a/include/asm-x86/vm86.h +++ b/include/asm-x86/vm86.h @@ -14,12 +14,6 @@ #include -#ifdef CONFIG_VM86 -#define X86_VM_MASK X86_EFLAGS_VM -#else -#define X86_VM_MASK 0 /* No VM86 support */ -#endif - #define BIOSSEG 0x0f000 #define CPU_086 0 @@ -133,6 +127,13 @@ struct vm86plus_struct { }; #ifdef __KERNEL__ + +#ifdef CONFIG_VM86 +#define X86_VM_MASK X86_EFLAGS_VM +#else +#define X86_VM_MASK 0 /* No VM86 support */ +#endif + /* * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86 * mode - the main change is that the old segment descriptors aren't -- cgit v0.10.2 From 369101da7e3f6f971b7303922d2978b8483241c6 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 12 May 2008 15:43:38 +0200 Subject: x86: head_64.S cleanup - use predefined flags from processor-flags.h We should better use already defined flags from processor-flags.h instead of defining own ones [>>> object code check >>>] original md5sum: 9cfa6dbf045a046bb5dfb85f8bcfe8c4 arch/x86/kernel/head_64.o text data bss dec hex filename 37361 4432 8192 49985 c341 arch/x86/kernel/head_64.o patched md5sum: 9cfa6dbf045a046bb5dfb85f8bcfe8c4 arch/x86/kernel/head_64.o text data bss dec hex filename 37361 4432 8192 49985 c341 arch/x86/kernel/head_64.o [<<< object code check <<<] Signed-off-by: Cyrill Gorcunov Acked-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 10a1955..2f59ca8 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_PARAVIRT #include @@ -184,14 +185,10 @@ ENTRY(secondary_startup_64) 1: wrmsr /* Make changes effective */ /* Setup cr0 */ -#define CR0_PM 1 /* protected mode */ -#define CR0_MP (1<<1) -#define CR0_ET (1<<4) -#define CR0_NE (1<<5) -#define CR0_WP (1<<16) -#define CR0_AM (1<<18) -#define CR0_PAGING (1<<31) - movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax +#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ + X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ + X86_CR0_PG) + movl $CR0_STATE, %eax /* Make changes effective */ movq %rax, %cr0 -- cgit v0.10.2 From e83e31f41a623262714d9c56dde7dfc51ac078f7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 12 May 2008 15:43:39 +0200 Subject: x86: compressed/head_64.S cleanup - use predefined flags from processor-flags.h We should better use already defined flags from processor-flags.h instead of defining own ones [>>> object code check >>>] original md5sum: 129f24be6df396fb7d8bf998c01fc716 arch/x86/boot/compressed/head_64.o text data bss dec hex filename 705 48 45056 45809 b2f1 arch/x86/boot/compressed/head_64.o patched md5sum: 129f24be6df396fb7d8bf998c01fc716 arch/x86/boot/compressed/head_64-new.o text data bss dec hex filename 705 48 45056 45809 b2f1 arch/x86/boot/compressed/head_64-new.o [<<< object code check <<<] Signed-off-by: Cyrill Gorcunov Acked-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d8819ef..1d5dff4 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -30,6 +30,7 @@ #include #include #include +#include #include .section ".text.head" @@ -109,7 +110,7 @@ startup_32: /* Enable PAE mode */ xorl %eax, %eax - orl $(1 << 5), %eax + orl $(X86_CR4_PAE), %eax movl %eax, %cr4 /* @@ -170,7 +171,7 @@ startup_32: pushl %eax /* Enter paged protected Mode, activating Long Mode */ - movl $0x80000001, %eax /* Enable Paging and Protected mode */ + movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */ movl %eax, %cr0 /* Jump from 32bit compatibility mode into 64bit mode. */ -- cgit v0.10.2 From 2cc74111c78dbc7b26c41b4e87cfebfc3aed49c4 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sun, 11 May 2008 19:35:54 +0800 Subject: x86: ipi.c: removed duplicated include Removed duplicated include in arch/x86/kernel/ipi.c. Signed-off-by: Huang Weiyi Cc: mingo@redhat.com Cc: hpa@zytor.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index c0df7b8..9d98cda 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include -- cgit v0.10.2 From 883b7af932b4435eb4798cfa4fec0848639c2a87 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sun, 11 May 2008 19:36:57 +0800 Subject: x86: smpboot.c: removed duplicated include Removed duplicated include in arch/x86/kernel/smpboot.c. Signed-off-by: Huang Weiyi Cc: mingo@redhat.com Cc: hpa@zytor.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3898849..0974fc0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From 2237ce2057b82561f7ea27ed30153571f404112d Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Fri, 16 May 2008 11:01:26 +0900 Subject: x86: cleanup, remove duplicate declaration of unknown_nmi_panic Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 1e36302..8455bf3 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -46,7 +46,6 @@ extern void nmi_watchdog_default(void); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; -extern int unknown_nmi_panic; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int avail_to_resrv_perfctr_nmi(unsigned int); extern int reserve_perfctr_nmi(unsigned int); -- cgit v0.10.2 From 05139d8fb445d9a3569071af1c5920fc46191b7b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 13 May 2008 21:14:22 +0400 Subject: x86: head_64.S cleanup - use straight move to CR4 register Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2f59ca8..7475b4c 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -155,9 +155,7 @@ ENTRY(secondary_startup_64) */ /* Enable PAE mode and PGE */ - xorq %rax, %rax - btsq $5, %rax - btsq $7, %rax + movl $(X86_CR4_PAE | X86_CR4_PGE), %eax movq %rax, %cr4 /* Setup early boot stage 4 level pagetables. */ -- cgit v0.10.2 From 0e192b99d7d09f08b445c31ee7c7f3d0bfb27345 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 13 May 2008 20:55:40 +0400 Subject: x86: head_64.S cleanup - use PMD_SHIFT instead of numeric constant Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7475b4c..d8ed325 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -322,11 +322,11 @@ early_idt_ripmsg: ENTRY(name) /* Automate the creation of 1 to 1 mapping pmd entries */ -#define PMDS(START, PERM, COUNT) \ - i = 0 ; \ - .rept (COUNT) ; \ - .quad (START) + (i << 21) + (PERM) ; \ - i = i + 1 ; \ +#define PMDS(START, PERM, COUNT) \ + i = 0 ; \ + .rept (COUNT) ; \ + .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ + i = i + 1 ; \ .endr /* -- cgit v0.10.2 From 23eb271b9186531e1eb704dda9ab37abcfd0ca4a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 May 2008 16:10:39 -0700 Subject: x86: setup_force_cpu_cap(): don't do clear_bit(non-unsigned-long) Another hack to make proper prototyping of x86 bitops viable. Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 0d609c8..78b47e7 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -142,11 +142,11 @@ extern const char * const x86_power_flags[32]; #define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) #define setup_clear_cpu_cap(bit) do { \ clear_cpu_cap(&boot_cpu_data, bit); \ - set_bit(bit, cleared_cpu_caps); \ + set_bit(bit, (unsigned long *)cleared_cpu_caps); \ } while (0) #define setup_force_cpu_cap(bit) do { \ set_cpu_cap(&boot_cpu_data, bit); \ - clear_bit(bit, cleared_cpu_caps); \ + clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) -- cgit v0.10.2 From bfe4bb1526945e446d2912bef2e1e2cbd2c7349e Mon Sep 17 00:00:00 2001 From: Miklos Vajna Date: Sat, 17 May 2008 22:48:13 +0200 Subject: x86: janitor work in bugs.c Just moved trailing statements to the next line, removed space before open/close parenthesis, wrapped long lines. Signed-off-by: Miklos Vajna Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 170d2f5..1b1c56b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -59,8 +59,12 @@ static void __init check_fpu(void) return; } -/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ - /* Test for the divl bug.. */ + /* + * trap_init() enabled FXSR and company _before_ testing for FP + * problems here. + * + * Test for the divl bug.. + */ __asm__("fninit\n\t" "fldl %1\n\t" "fdivl %2\n\t" @@ -108,10 +112,15 @@ static void __init check_popad(void) "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " : "=&a" (res) : "d" (inp) - : "ecx", "edi" ); - /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ - if (res != 12345678) printk( "Buggy.\n" ); - else printk( "OK.\n" ); + : "ecx", "edi"); + /* + * If this fails, it means that any user program may lock the + * CPU hard. Too bad. + */ + if (res != 12345678) + printk("Buggy.\n"); + else + printk("OK.\n"); #endif } @@ -137,7 +146,8 @@ static void __init check_config(void) * i486+ only features! (WP works in supervisor mode and the * new "invlpg" and "bswap" instructions) */ -#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) +#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \ + defined(CONFIG_X86_BSWAP) if (boot_cpu_data.x86 == 3) panic("Kernel requires i486+ for 'invlpg' and other features"); #endif @@ -170,6 +180,7 @@ void __init check_bugs(void) check_fpu(); check_hlt(); check_popad(); - init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); + init_utsname()->machine[1] = + '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); } -- cgit v0.10.2 From f0766440dda7ace8a43b030f75e2dcb82449fb85 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 9 May 2008 19:09:48 -0700 Subject: x86: unify current.h Simply stitch these together. There are just two definitions that are shared but the file is resonably small and putting these things together shows that further unifications requires a unification of the per cpu / pda handling between both arches. Signed-off-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/current.h b/include/asm-x86/current.h index d2526d3..7515c19 100644 --- a/include/asm-x86/current.h +++ b/include/asm-x86/current.h @@ -1,5 +1,39 @@ +#ifndef _X86_CURRENT_H +#define _X86_CURRENT_H + #ifdef CONFIG_X86_32 -# include "current_32.h" -#else -# include "current_64.h" -#endif +#include +#include + +struct task_struct; + +DECLARE_PER_CPU(struct task_struct *, current_task); +static __always_inline struct task_struct *get_current(void) +{ + return x86_read_percpu(current_task); +} + +#else /* X86_32 */ + +#ifndef __ASSEMBLY__ +#include + +struct task_struct; + +static __always_inline struct task_struct *get_current(void) +{ + return read_pda(pcurrent); +} + +#else /* __ASSEMBLY__ */ + +#include +#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg + +#endif /* __ASSEMBLY__ */ + +#endif /* X86_32 */ + +#define current get_current() + +#endif /* X86_CURRENT_H */ diff --git a/include/asm-x86/current_32.h b/include/asm-x86/current_32.h deleted file mode 100644 index 5af9bdb..0000000 --- a/include/asm-x86/current_32.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _I386_CURRENT_H -#define _I386_CURRENT_H - -#include -#include - -struct task_struct; - -DECLARE_PER_CPU(struct task_struct *, current_task); -static __always_inline struct task_struct *get_current(void) -{ - return x86_read_percpu(current_task); -} - -#define current get_current() - -#endif /* !(_I386_CURRENT_H) */ diff --git a/include/asm-x86/current_64.h b/include/asm-x86/current_64.h deleted file mode 100644 index 2d368ed..0000000 --- a/include/asm-x86/current_64.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _X86_64_CURRENT_H -#define _X86_64_CURRENT_H - -#if !defined(__ASSEMBLY__) -struct task_struct; - -#include - -static inline struct task_struct *get_current(void) -{ - struct task_struct *t = read_pda(pcurrent); - return t; -} - -#define current get_current() - -#else - -#ifndef ASM_OFFSET_H -#include -#endif - -#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg - -#endif - -#endif /* !(_X86_64_CURRENT_H) */ -- cgit v0.10.2 From 2141261a11524acbfce86a5a213bbaea08794bbb Mon Sep 17 00:00:00 2001 From: Miklos Vajna Date: Tue, 13 May 2008 13:35:25 +0200 Subject: x86: janitor work in video-vga.c Just moved an open brace to the previous line. Signed-off-by: Miklos Vajna Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 40ecb8d..b939cb4 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -259,8 +259,7 @@ static int vga_probe(void) return mode_count[adapter]; } -__videocard video_vga = -{ +__videocard video_vga = { .card_name = "VGA", .probe = vga_probe, .set_mode = vga_set_mode, -- cgit v0.10.2 From 78d64fc21d2aa425c65de49765cddecc84d595a4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 12 May 2008 15:44:39 +0200 Subject: x86: include/asm-x86/string_32.h - style only Looked at this file because of __memcpy warnings. Thought it could use a style/checkpatch cleanup. No change in vmlinux. [tglx: fixed the remaining issues ] Signed-off-by: Joe Perches Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h index b49369a..8d0c593 100644 --- a/include/asm-x86/string_32.h +++ b/include/asm-x86/string_32.h @@ -29,81 +29,116 @@ extern char *strchr(const char *s, int c); #define __HAVE_ARCH_STRLEN extern size_t strlen(const char *s); -static __always_inline void * __memcpy(void * to, const void * from, size_t n) +static __always_inline void *__memcpy(void *to, const void *from, size_t n) { -int d0, d1, d2; -__asm__ __volatile__( - "rep ; movsl\n\t" - "movl %4,%%ecx\n\t" - "andl $3,%%ecx\n\t" - "jz 1f\n\t" - "rep ; movsb\n\t" - "1:" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - : "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from) - : "memory"); -return (to); + int d0, d1, d2; + asm volatile("rep ; movsl\n\t" + "movl %4,%%ecx\n\t" + "andl $3,%%ecx\n\t" + "jz 1f\n\t" + "rep ; movsb\n\t" + "1:" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) + : "memory"); + return to; } /* * This looks ugly, but the compiler can optimize it totally, * as the count is constant. */ -static __always_inline void * __constant_memcpy(void * to, const void * from, size_t n) +static __always_inline void *__constant_memcpy(void *to, const void *from, + size_t n) { long esi, edi; - if (!n) return to; -#if 1 /* want to do small copies with non-string ops? */ + if (!n) + return to; + switch (n) { - case 1: *(char*)to = *(char*)from; return to; - case 2: *(short*)to = *(short*)from; return to; - case 4: *(int*)to = *(int*)from; return to; -#if 1 /* including those doable with two moves? */ - case 3: *(short*)to = *(short*)from; - *((char*)to+2) = *((char*)from+2); return to; - case 5: *(int*)to = *(int*)from; - *((char*)to+4) = *((char*)from+4); return to; - case 6: *(int*)to = *(int*)from; - *((short*)to+2) = *((short*)from+2); return to; - case 8: *(int*)to = *(int*)from; - *((int*)to+1) = *((int*)from+1); return to; -#endif + case 1: + *(char *)to = *(char *)from; + return to; + case 2: + *(short *)to = *(short *)from; + return to; + case 4: + *(int *)to = *(int *)from; + return to; + + case 3: + *(short *)to = *(short *)from; + *((char *)to + 2) = *((char *)from + 2); + return to; + case 5: + *(int *)to = *(int *)from; + *((char *)to + 4) = *((char *)from + 4); + return to; + case 6: + *(int *)to = *(int *)from; + *((short *)to + 2) = *((short *)from + 2); + return to; + case 8: + *(int *)to = *(int *)from; + *((int *)to + 1) = *((int *)from + 1); + return to; } -#endif - esi = (long) from; - edi = (long) to; - if (n >= 5*4) { + + esi = (long)from; + edi = (long)to; + if (n >= 5 * 4) { /* large block: use rep prefix */ int ecx; - __asm__ __volatile__( - "rep ; movsl" - : "=&c" (ecx), "=&D" (edi), "=&S" (esi) - : "0" (n/4), "1" (edi),"2" (esi) - : "memory" + asm volatile("rep ; movsl" + : "=&c" (ecx), "=&D" (edi), "=&S" (esi) + : "0" (n / 4), "1" (edi), "2" (esi) + : "memory" ); } else { /* small block: don't clobber ecx + smaller code */ - if (n >= 4*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - if (n >= 3*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - if (n >= 2*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - if (n >= 1*4) __asm__ __volatile__("movsl" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); + if (n >= 4 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 3 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 2 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + if (n >= 1 * 4) + asm volatile("movsl" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); } switch (n % 4) { /* tail */ - case 0: return to; - case 1: __asm__ __volatile__("movsb" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - return to; - case 2: __asm__ __volatile__("movsw" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - return to; - default: __asm__ __volatile__("movsw\n\tmovsb" - :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); - return to; + case 0: + return to; + case 1: + asm volatile("movsb" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + case 2: + asm volatile("movsw" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; + default: + asm volatile("movsw\n\tmovsb" + : "=&D"(edi), "=&S"(esi) + : "0"(edi), "1"(esi) + : "memory"); + return to; } } @@ -117,87 +152,86 @@ static __always_inline void * __constant_memcpy(void * to, const void * from, si * This CPU favours 3DNow strongly (eg AMD Athlon) */ -static inline void * __constant_memcpy3d(void * to, const void * from, size_t len) +static inline void *__constant_memcpy3d(void *to, const void *from, size_t len) { if (len < 512) return __constant_memcpy(to, from, len); return _mmx_memcpy(to, from, len); } -static __inline__ void *__memcpy3d(void *to, const void *from, size_t len) +static inline void *__memcpy3d(void *to, const void *from, size_t len) { if (len < 512) return __memcpy(to, from, len); return _mmx_memcpy(to, from, len); } -#define memcpy(t, f, n) \ -(__builtin_constant_p(n) ? \ - __constant_memcpy3d((t),(f),(n)) : \ - __memcpy3d((t),(f),(n))) +#define memcpy(t, f, n) \ + (__builtin_constant_p((n)) \ + ? __constant_memcpy3d((t), (f), (n)) \ + : __memcpy3d((t), (f), (n))) #else /* * No 3D Now! */ - -#define memcpy(t, f, n) \ -(__builtin_constant_p(n) ? \ - __constant_memcpy((t),(f),(n)) : \ - __memcpy((t),(f),(n))) + +#define memcpy(t, f, n) \ + (__builtin_constant_p((n)) \ + ? __constant_memcpy((t), (f), (n)) \ + : __memcpy((t), (f), (n))) #endif #define __HAVE_ARCH_MEMMOVE -void *memmove(void * dest,const void * src, size_t n); +void *memmove(void *dest, const void *src, size_t n); #define memcmp __builtin_memcmp #define __HAVE_ARCH_MEMCHR -extern void *memchr(const void * cs,int c,size_t count); +extern void *memchr(const void *cs, int c, size_t count); -static inline void * __memset_generic(void * s, char c,size_t count) +static inline void *__memset_generic(void *s, char c, size_t count) { -int d0, d1; -__asm__ __volatile__( - "rep\n\t" - "stosb" - : "=&c" (d0), "=&D" (d1) - :"a" (c),"1" (s),"0" (count) - :"memory"); -return s; + int d0, d1; + asm volatile("rep\n\t" + "stosb" + : "=&c" (d0), "=&D" (d1) + : "a" (c), "1" (s), "0" (count) + : "memory"); + return s; } /* we might want to write optimized versions of these later */ -#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count)) +#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count)) /* - * memset(x,0,y) is a reasonably common thing to do, so we want to fill + * memset(x, 0, y) is a reasonably common thing to do, so we want to fill * things 32 bits at a time even when we don't know the size of the * area at compile-time.. */ -static __always_inline void * __constant_c_memset(void * s, unsigned long c, size_t count) +static __always_inline +void *__constant_c_memset(void *s, unsigned long c, size_t count) { -int d0, d1; -__asm__ __volatile__( - "rep ; stosl\n\t" - "testb $2,%b3\n\t" - "je 1f\n\t" - "stosw\n" - "1:\ttestb $1,%b3\n\t" - "je 2f\n\t" - "stosb\n" - "2:" - :"=&c" (d0), "=&D" (d1) - :"a" (c), "q" (count), "0" (count/4), "1" ((long) s) - :"memory"); -return (s); + int d0, d1; + asm volatile("rep ; stosl\n\t" + "testb $2,%b3\n\t" + "je 1f\n\t" + "stosw\n" + "1:\ttestb $1,%b3\n\t" + "je 2f\n\t" + "stosb\n" + "2:" + : "=&c" (d0), "=&D" (d1) + : "a" (c), "q" (count), "0" (count/4), "1" ((long)s) + : "memory"); + return s; } /* Added by Gertjan van Wingerde to make minix and sysv module work */ #define __HAVE_ARCH_STRNLEN -extern size_t strnlen(const char * s, size_t count); +extern size_t strnlen(const char *s, size_t count); /* end of additional stuff */ #define __HAVE_ARCH_STRSTR @@ -207,66 +241,78 @@ extern char *strstr(const char *cs, const char *ct); * This looks horribly ugly, but the compiler can optimize it totally, * as we by now know that both pattern and count is constant.. */ -static __always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count) +static __always_inline +void *__constant_c_and_count_memset(void *s, unsigned long pattern, + size_t count) { switch (count) { + case 0: + return s; + case 1: + *(unsigned char *)s = pattern & 0xff; + return s; + case 2: + *(unsigned short *)s = pattern & 0xffff; + return s; + case 3: + *(unsigned short *)s = pattern & 0xffff; + *((unsigned char *)s + 2) = pattern & 0xff; + return s; + case 4: + *(unsigned long *)s = pattern; + return s; + } + +#define COMMON(x) \ + asm volatile("rep ; stosl" \ + x \ + : "=&c" (d0), "=&D" (d1) \ + : "a" (pattern), "0" (count/4), "1" ((long)s) \ + : "memory") + + { + int d0, d1; + switch (count % 4) { case 0: + COMMON(""); return s; case 1: - *(unsigned char *)s = pattern & 0xff; + COMMON("\n\tstosb"); return s; case 2: - *(unsigned short *)s = pattern & 0xffff; + COMMON("\n\tstosw"); return s; - case 3: - *(unsigned short *)s = pattern & 0xffff; - *(2+(unsigned char *)s) = pattern & 0xff; + default: + COMMON("\n\tstosw\n\tstosb"); return s; - case 4: - *(unsigned long *)s = pattern; - return s; - } -#define COMMON(x) \ -__asm__ __volatile__( \ - "rep ; stosl" \ - x \ - : "=&c" (d0), "=&D" (d1) \ - : "a" (pattern),"0" (count/4),"1" ((long) s) \ - : "memory") -{ - int d0, d1; - switch (count % 4) { - case 0: COMMON(""); return s; - case 1: COMMON("\n\tstosb"); return s; - case 2: COMMON("\n\tstosw"); return s; - default: COMMON("\n\tstosw\n\tstosb"); return s; + } } -} - + #undef COMMON } -#define __constant_c_x_memset(s, c, count) \ -(__builtin_constant_p(count) ? \ - __constant_c_and_count_memset((s),(c),(count)) : \ - __constant_c_memset((s),(c),(count))) +#define __constant_c_x_memset(s, c, count) \ + (__builtin_constant_p(count) \ + ? __constant_c_and_count_memset((s), (c), (count)) \ + : __constant_c_memset((s), (c), (count))) -#define __memset(s, c, count) \ -(__builtin_constant_p(count) ? \ - __constant_count_memset((s),(c),(count)) : \ - __memset_generic((s),(c),(count))) +#define __memset(s, c, count) \ + (__builtin_constant_p(count) \ + ? __constant_count_memset((s), (c), (count)) \ + : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET -#define memset(s, c, count) \ -(__builtin_constant_p(c) ? \ - __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \ - __memset((s),(c),(count))) +#define memset(s, c, count) \ + (__builtin_constant_p(c) \ + ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ + (count)) \ + : __memset((s), (c), (count))) /* * find the first occurrence of byte 'c', or 1 past the area if none */ #define __HAVE_ARCH_MEMSCAN -extern void *memscan(void * addr, int c, size_t size); +extern void *memscan(void *addr, int c, size_t size); #endif /* __KERNEL__ */ -- cgit v0.10.2 From 83cd1daa1dc3400e5ca2255818641233ebb30680 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:39 +0200 Subject: x86: eliminate dead code in x86_64 entry.S Remove the not longer used handlers for reserved vectors. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 556a8df..1f1751d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1120,10 +1120,6 @@ ENTRY(coprocessor_segment_overrun) zeroentry do_coprocessor_segment_overrun END(coprocessor_segment_overrun) -ENTRY(reserved) - zeroentry do_reserved -END(reserved) - /* runs on exception stack */ ENTRY(double_fault) XCPT_FRAME diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index adff76e..ec6d3b2 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -71,7 +71,6 @@ asmlinkage void general_protection(void); asmlinkage void page_fault(void); asmlinkage void coprocessor_error(void); asmlinkage void simd_coprocessor_error(void); -asmlinkage void reserved(void); asmlinkage void alignment_check(void); asmlinkage void machine_check(void); asmlinkage void spurious_interrupt_bug(void); @@ -702,12 +701,10 @@ DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) DO_ERROR( 4, SIGSEGV, "overflow", overflow) DO_ERROR( 5, SIGSEGV, "bounds", bounds) DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) -DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) -DO_ERROR(18, SIGSEGV, "reserved", reserved) /* Runs on IST stack */ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) -- cgit v0.10.2 From 48e239572271cf79412d90753a721242a765f7d9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 17:24:34 +0200 Subject: x86: fixup the fallout of the bitops changes Signed-off-by: Thomas Gleixner diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5015976..ee1d6d3 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -255,7 +255,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, if (pte_young(*ptep)) ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, - &ptep->pte); + (unsigned long *) &ptep->pte); if (ret) pte_update(vma->vm_mm, addr, ptep); -- cgit v0.10.2 From 668a6c3654560aef8741642478973e205a4f02bf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 May 2008 13:35:24 +0200 Subject: - fix mmioftrace + rcu merge interaction Signed-off-by: Thomas Gleixner diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index b65871e..93d8203 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include -- cgit v0.10.2 From c9fea78dc9775981bed2da379568c8a8fddd1fb6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 May 2008 13:53:39 +0200 Subject: x86: make NUMAQ depend on PCI NUMAQ depends on the existence of PCI hardware, and requiring PCI makes this subarch simpler and avoids build failures if PCI is disabled. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bbafeac..0293728 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -270,7 +270,7 @@ config X86_VOYAGER config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" - depends on SMP && X86_32 + depends on SMP && X86_32 && PCI select NUMA help This option is used for getting Linux to run on a (IBM/Sequent) NUMA -- cgit v0.10.2 From 1ac97018169c5a13feaa90d9671f2d6ba2d9e86e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 May 2008 14:10:14 +0200 Subject: x86: untangle pci dependencies make PCI-less subarches not build with PCI - instead of complicating the PCI dependencies. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0293728..78700f5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -258,7 +258,7 @@ config X86_ELAN config X86_VOYAGER bool "Voyager (NCR)" - depends on X86_32 && (SMP || BROKEN) + depends on X86_32 && (SMP || BROKEN) && !PCI help Voyager is an MCA-based 32-way capable SMP architecture proprietary to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. @@ -300,7 +300,7 @@ config X86_BIGSMP config X86_VISWS bool "SGI 320/540 (Visual Workstation)" - depends on X86_32 + depends on X86_32 && !PCI help The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. @@ -344,7 +344,7 @@ config X86_RDC321X config X86_VSMP bool "Support for ScaleMP vSMP" select PARAVIRT - depends on X86_64 + depends on X86_64 && !PCI help Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is supposed to run on these EM64T-based machines. Only choose this option @@ -1477,8 +1477,7 @@ endmenu menu "Bus options (PCI etc.)" config PCI - bool "PCI support" if !X86_VISWS && !X86_VSMP - depends on !X86_VOYAGER + bool "PCI support" default y select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) help -- cgit v0.10.2 From 0da72a4aeb4482c64c1142a2e36b556d13374937 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Apr 2008 20:11:51 +0200 Subject: x86: fix sparse warning in mtrr/generic.c arch/x86/kernel/cpu/mtrr/generic.c:216:12: warning: symbol 'lo' shadows an earlier one Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 5d241ce..0625d41 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -213,12 +213,12 @@ void __init get_mtrr_state(void) mtrr_state.enabled = (lo & 0xc00) >> 10; if (amd_special_default_mtrr()) { - unsigned lo, hi; + unsigned low, high; /* TOP_MEM2 */ - rdmsr(MSR_K8_TOP_MEM2, lo, hi); - tom2 = hi; + rdmsr(MSR_K8_TOP_MEM2, low, high); + tom2 = high; tom2 <<= 32; - tom2 |= lo; + tom2 |= low; tom2 &= 0xffffff8000000ULL; } if (mtrr_show) { -- cgit v0.10.2 From 0dbfafa5fcd4dd189e2adc7b6ed9e0405e846d79 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Wed, 23 Apr 2008 15:09:05 +0200 Subject: x86: move i386 memory setup code to e820_32.c The x86_64 code has centralized the memory setup code in e820_64.c. This patch copies that approach to i386: - early_param("mem", ...) parsing is moved from setup_32.c to e820_32.c. - setup_memory_map() and finish_e820_parsing() are factored out from setup_arch(), and declarations are added to e820_32.h. - print_memory_map() is made static and removed from e820_32.h. - user_defined_memmap is marked as __initdata. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index ed733e7..31ea2bb 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -30,7 +30,6 @@ unsigned long pci_mem_start = 0x10000000; #ifdef CONFIG_PCI EXPORT_SYMBOL(pci_mem_start); #endif -extern int user_defined_memmap; static struct resource system_rom_resource = { .name = "System ROM", @@ -584,7 +583,7 @@ void __init e820_register_memory(void) pci_mem_start, gapstart, gapsize); } -void __init print_memory_map(char *who) +static void __init print_memory_map(char *who) { int i; @@ -692,6 +691,54 @@ e820_all_mapped(unsigned long s, unsigned long e, unsigned type) return 0; } +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ +char * __init __attribute__((weak)) memory_setup(void) +{ + return machine_specific_memory_setup(); +} + +void __init setup_memory_map(void) +{ + printk(KERN_INFO "BIOS-provided physical RAM map:\n"); + print_memory_map(memory_setup()); +} + +static int __initdata user_defined_memmap; + +/* + * "mem=nopentium" disables the 4MB page tables. + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM + * to , overriding the bios size. + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from + * to +, overriding the bios size. + * + * HPA tells me bootloaders need to parse mem=, so no new + * option should be mem= [also see Documentation/i386/boot.txt] + */ +static int __init parse_mem(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "nopentium") == 0) { + setup_clear_cpu_cap(X86_FEATURE_PSE); + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. + */ + unsigned long long mem_size; + + mem_size = memparse(arg, &arg); + limit_regions(mem_size); + user_defined_memmap = 1; + } + return 0; +} +early_param("mem", parse_mem); + static int __init parse_memmap(char *arg) { if (!arg) @@ -762,6 +809,15 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, new_type); } } + +void __init finish_e820_parsing(void) +{ + if (user_defined_memmap) { + printk(KERN_INFO "user-defined physical RAM map:\n"); + print_memory_map("user"); + } +} + void __init update_e820(void) { u8 nr_map; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 2c5f8b2..b54c79c 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -237,42 +237,6 @@ static inline void copy_edd(void) } #endif -int __initdata user_defined_memmap; - -/* - * "mem=nopentium" disables the 4MB page tables. - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM - * to , overriding the bios size. - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from - * to +, overriding the bios size. - * - * HPA tells me bootloaders need to parse mem=, so no new - * option should be mem= [also see Documentation/i386/boot.txt] - */ -static int __init parse_mem(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp(arg, "nopentium") == 0) { - setup_clear_cpu_cap(X86_FEATURE_PSE); - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long mem_size; - - mem_size = memparse(arg, &arg); - limit_regions(mem_size); - user_defined_memmap = 1; - } - return 0; -} -early_param("mem", parse_mem); - #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header * stored by the crashed kernel. @@ -725,12 +689,6 @@ static void set_mca_bus(int x) static void set_mca_bus(int x) { } #endif -/* Overridden in paravirt.c if CONFIG_PARAVIRT */ -char * __init __attribute__((weak)) memory_setup(void) -{ - return machine_specific_memory_setup(); -} - #ifdef CONFIG_NUMA /* * In the golden day, when everything among i386 and x86_64 will be @@ -786,8 +744,7 @@ void __init setup_arch(char **cmdline_p) #endif ARCH_SETUP - printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - print_memory_map(memory_setup()); + setup_memory_map(); copy_edd(); @@ -807,10 +764,7 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); - if (user_defined_memmap) { - printk(KERN_INFO "user-defined physical RAM map:\n"); - print_memory_map("user"); - } + finish_e820_parsing(); strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index a9f7c6e..e1f10c6 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -18,6 +18,9 @@ #ifndef __ASSEMBLY__ +extern void setup_memory_map(void); +extern void finish_e820_parsing(void); + extern struct e820map e820; extern void update_e820(void); @@ -32,7 +35,6 @@ extern void update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type); extern void e820_register_memory(void); extern void limit_regions(unsigned long long size); -extern void print_memory_map(char *who); extern void init_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -- cgit v0.10.2 From 95ffa2438d0e9c48779f0106b1c0eb36165e759c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Apr 2008 03:52:33 -0700 Subject: x86: mtrr cleanup for converting continuous to discrete layout, v8 some BIOS like to use continus MTRR layout, and X driver can not add WB entries for graphical cards when 4g or more RAM installed. the patch will change MTRR to discrete. mtrr_chunk_size= could be used to have smaller continuous block to hold holes. default is 256m, could be set according to size of graphics card memory. mtrr_gran_size= could be used to send smallest mtrr block to avoid run out of MTRRs v2: fix -1 for UC checking v3: default to disable, and need use enable_mtrr_cleanup to enable this feature skip the var state change warning. remove next_basek in range_to_mtrr() v4: correct warning mask. v5: CONFIG_MTRR_SANITIZER v6: fix 1g, 2g, 512 aligment with extra hole v7: gran_sizek to prevent running out of MTRRs. v8: fix hole_basek caculation caused when removing next_basek gran_sizek using when basek is 0. need to apply [PATCH] x86: fix trimming e820 with MTRR holes. right after this one. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e07c432..4442760 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -599,6 +599,20 @@ and is between 256 and 4096 characters. It is defined in the file See drivers/char/README.epca and Documentation/digiepca.txt. + disable_mtrr_cleanup [X86] + enable_mtrr_cleanup [X86] + The kernel tries to adjust MTRR layout from continuous + to discrete, to make X server driver able to add WB + entry later. This parameter enables/disables that. + + mtrr_chunk_size=nn[KMG] [X86] + used for mtrr cleanup. It is largest continous chunk + that could hold holes aka. UC entries. + + mtrr_gran_size=nn[KMG] [X86] + used for mtrr cleanup. It is granity of mtrr block. + Big value could prevent small alignment use up MTRRs. + disable_mtrr_trim [X86, Intel and AMD only] By default the kernel will trim any uncacheable memory out of your available memory pool based on diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae..97a1764 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1092,6 +1092,32 @@ config MTRR See for more information. +config MTRR_SANITIZER + def_bool y + prompt "MTRR cleanup support" + depends on MTRR + help + Convert MTRR layout from continuous to discrete, so some X driver + could add WB entries. + + Say N here if you see bootup problems (boot crash, boot hang, + spontaneous reboots). + + Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size + could be used to send largest mtrr entry size for continuous block + to hold holes (aka. UC entries) + + If unsure, say Y. + +config MTRR_SANITIZER_ENABLE_DEFAULT + def_bool y + prompt "Enable MTRR cleanup by default" + depends on MTRR_SANITIZER + help + Enable mtrr cleanup by default + + If unsure, say Y. + config X86_PAT bool prompt "x86 PAT support" diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0625d41..5aae648 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = { static unsigned long smp_changes_mask; static struct mtrr_state mtrr_state = {}; static int mtrr_state_set; -static u64 tom2; +u64 mtrr_tom2; #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." @@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) } } - if (tom2) { - if (start >= (1ULL<<32) && (end < tom2)) + if (mtrr_tom2) { + if (start >= (1ULL<<32) && (end < mtrr_tom2)) return MTRR_TYPE_WRBACK; } @@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); } +/* fill the MSR pair relating to a var range */ +void fill_mtrr_var_range(unsigned int index, + u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) +{ + struct mtrr_var_range *vr; + + vr = mtrr_state.var_ranges; + + vr[index].base_lo = base_lo; + vr[index].base_hi = base_hi; + vr[index].mask_lo = mask_lo; + vr[index].mask_hi = mask_hi; +} + static void get_fixed_ranges(mtrr_type * frs) { @@ -216,10 +230,10 @@ void __init get_mtrr_state(void) unsigned low, high; /* TOP_MEM2 */ rdmsr(MSR_K8_TOP_MEM2, low, high); - tom2 = high; - tom2 <<= 32; - tom2 |= low; - tom2 &= 0xffffff8000000ULL; + mtrr_tom2 = high; + mtrr_tom2 <<= 32; + mtrr_tom2 |= low; + mtrr_tom2 &= 0xffffff8000000ULL; } if (mtrr_show) { int high_width; @@ -251,9 +265,9 @@ void __init get_mtrr_state(void) else printk(KERN_INFO "MTRR %u disabled\n", i); } - if (tom2) { + if (mtrr_tom2) { printk(KERN_INFO "TOM2: %016llx aka %lldM\n", - tom2, tom2>>20); + mtrr_tom2, mtrr_tom2>>20); } } mtrr_state_set = 1; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6a1e278..8a6f68b 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -609,6 +610,452 @@ static struct sysdev_driver mtrr_sysdev_driver = { .resume = mtrr_restore, }; +#ifdef CONFIG_MTRR_SANITIZER + +#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT +static int enable_mtrr_cleanup __initdata = 1; +#else +static int enable_mtrr_cleanup __initdata; +#endif + +#else + +static int enable_mtrr_cleanup __initdata = -1; + +#endif + +static int __init disable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 0; + return 0; +} +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); + +static int __init enable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 1; + return 0; +} +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); + +#define RANGE_NUM 256 + +struct res_range { + unsigned long start; + unsigned long end; +}; + +static int __init add_range(struct res_range *range, int nr_range, unsigned long start, + unsigned long end, int merge) +{ + int i; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < nr_range; i++) { + unsigned long final_start, final_end; + unsigned long common_start, common_end; + + if (!range[i].end) + continue; + + common_start = max(range[i].start, start); + common_end = min(range[i].end, end); + if (common_start > common_end + 1) + continue; + + final_start = min(range[i].start, start); + final_end = max(range[i].end, end); + + range[i].start = final_start; + range[i].end = final_end; + return nr_range; + } + +addit: + /* need to add that */ + if (nr_range >= RANGE_NUM) + return nr_range; + + range[nr_range].start = start; + range[nr_range].end = end; + + nr_range++; + + return nr_range; + +} +static void __init subtract_range(struct res_range *range, unsigned long start, + unsigned long end) +{ + int i; + int j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + + if (start <= range[j].start && end >= range[j].end) { + range[j].start = 0; + range[j].end = 0; + continue; + } + + if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { + range[j].start = end + 1; + continue; + } + + + if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { + range[j].end = start - 1; + continue; + } + + if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + continue; + } + } +} + +static int __init cmp_range(const void *x1, const void *x2) +{ + const struct res_range *r1 = x1; + const struct res_range *r2 = x2; + long start1, start2; + + start1 = r1->start; + start2 = r2->start; + + return start1 - start2; +} + +struct var_mtrr_state { + unsigned long range_startk, range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + unsigned int reg; + unsigned address_bits; +}; + +static void __init set_var_mtrr( + unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type, unsigned address_bits) +{ + u32 base_lo, base_hi, mask_lo, mask_hi; + unsigned address_mask_high; + + if (!sizek) { + fill_mtrr_var_range(reg, 0, 0, 0, 0); + return; + } + + address_mask_high = ((1u << (address_bits - 32u)) - 1u); + + base_hi = basek >> 22; + base_lo = basek << 10; + + if (sizek < 4*1024*1024) { + mask_hi = address_mask_high; + mask_lo = ~((sizek << 10) - 1); + } else { + mask_hi = address_mask_high & (~((sizek >> 22) - 1)); + mask_lo = 0; + } + + base_lo |= type; + mask_lo |= 0x800; + fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); +} + +static unsigned int __init range_to_mtrr(unsigned int reg, + unsigned long range_startk, unsigned long range_sizek, + unsigned char type, unsigned address_bits) +{ + if (!range_sizek || (reg >= num_var_ranges)) + return reg; + + while (range_sizek) { + unsigned long max_align, align; + unsigned long sizek; + /* Compute the maximum size I can make a range */ + if (range_startk) + max_align = ffs(range_startk) - 1; + else + max_align = 32; + align = fls(range_sizek) - 1; + if (align > max_align) + align = max_align; + + sizek = 1 << align; + printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n", + reg, range_startk >> 10, sizek >> 10, + (type == MTRR_TYPE_UNCACHABLE)?"UC": + ((type == MTRR_TYPE_WRBACK)?"WB":"Other") + ); + set_var_mtrr(reg++, range_startk, sizek, type, address_bits); + range_startk += sizek; + range_sizek -= sizek; + if (reg >= num_var_ranges) + break; + } + return reg; +} + +static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) +{ + unsigned long hole_basek, hole_sizek; + unsigned long range0_basek, range0_sizek; + unsigned long range_basek, range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + + hole_basek = 0; + hole_sizek = 0; + chunk_sizek = state->chunk_sizek; + gran_sizek = state->gran_sizek; + + /* align with gran size, prevent small block used up MTRRs */ + range_basek = ALIGN(state->range_startk, gran_sizek); + if ((range_basek > basek) && basek) + return; + range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek); + + while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) { + range_sizek -= gran_sizek; + if (!range_sizek) + return; + } + state->range_startk = range_basek; + state->range_sizek = range_sizek; + + /* try to append some small hole */ + range0_basek = state->range_startk; + range0_sizek = ALIGN(state->range_sizek, chunk_sizek); + if ((range0_sizek == state->range_sizek) || + ((range0_basek + range0_sizek - chunk_sizek > basek) && basek)) { + printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits); + return; + } + + + range0_sizek -= chunk_sizek; + printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK, state->address_bits); + + range_basek = range0_basek + range0_sizek; + range_sizek = chunk_sizek; + if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) { + hole_sizek = range_sizek - (state->range_sizek - range0_sizek); + hole_basek = range_basek + range_sizek - hole_sizek; + } else + range_sizek = state->range_sizek - range0_sizek; + + printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, + range_sizek, MTRR_TYPE_WRBACK, state->address_bits); + if (hole_sizek) { + printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, + hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits); + } +} + +static void __init set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn) +{ + unsigned long basek, sizek; + + if (state->reg >= num_var_ranges) + return; + + basek = base_pfn << (PAGE_SHIFT - 10); + sizek = size_pfn << (PAGE_SHIFT - 10); + + /* See if I can merge with the last range */ + if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) { + unsigned long endk = basek + sizek; + state->range_sizek = endk - state->range_startk; + return; + } + /* Write the range mtrrs */ + if (state->range_sizek != 0) { + range_to_mtrr_with_hole(state, basek); + + state->range_startk = 0; + state->range_sizek = 0; + } + /* Allocate an msr */ + state->range_startk = basek; + state->range_sizek = sizek; +} + +/* mininum size of mtrr block that can take hole */ +static u64 mtrr_chunk_size __initdata = (256ULL<<20); + +static int __init parse_mtrr_chunk_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_chunk_size = memparse(p, &p); + return 0; +} +early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); + +/* granity of mtrr of block */ +static u64 mtrr_gran_size __initdata = (64ULL<<20); + +static int __init parse_mtrr_gran_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_gran_size = memparse(p, &p); + return 0; +} +early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); + +static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits) +{ + struct var_mtrr_state var_state; + int i; + + var_state.range_startk = 0; + var_state.range_sizek = 0; + var_state.reg = 0; + var_state.address_bits = address_bits; + var_state.chunk_sizek = mtrr_chunk_size >> 10; + var_state.gran_sizek = mtrr_gran_size >> 10; + + /* Write the range etc */ + for (i = 0; i < nr_range; i++) + set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1); + + /* Write the last range */ + range_to_mtrr_with_hole(&var_state, 0); + printk(KERN_INFO "DONE variable MTRRs\n"); + /* Clear out the extra MTRR's */ + while (var_state.reg < num_var_ranges) + set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits); +} + +static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size) +{ + unsigned long i, base, size; + mtrr_type type; + + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type != MTRR_TYPE_WRBACK) + continue; + nr_range = add_range(range, nr_range, base, base + size - 1, 1); + } + printk(KERN_INFO "After WB checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + + /* take out UC ranges */ + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type != MTRR_TYPE_UNCACHABLE) + continue; + if (!size) + continue; + subtract_range(range, base, base + size - 1); + } + if (extra_remove_size) + subtract_range(range, extra_remove_base, extra_remove_base + extra_remove_size - 1); + + /* get new range num */ + nr_range = 0; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + nr_range++; + } + printk(KERN_INFO "After UC checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + printk(KERN_INFO "After sorting\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + + return nr_range; +} + +static int __init mtrr_cleanup(unsigned address_bits) +{ + unsigned long i, base, size, def, dummy; + mtrr_type type; + struct res_range range[RANGE_NUM]; + int nr_range; + unsigned long extra_remove_base, extra_remove_size; + + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; + + if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) + return 0; + rdmsr(MTRRdefType_MSR, def, dummy); + def &= 0xff; + if (def != MTRR_TYPE_UNCACHABLE) + return 0; + + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type >= MTRR_NUM_TYPES) + continue; + if (!size) + type = MTRR_NUM_TYPES; + num[type]++; + } + + /* check if we got UC entries */ + if (!num[MTRR_TYPE_UNCACHABLE]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + memset(range, 0, sizeof(range)); + extra_remove_size = 0; + if (mtrr_tom2) { + extra_remove_base = 1 << (32 - PAGE_SHIFT); + extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base; + } + nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); + + /* convert ranges to var ranges state */ + x86_setup_var_mtrrs(range, nr_range, address_bits); + + return 1; + +} + static int disable_mtrr_trim; static int __init disable_mtrr_trim_setup(char *str) @@ -729,18 +1176,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) */ void __init mtrr_bp_init(void) { + u32 phys_addr; init_ifs(); + phys_addr = 32; + if (cpu_has_mtrr) { mtrr_if = &generic_mtrr_ops; size_or_mask = 0xff000000; /* 36 bits */ size_and_mask = 0x00f00000; + phys_addr = 36; /* This is an AMD specific MSR, but we assume(hope?) that Intel will implement it to when they extend the address bus of the Xeon. */ if (cpuid_eax(0x80000000) >= 0x80000008) { - u32 phys_addr; phys_addr = cpuid_eax(0x80000008) & 0xff; /* CPUID workaround for Intel 0F33/0F34 CPU */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -758,6 +1208,7 @@ void __init mtrr_bp_init(void) don't support PAE */ size_or_mask = 0xfff00000; /* 32 bits */ size_and_mask = 0; + phys_addr = 32; } } else { switch (boot_cpu_data.x86_vendor) { @@ -791,8 +1242,13 @@ void __init mtrr_bp_init(void) if (mtrr_if) { set_num_var_ranges(); init_table(); - if (use_intel()) + if (use_intel()) { get_mtrr_state(); + + if (mtrr_cleanup(phys_addr)) + mtrr_if->set_all(); + + } } } @@ -829,9 +1285,10 @@ static int __init mtrr_init_finialize(void) { if (!mtrr_if) return 0; - if (use_intel()) - mtrr_state_warn(); - else { + if (use_intel()) { + if (enable_mtrr_cleanup < 1) + mtrr_state_warn(); + } else { /* The CPUs haven't MTRR and seem to not support SMP. They have * specific drivers, we use a tricky method to support * suspend/resume for them. diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2cc77eb..2dc4ec6 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt); void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); +void fill_mtrr_var_range(unsigned int index, + u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); void get_mtrr_state(void); extern void set_mtrr_ops(struct mtrr_ops * ops); @@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if; #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) extern unsigned int num_var_ranges; +extern u64 mtrr_tom2; void mtrr_state_warn(void); const char *mtrr_attrib_to_str(int x); -- cgit v0.10.2 From 42651f15824d003e8357693ab72c4dbb3e280836 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Apr 2008 01:59:49 -0700 Subject: x86: fix trimming e820 with MTRR holes. converting MTRR layout from continous to discrete, some time could run out of MTRRs. So add gran_sizek to prevent that by dumpping small RAM piece less than gran_sizek. previous trimming only can handle highest_pfn from mtrr to end_pfn from e820. when have more than 4g RAM installed, there will be holes below 4g. so need to check ram below 4g is coverred well. need to be applied after [PATCH] x86: mtrr cleanup for converting continuous to discrete layout v7 Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 8a6f68b..9ab5c16 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1095,6 +1095,17 @@ int __init amd_special_default_mtrr(void) return 0; } +static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) +{ + u64 trim_start, trim_size; + trim_start = start_pfn; + trim_start <<= PAGE_SHIFT; + trim_size = limit_pfn; + trim_size <<= PAGE_SHIFT; + trim_size -= trim_start; + return update_memory_range(trim_start, trim_size, E820_RAM, + E820_RESERVED); +} /** * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs * @end_pfn: ending page frame number @@ -1110,8 +1121,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) { unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; - u64 trim_start, trim_size; + struct res_range range[RANGE_NUM]; + int nr_range; + u64 total_real_trim_size; + int changed; + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; /* * Make sure we only trim uncachable memory on machines that * support the Intel MTRR architecture: @@ -1123,9 +1139,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) if (def != MTRR_TYPE_UNCACHABLE) return 0; - if (amd_special_default_mtrr()) - return 0; - /* Find highest cached pfn */ for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, &base, &size, &type); @@ -1145,26 +1158,80 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) return 0; } - if (highest_pfn < end_pfn) { + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type >= MTRR_NUM_TYPES) + continue; + if (!size) + type = MTRR_NUM_TYPES; + num[type]++; + } + + /* no entry for WB? */ + if (!num[MTRR_TYPE_WRBACK]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + memset(range, 0, sizeof(range)); + nr_range = 0; + if (mtrr_tom2) { + range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); + range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; + if (highest_pfn < range[nr_range].end + 1) + highest_pfn = range[nr_range].end + 1; + nr_range++; + } + nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); + + changed = 0; + total_real_trim_size = 0; + + /* check the top at first */ + i = nr_range - 1; + if (range[i].end + 1 < end_pfn) { + total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn); + } + + if (total_real_trim_size) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %luMB of RAM.\n", - (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); + " all of memory, losing %lluMB of RAM.\n", + total_real_trim_size >> 20); WARN_ON(1); - printk(KERN_INFO "update e820 for mtrr\n"); - trim_start = highest_pfn; - trim_start <<= PAGE_SHIFT; - trim_size = end_pfn; - trim_size <<= PAGE_SHIFT; - trim_size -= trim_start; - update_memory_range(trim_start, trim_size, E820_RAM, - E820_RESERVED); + printk(KERN_INFO "update e820 for mtrr -- end_pfn\n"); update_e820(); - return 1; + changed = 1; } - return 0; + total_real_trim_size = 0; + if (range[0].start) + total_real_trim_size += real_trim_memory(0, range[0].start); + + for (i = 0; i < nr_range - 1; i--) { + if (range[i].end + 1 < range[i+1].start) + total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start); + } + + if (total_real_trim_size) { + printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" + " all of memory, losing %lluMB of RAM.\n", + total_real_trim_size >> 20); + + WARN_ON(1); + + printk(KERN_INFO "update e820 for mtrr -- holes\n"); + update_e820(); + changed = 1; + } + + return changed; } /** diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 31ea2bb..857f706 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -783,10 +783,11 @@ static int __init parse_memmap(char *arg) return 0; } early_param("memmap", parse_memmap); -void __init update_memory_range(u64 start, u64 size, unsigned old_type, +u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type) { int i; + u64 real_updated_size = 0; BUG_ON(old_type == new_type); @@ -798,6 +799,7 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, /* totally covered? */ if (ei->addr >= start && ei->size <= size) { ei->type = new_type; + real_updated_size += ei->size; continue; } /* partially covered */ @@ -807,7 +809,10 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, continue; add_memory_region(final_start, final_end - final_start, new_type); + real_updated_size += final_end - final_start; } + + return real_updated_size; } void __init finish_e820_parsing(void) diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 124480c..848b2cd 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -829,10 +829,11 @@ void __init finish_e820_parsing(void) } } -void __init update_memory_range(u64 start, u64 size, unsigned old_type, +u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type) { int i; + u64 real_updated_size = 0; BUG_ON(old_type == new_type); @@ -844,6 +845,7 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, /* totally covered? */ if (ei->addr >= start && ei->size <= size) { ei->type = new_type; + real_updated_size += ei->size; continue; } /* partially covered */ @@ -853,7 +855,9 @@ void __init update_memory_range(u64 start, u64 size, unsigned old_type, continue; add_memory_region(final_start, final_end - final_start, new_type); + real_updated_size += final_end - final_start; } + return real_updated_size; } void __init update_e820(void) diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index e1f10c6..af0711b 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -31,7 +31,7 @@ extern void propagate_e820_map(void); extern void register_bootmem_low_pages(unsigned long max_low_pfn); extern void add_memory_region(unsigned long long start, unsigned long long size, int type); -extern void update_memory_range(u64 start, u64 size, unsigned old_type, +extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type); extern void e820_register_memory(void); extern void limit_regions(unsigned long long size); diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index 71c4d68..6ae3e28 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h @@ -21,7 +21,7 @@ extern unsigned long find_e820_area_size(unsigned long start, unsigned long align); extern void add_memory_region(unsigned long start, unsigned long size, int type); -extern void update_memory_range(u64 start, u64 size, unsigned old_type, +extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type); extern void setup_memory_region(void); extern void contig_e820_setup(void); -- cgit v0.10.2 From 8a374026c265476b1acfc3c186a66d59ebdb2cda Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Apr 2008 20:25:16 -0700 Subject: x86: fix trimming e820 with MTRR holes. - fix v2: process hole then end_pfn fix update_memory_range with whole cover comparing Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 9ab5c16..5a2a4c1 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1098,11 +1098,12 @@ int __init amd_special_default_mtrr(void) static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) { u64 trim_start, trim_size; - trim_start = start_pfn; + trim_start = start_pfn; trim_start <<= PAGE_SHIFT; trim_size = limit_pfn; trim_size <<= PAGE_SHIFT; trim_size -= trim_start; + return update_memory_range(trim_start, trim_size, E820_RAM, E820_RESERVED); } @@ -1124,7 +1125,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) struct res_range range[RANGE_NUM]; int nr_range; u64 total_real_trim_size; - int changed; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1189,49 +1189,35 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) } nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); - changed = 0; - total_real_trim_size = 0; - - /* check the top at first */ - i = nr_range - 1; - if (range[i].end + 1 < end_pfn) { - total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn); - } - - if (total_real_trim_size) { - printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %lluMB of RAM.\n", - total_real_trim_size >> 20); - - WARN_ON(1); - - printk(KERN_INFO "update e820 for mtrr -- end_pfn\n"); - update_e820(); - changed = 1; - } - total_real_trim_size = 0; + /* check the head */ if (range[0].start) total_real_trim_size += real_trim_memory(0, range[0].start); - - for (i = 0; i < nr_range - 1; i--) { + /* check the holes */ + for (i = 0; i < nr_range - 1; i++) { if (range[i].end + 1 < range[i+1].start) total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start); } + /* check the top */ + i = nr_range - 1; + if (range[i].end + 1 < end_pfn) + total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn); if (total_real_trim_size) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" " all of memory, losing %lluMB of RAM.\n", total_real_trim_size >> 20); - WARN_ON(1); + if (enable_mtrr_cleanup < 1) + WARN_ON(1); - printk(KERN_INFO "update e820 for mtrr -- holes\n"); + printk(KERN_INFO "update e820 for mtrr\n"); update_e820(); - changed = 1; + + return 1; } - return changed; + return 0; } /** diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 857f706..751d517 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -797,7 +797,8 @@ u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, if (ei->type != old_type) continue; /* totally covered? */ - if (ei->addr >= start && ei->size <= size) { + if (ei->addr >= start && + (ei->addr + ei->size) <= (start + size)) { ei->type = new_type; real_updated_size += ei->size; continue; diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 848b2cd..c45b4de 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -843,7 +843,8 @@ u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, if (ei->type != old_type) continue; /* totally covered? */ - if (ei->addr >= start && ei->size <= size) { + if (ei->addr >= start && + (ei->addr + ei->size) <= (start + size)) { ei->type = new_type; real_updated_size += ei->size; continue; -- cgit v0.10.2 From f5098d62c1d1cede8ff23d01bbf50a421f110562 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Apr 2008 20:25:58 -0700 Subject: x86: mtrr cleanup for converting continuous to discrete layout v8 - fix v9: address format change requests by Ingo more case handling in range_to_var_with_hole Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 97a1764..f417fe3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1110,13 +1110,12 @@ config MTRR_SANITIZER If unsure, say Y. config MTRR_SANITIZER_ENABLE_DEFAULT - def_bool y - prompt "Enable MTRR cleanup by default" + int "MTRR cleanup enable value (0-1)" + range 0 1 + default "0" depends on MTRR_SANITIZER help - Enable mtrr cleanup by default - - If unsure, say Y. + Enable mtrr cleanup default value config X86_PAT bool diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 5a2a4c1..79597d3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -611,17 +611,9 @@ static struct sysdev_driver mtrr_sysdev_driver = { }; #ifdef CONFIG_MTRR_SANITIZER - -#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT -static int enable_mtrr_cleanup __initdata = 1; -#else -static int enable_mtrr_cleanup __initdata; -#endif - +static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; #else - static int enable_mtrr_cleanup __initdata = -1; - #endif static int __init disable_mtrr_cleanup_setup(char *str) @@ -640,6 +632,7 @@ static int __init enable_mtrr_cleanup_setup(char *str) } early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); +/* should be related to MTRR_VAR_RANGES nums */ #define RANGE_NUM 256 struct res_range { @@ -647,13 +640,27 @@ struct res_range { unsigned long end; }; -static int __init add_range(struct res_range *range, int nr_range, unsigned long start, - unsigned long end, int merge) +static int __init +add_range(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) { - int i; + /* out of slots */ + if (nr_range >= RANGE_NUM) + return nr_range; - if (!merge) - goto addit; + range[nr_range].start = start; + range[nr_range].end = end; + + nr_range++; + + return nr_range; +} + +static int __init +add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, + unsigned long end) +{ + int i; /* try to merge it with old one */ for (i = 0; i < nr_range; i++) { @@ -676,24 +683,14 @@ static int __init add_range(struct res_range *range, int nr_range, unsigned long return nr_range; } -addit: /* need to add that */ - if (nr_range >= RANGE_NUM) - return nr_range; - - range[nr_range].start = start; - range[nr_range].end = end; - - nr_range++; - - return nr_range; - + return add_range(range, nr_range, start, end); } -static void __init subtract_range(struct res_range *range, unsigned long start, - unsigned long end) + +static void __init +subtract_range(struct res_range *range, unsigned long start, unsigned long end) { - int i; - int j; + int i, j; for (j = 0; j < RANGE_NUM; j++) { if (!range[j].end) @@ -747,46 +744,47 @@ static int __init cmp_range(const void *x1, const void *x2) } struct var_mtrr_state { - unsigned long range_startk, range_sizek; - unsigned long chunk_sizek; - unsigned long gran_sizek; - unsigned int reg; - unsigned address_bits; + unsigned long range_startk; + unsigned long range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + unsigned int reg; + unsigned int address_bits; }; -static void __init set_var_mtrr( - unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned address_bits) +static void __init +set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type, unsigned address_bits) { u32 base_lo, base_hi, mask_lo, mask_hi; - unsigned address_mask_high; + u64 base, mask; if (!sizek) { fill_mtrr_var_range(reg, 0, 0, 0, 0); return; } - address_mask_high = ((1u << (address_bits - 32u)) - 1u); + mask = (1ULL << address_bits) - 1; + mask &= ~((((u64)sizek) << 10) - 1); - base_hi = basek >> 22; - base_lo = basek << 10; + base = ((u64)basek) << 10; - if (sizek < 4*1024*1024) { - mask_hi = address_mask_high; - mask_lo = ~((sizek << 10) - 1); - } else { - mask_hi = address_mask_high & (~((sizek >> 22) - 1)); - mask_lo = 0; - } + base |= type; + mask |= 0x800; + + base_lo = base & ((1ULL<<32) - 1); + base_hi = base >> 32; + + mask_lo = mask & ((1ULL<<32) - 1); + mask_hi = mask >> 32; - base_lo |= type; - mask_lo |= 0x800; fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); } -static unsigned int __init range_to_mtrr(unsigned int reg, - unsigned long range_startk, unsigned long range_sizek, - unsigned char type, unsigned address_bits) +static unsigned int __init +range_to_mtrr(unsigned int reg, unsigned long range_startk, + unsigned long range_sizek, unsigned char type, + unsigned address_bits) { if (!range_sizek || (reg >= num_var_ranges)) return reg; @@ -794,6 +792,7 @@ static unsigned int __init range_to_mtrr(unsigned int reg, while (range_sizek) { unsigned long max_align, align; unsigned long sizek; + /* Compute the maximum size I can make a range */ if (range_startk) max_align = ffs(range_startk) - 1; @@ -818,7 +817,8 @@ static unsigned int __init range_to_mtrr(unsigned int reg, return reg; } -static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) +static void __init +range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) { unsigned long hole_basek, hole_sizek; unsigned long range0_basek, range0_sizek; @@ -848,23 +848,31 @@ static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigne /* try to append some small hole */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); - if ((range0_sizek == state->range_sizek) || - ((range0_basek + range0_sizek - chunk_sizek > basek) && basek)) { + if (range0_sizek == state->range_sizek) { printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits); return; + } else if (basek) { + while (range0_basek + range0_sizek - chunk_sizek > basek) { + range0_sizek -= chunk_sizek; + if (!range0_sizek) + break; + } } - range0_sizek -= chunk_sizek; + if (range0_sizek > chunk_sizek) + range0_sizek -= chunk_sizek; printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, range0_sizek, MTRR_TYPE_WRBACK, state->address_bits); range_basek = range0_basek + range0_sizek; range_sizek = chunk_sizek; - if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) { + + if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) && + (range_basek + range_sizek <= basek)) { hole_sizek = range_sizek - (state->range_sizek - range0_sizek); hole_basek = range_basek + range_sizek - hole_sizek; } else @@ -880,7 +888,9 @@ static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigne } } -static void __init set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn) +static void __init +set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, + unsigned long size_pfn) { unsigned long basek, sizek; @@ -921,7 +931,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p) early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); /* granity of mtrr of block */ -static u64 mtrr_gran_size __initdata = (64ULL<<20); +static u64 mtrr_gran_size __initdata = (1ULL<<20); static int __init parse_mtrr_gran_size_opt(char *p) { @@ -932,17 +942,19 @@ static int __init parse_mtrr_gran_size_opt(char *p) } early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); -static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits) +static void __init +x86_setup_var_mtrrs(struct res_range *range, int nr_range, + unsigned address_bits) { struct var_mtrr_state var_state; int i; - var_state.range_startk = 0; - var_state.range_sizek = 0; - var_state.reg = 0; - var_state.address_bits = address_bits; - var_state.chunk_sizek = mtrr_chunk_size >> 10; - var_state.gran_sizek = mtrr_gran_size >> 10; + var_state.range_startk = 0; + var_state.range_sizek = 0; + var_state.reg = 0; + var_state.address_bits = address_bits; + var_state.chunk_sizek = mtrr_chunk_size >> 10; + var_state.gran_sizek = mtrr_gran_size >> 10; /* Write the range etc */ for (i = 0; i < nr_range; i++) @@ -952,11 +964,16 @@ static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, un range_to_mtrr_with_hole(&var_state, 0); printk(KERN_INFO "DONE variable MTRRs\n"); /* Clear out the extra MTRR's */ - while (var_state.reg < num_var_ranges) - set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits); + while (var_state.reg < num_var_ranges) { + set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits); + var_state.reg++; + } } -static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size) +static int __init +x86_get_mtrr_mem_range(struct res_range *range, int nr_range, + unsigned long extra_remove_base, + unsigned long extra_remove_size) { unsigned long i, base, size; mtrr_type type; @@ -965,7 +982,7 @@ static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, mtrr_if->get(i, &base, &size, &type); if (type != MTRR_TYPE_WRBACK) continue; - nr_range = add_range(range, nr_range, base, base + size - 1, 1); + nr_range = add_range_with_merge(range, nr_range, base, base + size - 1); } printk(KERN_INFO "After WB checking\n"); for (i = 0; i < nr_range; i++) @@ -1005,11 +1022,11 @@ static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, static int __init mtrr_cleanup(unsigned address_bits) { + unsigned long extra_remove_base, extra_remove_size; unsigned long i, base, size, def, dummy; - mtrr_type type; struct res_range range[RANGE_NUM]; + mtrr_type type; int nr_range; - unsigned long extra_remove_base, extra_remove_size; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1053,7 +1070,6 @@ static int __init mtrr_cleanup(unsigned address_bits) x86_setup_var_mtrrs(range, nr_range, address_bits); return 1; - } static int disable_mtrr_trim; -- cgit v0.10.2 From 12031a624af7816ec7660b82be648aa3703b4ebe Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 2 May 2008 02:40:22 -0700 Subject: x86: mtrr cleanup for converting continuous to discrete - auto detect v4 Loop through mtrr chunk_size and gran_size from 1M to 2G to find out the optimal value so user does not need to add mtrr_chunk_size and mtrr_gran_size to the kernel command line. If optimal value is not found, print out all list to help select less optimal value. Add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries. v2: find the one with more spare entries v3: fix hole_basek offset v4: tight the compare between range and range_new loop stop with 4g Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Gabriel C Cc: Mika Fischer Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4442760..2000977 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -610,8 +610,17 @@ and is between 256 and 4096 characters. It is defined in the file that could hold holes aka. UC entries. mtrr_gran_size=nn[KMG] [X86] - used for mtrr cleanup. It is granity of mtrr block. - Big value could prevent small alignment use up MTRRs. + Used for mtrr cleanup. It is granularity of mtrr block. + Default is 1. + Large value could prevent small alignment from + using up MTRRs. + + mtrr_spare_reg_nr=n [X86] + Format: + Range: 0,7 : spare reg number + Default : 1 + Used for mtrr cleanup. It is spare mtrr entries number. + Set to 2 or more if your graphical card needs more. disable_mtrr_trim [X86, Intel and AMD only] By default the kernel will trim any uncacheable diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f417fe3..9e97615 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1117,6 +1117,15 @@ config MTRR_SANITIZER_ENABLE_DEFAULT help Enable mtrr cleanup default value +config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT + int "MTRR cleanup spare reg num (0-7)" + range 0 7 + default "1" + depends on MTRR_SANITIZER + help + mtrr cleanup spare entries default, it can be changed via + mtrr_spare_reg_nr= + config X86_PAT bool prompt "x86 PAT support" diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 79597d3..e6c162c 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_driver = { .resume = mtrr_restore, }; -#ifdef CONFIG_MTRR_SANITIZER -static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; -#else -static int enable_mtrr_cleanup __initdata = -1; -#endif - -static int __init disable_mtrr_cleanup_setup(char *str) -{ - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 0; - return 0; -} -early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); - -static int __init enable_mtrr_cleanup_setup(char *str) -{ - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 1; - return 0; -} -early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); - /* should be related to MTRR_VAR_RANGES nums */ #define RANGE_NUM 256 @@ -702,13 +680,15 @@ subtract_range(struct res_range *range, unsigned long start, unsigned long end) continue; } - if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { + if (start <= range[j].start && end < range[j].end && + range[j].start < end + 1) { range[j].start = end + 1; continue; } - if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { + if (start > range[j].start && end >= range[j].end && + range[j].end > start - 1) { range[j].end = start - 1; continue; } @@ -743,18 +723,123 @@ static int __init cmp_range(const void *x1, const void *x2) return start1 - start2; } +struct var_mtrr_range_state { + unsigned long base_pfn; + unsigned long size_pfn; + mtrr_type type; +}; + +struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; + +static int __init +x86_get_mtrr_mem_range(struct res_range *range, int nr_range, + unsigned long extra_remove_base, + unsigned long extra_remove_size) +{ + unsigned long i, base, size; + mtrr_type type; + + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_WRBACK) + continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; + nr_range = add_range_with_merge(range, nr_range, base, + base + size - 1); + } + printk(KERN_DEBUG "After WB checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + + /* take out UC ranges */ + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; + if (type != MTRR_TYPE_UNCACHABLE) + continue; + size = range_state[i].size_pfn; + if (!size) + continue; + base = range_state[i].base_pfn; + subtract_range(range, base, base + size - 1); + } + if (extra_remove_size) + subtract_range(range, extra_remove_base, + extra_remove_base + extra_remove_size - 1); + + /* get new range num */ + nr_range = 0; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + nr_range++; + } + printk(KERN_DEBUG "After UC checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + printk(KERN_DEBUG "After sorting\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + + /* clear those is not used */ + for (i = nr_range; i < RANGE_NUM; i++) + memset(&range[i], 0, sizeof(range[i])); + + return nr_range; +} + +static struct res_range __initdata range[RANGE_NUM]; + +#ifdef CONFIG_MTRR_SANITIZER + +static unsigned long __init sum_ranges(struct res_range *range, int nr_range) +{ + unsigned long sum; + int i; + + sum = 0; + for (i = 0; i < nr_range; i++) + sum += range[i].end + 1 - range[i].start; + + return sum; +} + +static int enable_mtrr_cleanup __initdata = + CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; + +static int __init disable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 0; + return 0; +} +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); + +static int __init enable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 1; + return 0; +} +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); + struct var_mtrr_state { unsigned long range_startk; unsigned long range_sizek; unsigned long chunk_sizek; unsigned long gran_sizek; unsigned int reg; - unsigned int address_bits; }; static void __init set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned address_bits) + unsigned char type, unsigned int address_bits) { u32 base_lo, base_hi, mask_lo, mask_hi; u64 base, mask; @@ -781,10 +866,34 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); } +static void __init +save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type) +{ + range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); + range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); + range_state[reg].type = type; +} + +static void __init +set_var_mtrr_all(unsigned int address_bits) +{ + unsigned long basek, sizek; + unsigned char type; + unsigned int reg; + + for (reg = 0; reg < num_var_ranges; reg++) { + basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); + sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); + type = range_state[reg].type; + + set_var_mtrr(reg, basek, sizek, type, address_bits); + } +} + static unsigned int __init range_to_mtrr(unsigned int reg, unsigned long range_startk, - unsigned long range_sizek, unsigned char type, - unsigned address_bits) + unsigned long range_sizek, unsigned char type) { if (!range_sizek || (reg >= num_var_ranges)) return reg; @@ -803,12 +912,13 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, align = max_align; sizek = 1 << align; - printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n", + printk(KERN_DEBUG "Setting variable MTRR %d, base: %ldMB, " + "range: %ldMB, type %s\n", reg, range_startk >> 10, sizek >> 10, (type == MTRR_TYPE_UNCACHABLE)?"UC": ((type == MTRR_TYPE_WRBACK)?"WB":"Other") ); - set_var_mtrr(reg++, range_startk, sizek, type, address_bits); + save_var_mtrr(reg++, range_startk, sizek, type); range_startk += sizek; range_sizek -= sizek; if (reg >= num_var_ranges) @@ -817,10 +927,12 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, return reg; } -static void __init -range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) +static unsigned __init +range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, + unsigned long sizek) { unsigned long hole_basek, hole_sizek; + unsigned long second_basek, second_sizek; unsigned long range0_basek, range0_sizek; unsigned long range_basek, range_sizek; unsigned long chunk_sizek; @@ -828,64 +940,95 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) hole_basek = 0; hole_sizek = 0; + second_basek = 0; + second_sizek = 0; chunk_sizek = state->chunk_sizek; gran_sizek = state->gran_sizek; /* align with gran size, prevent small block used up MTRRs */ range_basek = ALIGN(state->range_startk, gran_sizek); if ((range_basek > basek) && basek) - return; - range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek); + return second_sizek; + state->range_sizek -= (range_basek - state->range_startk); + range_sizek = ALIGN(state->range_sizek, gran_sizek); - while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) { + while (range_sizek > state->range_sizek) { range_sizek -= gran_sizek; if (!range_sizek) - return; + return 0; } - state->range_startk = range_basek; state->range_sizek = range_sizek; /* try to append some small hole */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); if (range0_sizek == state->range_sizek) { - printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits); - return; - } else if (basek) { - while (range0_basek + range0_sizek - chunk_sizek > basek) { + printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", range0_basek<<10, + (range0_basek + state->range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + state->range_sizek, MTRR_TYPE_WRBACK); + return 0; + } + + range0_sizek -= chunk_sizek; + if (range0_sizek && sizek) { + while (range0_basek + range0_sizek > (basek + sizek)) { range0_sizek -= chunk_sizek; if (!range0_sizek) break; } } + if (range0_sizek) { + printk(KERN_DEBUG "range0: %016lx - %016lx\n", range0_basek<<10, + (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK); - if (range0_sizek > chunk_sizek) - range0_sizek -= chunk_sizek; - printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - range0_sizek, MTRR_TYPE_WRBACK, state->address_bits); + } range_basek = range0_basek + range0_sizek; range_sizek = chunk_sizek; - if ((range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) && - (range_basek + range_sizek <= basek)) { - hole_sizek = range_sizek - (state->range_sizek - range0_sizek); - hole_basek = range_basek + range_sizek - hole_sizek; - } else + if (range_basek + range_sizek > basek && + range_basek + range_sizek <= (basek + sizek)) { + /* one hole */ + second_basek = basek; + second_sizek = range_basek + range_sizek - basek; + } + + /* if last piece, only could one hole near end */ + if ((second_basek || !basek) && + range_sizek - (state->range_sizek - range0_sizek) - second_sizek < + (chunk_sizek >> 1)) { + /* + * one hole in middle (second_sizek is 0) or at end + * (second_sizek is 0 ) + */ + hole_sizek = range_sizek - (state->range_sizek - range0_sizek) + - second_sizek; + hole_basek = range_basek + range_sizek - hole_sizek + - second_sizek; + } else { + /* fallback for big hole, or several holes */ range_sizek = state->range_sizek - range0_sizek; + second_basek = 0; + second_sizek = 0; + } - printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range_basek, - range_sizek, MTRR_TYPE_WRBACK, state->address_bits); + printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, + MTRR_TYPE_WRBACK); if (hole_sizek) { - printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, - hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits); + printk(KERN_DEBUG "hole: %016lx - %016lx\n", hole_basek<<10, + (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, + MTRR_TYPE_UNCACHABLE); + } + + return second_sizek; } static void __init @@ -893,6 +1036,7 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn) { unsigned long basek, sizek; + unsigned long second_sizek = 0; if (state->reg >= num_var_ranges) return; @@ -901,21 +1045,19 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, sizek = size_pfn << (PAGE_SHIFT - 10); /* See if I can merge with the last range */ - if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) { + if ((basek <= 1024) || + (state->range_startk + state->range_sizek == basek)) { unsigned long endk = basek + sizek; state->range_sizek = endk - state->range_startk; return; } /* Write the range mtrrs */ - if (state->range_sizek != 0) { - range_to_mtrr_with_hole(state, basek); + if (state->range_sizek != 0) + second_sizek = range_to_mtrr_with_hole(state, basek, sizek); - state->range_startk = 0; - state->range_sizek = 0; - } /* Allocate an msr */ - state->range_startk = basek; - state->range_sizek = sizek; + state->range_startk = basek + second_sizek; + state->range_sizek = sizek - second_sizek; } /* mininum size of mtrr block that can take hole */ @@ -931,7 +1073,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p) early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); /* granity of mtrr of block */ -static u64 mtrr_gran_size __initdata = (1ULL<<20); +static u64 mtrr_gran_size __initdata; static int __init parse_mtrr_gran_size_opt(char *p) { @@ -942,91 +1084,84 @@ static int __init parse_mtrr_gran_size_opt(char *p) } early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); -static void __init +static int nr_mtrr_spare_reg __initdata = + CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; + +static int __init parse_mtrr_spare_reg(char *arg) +{ + if (arg) + nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); + return 0; +} + +early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); + +static int __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, - unsigned address_bits) + u64 chunk_size, u64 gran_size) { struct var_mtrr_state var_state; int i; + int num_reg; var_state.range_startk = 0; var_state.range_sizek = 0; var_state.reg = 0; - var_state.address_bits = address_bits; - var_state.chunk_sizek = mtrr_chunk_size >> 10; - var_state.gran_sizek = mtrr_gran_size >> 10; + var_state.chunk_sizek = chunk_size >> 10; + var_state.gran_sizek = gran_size >> 10; + + memset(range_state, 0, sizeof(range_state)); /* Write the range etc */ for (i = 0; i < nr_range; i++) - set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1); + set_var_mtrr_range(&var_state, range[i].start, + range[i].end - range[i].start + 1); /* Write the last range */ - range_to_mtrr_with_hole(&var_state, 0); - printk(KERN_INFO "DONE variable MTRRs\n"); + if (var_state.range_sizek != 0) + range_to_mtrr_with_hole(&var_state, 0, 0); + printk(KERN_DEBUG "DONE variable MTRRs\n"); + + num_reg = var_state.reg; /* Clear out the extra MTRR's */ while (var_state.reg < num_var_ranges) { - set_var_mtrr(var_state.reg, 0, 0, 0, var_state.address_bits); + save_var_mtrr(var_state.reg, 0, 0, 0); var_state.reg++; } -} - -static int __init -x86_get_mtrr_mem_range(struct res_range *range, int nr_range, - unsigned long extra_remove_base, - unsigned long extra_remove_size) -{ - unsigned long i, base, size; - mtrr_type type; - - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); - if (type != MTRR_TYPE_WRBACK) - continue; - nr_range = add_range_with_merge(range, nr_range, base, base + size - 1); - } - printk(KERN_INFO "After WB checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); - /* take out UC ranges */ - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); - if (type != MTRR_TYPE_UNCACHABLE) - continue; - if (!size) - continue; - subtract_range(range, base, base + size - 1); - } - if (extra_remove_size) - subtract_range(range, extra_remove_base, extra_remove_base + extra_remove_size - 1); + return num_reg; +} - /* get new range num */ - nr_range = 0; - for (i = 0; i < RANGE_NUM; i++) { - if (!range[i].end) - continue; - nr_range++; - } - printk(KERN_INFO "After UC checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); +struct mtrr_cleanup_result { + unsigned long gran_sizek; + unsigned long chunk_sizek; + unsigned long lose_cover_sizek; + unsigned int num_reg; + int bad; +}; - /* sort the ranges */ - sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); - printk(KERN_INFO "After sorting\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); +/* + * gran_size: 1M, 2M, ..., 2G + * chunk size: gran_size, ..., 4G + * so we need (2+13)*6 + */ +#define NUM_RESULT 90 +#define PSHIFT (PAGE_SHIFT - 10) - return nr_range; -} +static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; +static struct res_range __initdata range_new[RANGE_NUM]; +static unsigned long __initdata min_loss_pfn[RANGE_NUM]; static int __init mtrr_cleanup(unsigned address_bits) { unsigned long extra_remove_base, extra_remove_size; unsigned long i, base, size, def, dummy; - struct res_range range[RANGE_NUM]; mtrr_type type; - int nr_range; + int nr_range, nr_range_new; + u64 chunk_size, gran_size; + unsigned long range_sums, range_sums_new; + int index_good; + int num_reg_good; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1038,10 +1173,20 @@ static int __init mtrr_cleanup(unsigned address_bits) if (def != MTRR_TYPE_UNCACHABLE) return 0; + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } + /* check entries number */ memset(num, 0, sizeof(num)); for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); + type = range_state[i].type; + size = range_state[i].size_pfn; if (type >= MTRR_NUM_TYPES) continue; if (!size) @@ -1062,15 +1207,172 @@ static int __init mtrr_cleanup(unsigned address_bits) extra_remove_size = 0; if (mtrr_tom2) { extra_remove_base = 1 << (32 - PAGE_SHIFT); - extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base; + extra_remove_size = + (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; + } + nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, + extra_remove_size); + range_sums = sum_ranges(range, nr_range); + printk(KERN_INFO "total RAM coverred: %ldM\n", + range_sums >> (20 - PAGE_SHIFT)); + + if (mtrr_chunk_size && mtrr_gran_size) { + int num_reg; + + /* convert ranges to var ranges state */ + num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, + mtrr_gran_size); + + /* we got new setting in range_state, check it */ + memset(range_new, 0, sizeof(range_new)); + nr_range_new = x86_get_mtrr_mem_range(range_new, 0, + extra_remove_base, + extra_remove_size); + range_sums_new = sum_ranges(range_new, nr_range_new); + + i = 0; + result[i].chunk_sizek = mtrr_chunk_size >> 10; + result[i].gran_sizek = mtrr_gran_size >> 10; + result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { + result[i].lose_cover_sizek = + (range_sums_new - range_sums) << PSHIFT; + result[i].bad = 1; + } else + result[i].lose_cover_sizek = + (range_sums - range_sums_new) << PSHIFT; + + printk(KERN_INFO " %sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?" BAD ":"", result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + result[i].num_reg, result[i].bad?"-":"", + result[i].lose_cover_sizek >> 10); + if (!result[i].bad) { + set_var_mtrr_all(address_bits); + return 1; + } + printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " + "will find optimal one\n"); + memset(result, 0, sizeof(result[0])); + } + + i = 0; + memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); + memset(result, 0, sizeof(result)); + for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { + for (chunk_size = gran_size; chunk_size < (1ULL<<33); + chunk_size <<= 1) { + int num_reg; + + printk(KERN_INFO + "\ngran_size: %lldM chunk_size_size: %lldM\n", + gran_size >> 20, chunk_size >> 20); + if (i >= NUM_RESULT) + continue; + + /* convert ranges to var ranges state */ + num_reg = x86_setup_var_mtrrs(range, nr_range, + chunk_size, gran_size); + + /* we got new setting in range_state, check it */ + memset(range_new, 0, sizeof(range_new)); + nr_range_new = x86_get_mtrr_mem_range(range_new, 0, + extra_remove_base, extra_remove_size); + range_sums_new = sum_ranges(range_new, nr_range_new); + + result[i].chunk_sizek = chunk_size >> 10; + result[i].gran_sizek = gran_size >> 10; + result[i].num_reg = num_reg; + if (range_sums < range_sums_new) { + result[i].lose_cover_sizek = + (range_sums_new - range_sums) << PSHIFT; + result[i].bad = 1; + } else + result[i].lose_cover_sizek = + (range_sums - range_sums_new) << PSHIFT; + + /* double check it */ + if (!result[i].bad && !result[i].lose_cover_sizek) { + if (nr_range_new != nr_range || + memcmp(range, range_new, sizeof(range))) + result[i].bad = 1; + } + + if (!result[i].bad && (range_sums - range_sums_new < + min_loss_pfn[num_reg])) { + min_loss_pfn[num_reg] = + range_sums - range_sums_new; + } + i++; + } } - nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); - /* convert ranges to var ranges state */ - x86_setup_var_mtrrs(range, nr_range, address_bits); + /* print out all */ + for (i = 0; i < NUM_RESULT; i++) { + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + result[i].num_reg, result[i].bad?"-":"", + result[i].lose_cover_sizek >> 10); + } - return 1; + /* try to find the optimal index */ + if (nr_mtrr_spare_reg >= num_var_ranges) + nr_mtrr_spare_reg = num_var_ranges - 1; + num_reg_good = -1; + for (i = 1; i < num_var_ranges + 1 - nr_mtrr_spare_reg; i++) { + if (!min_loss_pfn[i]) { + num_reg_good = i; + break; + } + } + + index_good = -1; + if (num_reg_good != -1) { + for (i = 0; i < NUM_RESULT; i++) { + if (!result[i].bad && + result[i].num_reg == num_reg_good && + !result[i].lose_cover_sizek) { + index_good = i; + break; + } + } + } + + if (index_good != -1) { + printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); + i = index_good; + printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", + result[i].gran_sizek >> 10, + result[i].chunk_sizek >> 10); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %ldM \n", + result[i].num_reg, + result[i].lose_cover_sizek >> 10); + /* convert ranges to var ranges state */ + chunk_size = result[i].chunk_sizek; + chunk_size <<= 10; + gran_size = result[i].gran_sizek; + gran_size <<= 10; + x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); + set_var_mtrr_all(address_bits); + return 1; + } + + printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); + printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); + + return 0; } +#else +static int __init mtrr_cleanup(unsigned address_bits) +{ + return 0; +} +#endif + +static int __initdata changed_by_mtrr_cleanup; static int disable_mtrr_trim; @@ -1111,7 +1413,8 @@ int __init amd_special_default_mtrr(void) return 0; } -static u64 __init real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) +static u64 __init real_trim_memory(unsigned long start_pfn, + unsigned long limit_pfn) { u64 trim_start, trim_size; trim_start = start_pfn; @@ -1138,9 +1441,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) { unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; - struct res_range range[RANGE_NUM]; int nr_range; - u64 total_real_trim_size; + u64 total_trim_size; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1155,11 +1457,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) if (def != MTRR_TYPE_UNCACHABLE) return 0; - /* Find highest cached pfn */ + /* get it and store it aside */ + memset(range_state, 0, sizeof(range_state)); for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, &base, &size, &type); + range_state[i].base_pfn = base; + range_state[i].size_pfn = size; + range_state[i].type = type; + } + + /* Find highest cached pfn */ + for (i = 0; i < num_var_ranges; i++) { + type = range_state[i].type; if (type != MTRR_TYPE_WRBACK) continue; + base = range_state[i].base_pfn; + size = range_state[i].size_pfn; if (highest_pfn < base + size) highest_pfn = base + size; } @@ -1177,9 +1490,10 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* check entries number */ memset(num, 0, sizeof(num)); for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &base, &size, &type); + type = range_state[i].type; if (type >= MTRR_NUM_TYPES) continue; + size = range_state[i].size_pfn; if (!size) type = MTRR_NUM_TYPES; num[type]++; @@ -1205,26 +1519,28 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) } nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); - total_real_trim_size = 0; + total_trim_size = 0; /* check the head */ if (range[0].start) - total_real_trim_size += real_trim_memory(0, range[0].start); + total_trim_size += real_trim_memory(0, range[0].start); /* check the holes */ for (i = 0; i < nr_range - 1; i++) { if (range[i].end + 1 < range[i+1].start) - total_real_trim_size += real_trim_memory(range[i].end + 1, range[i+1].start); + total_trim_size += real_trim_memory(range[i].end + 1, + range[i+1].start); } /* check the top */ i = nr_range - 1; if (range[i].end + 1 < end_pfn) - total_real_trim_size += real_trim_memory(range[i].end + 1, end_pfn); + total_trim_size += real_trim_memory(range[i].end + 1, + end_pfn); - if (total_real_trim_size) { + if (total_trim_size) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" " all of memory, losing %lluMB of RAM.\n", - total_real_trim_size >> 20); + total_trim_size >> 20); - if (enable_mtrr_cleanup < 1) + if (!changed_by_mtrr_cleanup) WARN_ON(1); printk(KERN_INFO "update e820 for mtrr\n"); @@ -1314,8 +1630,10 @@ void __init mtrr_bp_init(void) if (use_intel()) { get_mtrr_state(); - if (mtrr_cleanup(phys_addr)) + if (mtrr_cleanup(phys_addr)) { + changed_by_mtrr_cleanup = 1; mtrr_if->set_all(); + } } } @@ -1355,7 +1673,7 @@ static int __init mtrr_init_finialize(void) if (!mtrr_if) return 0; if (use_intel()) { - if (enable_mtrr_cleanup < 1) + if (!changed_by_mtrr_cleanup) mtrr_state_warn(); } else { /* The CPUs haven't MTRR and seem to not support SMP. They have -- cgit v0.10.2 From 833e78bfeeef628f0201349a0a05a54f48f07884 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 5 May 2008 15:57:38 -0700 Subject: x86: process fam 10h like k8 with fixed mtrr setting otherwise fixed MTRR for family 10h may not be changed. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 5aae648..a83f5cd7 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -342,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) if (lo != msrwords[0] || hi != msrwords[1]) { if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 == 15 && + (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) && ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) k8_enable_fixed_iorrs(); mtrr_wrmsr(msr, msrwords[0], msrwords[1]); -- cgit v0.10.2 From b79cd8f1268bab57ff85b19d131f7f23deab2dee Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 11 May 2008 00:30:15 -0700 Subject: x86: make e820.c to have common functions remove the duplicated copy of these functions. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3..5369a4e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -22,7 +22,7 @@ obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o -obj-y += bootflag.o e820_$(BITS).o +obj-y += bootflag.o e820_$(BITS).o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c new file mode 100644 index 0000000..2cb686f --- /dev/null +++ b/arch/x86/kernel/e820.c @@ -0,0 +1,475 @@ +/* + * Handle the memory map. + * The functions here do the job until bootmem takes over. + * + * Getting sanitize_e820_map() in sync with i386 version by applying change: + * - Provisions for empty E820 memory regions (reported by certain BIOSes). + * Alex Achenbach , December 2002. + * Venkatesh Pallipadi + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +struct e820map e820; + +/* For PCI or other memory-mapped resources */ +unsigned long pci_mem_start = 0xaeedbabe; +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_mem_start); +#endif + +/* + * This function checks if any part of the range is mapped + * with type. + */ +int +e820_any_mapped(u64 start, u64 end, unsigned type) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) + continue; + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + return 1; + } + return 0; +} +EXPORT_SYMBOL_GPL(e820_any_mapped); + +/* + * This function checks if the entire range is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init e820_all_mapped(u64 start, u64 end, unsigned type) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + + /* if the region is at the beginning of we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* + * if start is now at or beyond end, we're done, full + * coverage + */ + if (start >= end) + return 1; + } + return 0; +} + +/* + * Add a memory region to the kernel e820 map. + */ +void __init add_memory_region(u64 start, u64 size, int type) +{ + int x = e820.nr_map; + + if (x == E820MAX) { + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); + return; + } + + e820.map[x].addr = start; + e820.map[x].size = size; + e820.map[x].type = type; + e820.nr_map++; +} + +void __init e820_print_map(char *who) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + printk(KERN_INFO " %s: %016Lx - %016Lx ", who, + (unsigned long long) e820.map[i].addr, + (unsigned long long) + (e820.map[i].addr + e820.map[i].size)); + switch (e820.map[i].type) { + case E820_RAM: + printk(KERN_CONT "(usable)\n"); + break; + case E820_RESERVED: + printk(KERN_CONT "(reserved)\n"); + break; + case E820_ACPI: + printk(KERN_CONT "(ACPI data)\n"); + break; + case E820_NVS: + printk(KERN_CONT "(ACPI NVS)\n"); + break; + default: + printk(KERN_CONT "type %u\n", e820.map[i].type); + break; + } + } +} + +/* + * Sanitize the BIOS e820 map. + * + * Some e820 responses include overlapping entries. The following + * replaces the original e820 map with a new one, removing overlaps. + * + */ +int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) +{ + struct change_member { + struct e820entry *pbios; /* pointer to original bios entry */ + unsigned long long addr; /* address for this change point */ + }; + static struct change_member change_point_list[2*E820MAX] __initdata; + static struct change_member *change_point[2*E820MAX] __initdata; + static struct e820entry *overlap_list[E820MAX] __initdata; + static struct e820entry new_bios[E820MAX] __initdata; + struct change_member *change_tmp; + unsigned long current_type, last_type; + unsigned long long last_addr; + int chgidx, still_changing; + int overlap_entries; + int new_bios_entry; + int old_nr, new_nr, chg_nr; + int i; + + /* + Visually we're performing the following + (1,2,3,4 = memory types)... + + Sample memory map (w/overlaps): + ____22__________________ + ______________________4_ + ____1111________________ + _44_____________________ + 11111111________________ + ____________________33__ + ___________44___________ + __________33333_________ + ______________22________ + ___________________2222_ + _________111111111______ + _____________________11_ + _________________4______ + + Sanitized equivalent (no overlap): + 1_______________________ + _44_____________________ + ___1____________________ + ____22__________________ + ______11________________ + _________1______________ + __________3_____________ + ___________44___________ + _____________33_________ + _______________2________ + ________________1_______ + _________________4______ + ___________________2____ + ____________________33__ + ______________________4_ + */ + + /* if there's only one memory region, don't bother */ + if (*pnr_map < 2) + return -1; + + old_nr = *pnr_map; + + /* bail out if we find any unreasonable addresses in bios map */ + for (i = 0; i < old_nr; i++) + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) + return -1; + + /* create pointers for initial change-point information (for sorting) */ + for (i = 0; i < 2 * old_nr; i++) + change_point[i] = &change_point_list[i]; + + /* record all known change-points (starting and ending addresses), + omitting those that are for empty memory regions */ + chgidx = 0; + for (i = 0; i < old_nr; i++) { + if (biosmap[i].size != 0) { + change_point[chgidx]->addr = biosmap[i].addr; + change_point[chgidx++]->pbios = &biosmap[i]; + change_point[chgidx]->addr = biosmap[i].addr + + biosmap[i].size; + change_point[chgidx++]->pbios = &biosmap[i]; + } + } + chg_nr = chgidx; + + /* sort change-point list by memory addresses (low -> high) */ + still_changing = 1; + while (still_changing) { + still_changing = 0; + for (i = 1; i < chg_nr; i++) { + unsigned long long curaddr, lastaddr; + unsigned long long curpbaddr, lastpbaddr; + + curaddr = change_point[i]->addr; + lastaddr = change_point[i - 1]->addr; + curpbaddr = change_point[i]->pbios->addr; + lastpbaddr = change_point[i - 1]->pbios->addr; + + /* + * swap entries, when: + * + * curaddr > lastaddr or + * curaddr == lastaddr and curaddr == curpbaddr and + * lastaddr != lastpbaddr + */ + if (curaddr < lastaddr || + (curaddr == lastaddr && curaddr == curpbaddr && + lastaddr != lastpbaddr)) { + change_tmp = change_point[i]; + change_point[i] = change_point[i-1]; + change_point[i-1] = change_tmp; + still_changing = 1; + } + } + } + + /* create a new bios memory map, removing overlaps */ + overlap_entries = 0; /* number of entries in the overlap table */ + new_bios_entry = 0; /* index for creating new bios map entries */ + last_type = 0; /* start with undefined memory type */ + last_addr = 0; /* start with 0 as last starting address */ + + /* loop through change-points, determining affect on the new bios map */ + for (chgidx = 0; chgidx < chg_nr; chgidx++) { + /* keep track of all overlapping bios entries */ + if (change_point[chgidx]->addr == + change_point[chgidx]->pbios->addr) { + /* + * add map entry to overlap list (> 1 entry + * implies an overlap) + */ + overlap_list[overlap_entries++] = + change_point[chgidx]->pbios; + } else { + /* + * remove entry from list (order independent, + * so swap with last) + */ + for (i = 0; i < overlap_entries; i++) { + if (overlap_list[i] == + change_point[chgidx]->pbios) + overlap_list[i] = + overlap_list[overlap_entries-1]; + } + overlap_entries--; + } + /* + * if there are overlapping entries, decide which + * "type" to use (larger value takes precedence -- + * 1=usable, 2,3,4,4+=unusable) + */ + current_type = 0; + for (i = 0; i < overlap_entries; i++) + if (overlap_list[i]->type > current_type) + current_type = overlap_list[i]->type; + /* + * continue building up new bios map based on this + * information + */ + if (current_type != last_type) { + if (last_type != 0) { + new_bios[new_bios_entry].size = + change_point[chgidx]->addr - last_addr; + /* + * move forward only if the new size + * was non-zero + */ + if (new_bios[new_bios_entry].size != 0) + /* + * no more space left for new + * bios entries ? + */ + if (++new_bios_entry >= E820MAX) + break; + } + if (current_type != 0) { + new_bios[new_bios_entry].addr = + change_point[chgidx]->addr; + new_bios[new_bios_entry].type = current_type; + last_addr = change_point[chgidx]->addr; + } + last_type = current_type; + } + } + /* retain count for new bios entries */ + new_nr = new_bios_entry; + + /* copy new bios mapping into original location */ + memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); + *pnr_map = new_nr; + + return 0; +} + +/* + * Copy the BIOS e820 map into a safe place. + * + * Sanity-check it while we're at it.. + * + * If we're lucky and live on a modern system, the setup code + * will have given us a memory map that we can use to properly + * set up memory. If we aren't, we'll fake a memory map. + */ +int __init copy_e820_map(struct e820entry *biosmap, int nr_map) +{ + /* Only one memory region (or negative)? Ignore it */ + if (nr_map < 2) + return -1; + + do { + u64 start = biosmap->addr; + u64 size = biosmap->size; + u64 end = start + size; + u32 type = biosmap->type; + + /* Overflow in 64 bits? Ignore the memory map. */ + if (start > end) + return -1; + + add_memory_region(start, size, type); + } while (biosmap++, --nr_map); + return 0; +} + +u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, + unsigned new_type) +{ + int i; + u64 real_updated_size = 0; + + BUG_ON(old_type == new_type); + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 final_start, final_end; + if (ei->type != old_type) + continue; + /* totally covered? */ + if (ei->addr >= start && + (ei->addr + ei->size) <= (start + size)) { + ei->type = new_type; + real_updated_size += ei->size; + continue; + } + /* partially covered */ + final_start = max(start, ei->addr); + final_end = min(start + size, ei->addr + ei->size); + if (final_start >= final_end) + continue; + add_memory_region(final_start, final_end - final_start, + new_type); + real_updated_size += final_end - final_start; + } + return real_updated_size; +} + +void __init update_e820(void) +{ + u8 nr_map; + + nr_map = e820.nr_map; + if (sanitize_e820_map(e820.map, &nr_map)) + return; + e820.nr_map = nr_map; + printk(KERN_INFO "modified physical RAM map:\n"); + e820_print_map("modified"); +} + +/* + * Search for the biggest gap in the low 32 bits of the e820 + * memory space. We pass this space to PCI to assign MMIO resources + * for hotplug or unconfigured devices in. + * Hopefully the BIOS let enough space left. + */ +__init void e820_setup_gap(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long long last; + int i; + int found = 0; + + last = 0x100000000ull; + gapstart = 0x10000000; + gapsize = 0x400000; + i = e820.nr_map; + while (--i >= 0) { + unsigned long long start = e820.map[i].addr; + unsigned long long end = start + e820.map[i].size; + + /* + * Since "last" is at most 4GB, we know we'll + * fit in 32 bits if this condition is true + */ + if (last > end) { + unsigned long gap = last - end; + + if (gap > gapsize) { + gapsize = gap; + gapstart = end; + found = 1; + } + } + if (start < last) + last = start; + } + +#ifdef CONFIG_X86_64 + if (!found) { + gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; + printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " + "address range\n" + KERN_ERR "PCI: Unassigned devices with 32bit resource " + "registers may break!\n"); + } +#endif + + /* + * See how much we want to round up: start off with + * rounding to the next 1MB area. + */ + round = 0x100000; + while ((gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gapstart + round) & -round; + + printk(KERN_INFO + "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", + pci_mem_start, gapstart, gapsize); +} + diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 751d517..dfe2575 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -16,21 +16,6 @@ #include #include -struct e820map e820; -struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ -}; -static struct change_member change_point_list[2*E820MAX] __initdata; -static struct change_member *change_point[2*E820MAX] __initdata; -static struct e820entry *overlap_list[E820MAX] __initdata; -static struct e820entry new_bios[E820MAX] __initdata; -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif - static struct resource system_rom_resource = { .name = "System ROM", .start = 0xf0000, @@ -254,223 +239,6 @@ void __init e820_mark_nosave_regions(void) } #endif -void __init add_memory_region(unsigned long long start, - unsigned long long size, int type) -{ - int x; - - x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; -} /* add_memory_region */ - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. - * - */ -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) -{ - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* - Visually we're performing the following (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) { - return -1; - } - - old_nr = *pnr_map; - - /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; iaddr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; /* true number of change-points */ - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if > , swap */ - /* or, if current= & last=, swap */ - if ((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing=1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; ipbios) - overlap_list[i] = overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ - current_type = 0; - for (i=0; itype > current_type) - current_type = overlap_list[i]->type; - /* continue building up new bios map based on this information */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ - if (new_bios[new_bios_entry].size != 0) - if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; - } - last_type = current_type; - } - } - new_nr = new_bios_entry; /* retain count for new bios entries */ - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - * - * We check to see that the memory map contains at least 2 elements - * before we'll use it, because the detection code in setup.S may - * not be perfect and most every PC known to man has two memory - * regions: one from 0 to 640k, and one from 1mb up. (The IBM - * thinkpad 560x, for example, does not cooperate with the memory - * detection code.) - */ -int __init copy_e820_map(struct e820entry *biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { - u64 start = biosmap->addr; - u64 size = biosmap->size; - u64 end = start + size; - u32 type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - add_memory_region(start, size, type); - } while (biosmap++, --nr_map); - - return 0; -} - /* * Find the highest page frame number we have available */ @@ -535,86 +303,12 @@ void __init register_bootmem_low_pages(unsigned long max_low_pfn) } } -void __init e820_register_memory(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; - - /* - * Search for the biggest gap in the low 32 bits of the e820 - * memory space. - */ - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - } - } - if (start < last) - last = start; - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", - pci_mem_start, gapstart, gapsize); -} - -static void __init print_memory_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(" %s: %016Lx - %016Lx ", who, - e820.map[i].addr, - e820.map[i].addr + e820.map[i].size); - switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; - case E820_RESERVED: - printk("(reserved)\n"); - break; - case E820_ACPI: - printk("(ACPI data)\n"); - break; - case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %u\n", e820.map[i].type); - break; - } - } -} - void __init limit_regions(unsigned long long size) { unsigned long long current_addr; int i; - print_memory_map("limit_regions start"); + e820_print_map("limit_regions start"); for (i = 0; i < e820.nr_map; i++) { current_addr = e820.map[i].addr + e820.map[i].size; if (current_addr < size) @@ -633,62 +327,10 @@ void __init limit_regions(unsigned long long size) e820.nr_map = i + 1; e820.map[i].size -= current_addr - size; } - print_memory_map("limit_regions endfor"); + e820_print_map("limit_regions endfor"); return; } - print_memory_map("limit_regions endfunc"); -} - -/* - * This function checks if any part of the range is mapped - * with type. - */ -int -e820_any_mapped(u64 start, u64 end, unsigned type) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { - const struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(e820_any_mapped); - - /* - * This function checks if the entire range is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) -{ - u64 start = s; - u64 end = e; - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - /* if the region is at the beginning of we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full - * coverage */ - if (start >= end) - return 1; /* we're done */ - } - return 0; + e820_print_map("limit_regions endfunc"); } /* Overridden in paravirt.c if CONFIG_PARAVIRT */ @@ -700,7 +342,7 @@ char * __init __attribute__((weak)) memory_setup(void) void __init setup_memory_map(void) { printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - print_memory_map(memory_setup()); + e820_print_map(memory_setup()); } static int __initdata user_defined_memmap; @@ -783,55 +425,12 @@ static int __init parse_memmap(char *arg) return 0; } early_param("memmap", parse_memmap); -u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type) -{ - int i; - u64 real_updated_size = 0; - - BUG_ON(old_type == new_type); - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 final_start, final_end; - if (ei->type != old_type) - continue; - /* totally covered? */ - if (ei->addr >= start && - (ei->addr + ei->size) <= (start + size)) { - ei->type = new_type; - real_updated_size += ei->size; - continue; - } - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(start + size, ei->addr + ei->size); - if (final_start >= final_end) - continue; - add_memory_region(final_start, final_end - final_start, - new_type); - real_updated_size += final_end - final_start; - } - - return real_updated_size; -} void __init finish_e820_parsing(void) { if (user_defined_memmap) { printk(KERN_INFO "user-defined physical RAM map:\n"); - print_memory_map("user"); + e820_print_map("user"); } } -void __init update_e820(void) -{ - u8 nr_map; - - nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr_map)) - return; - e820.nr_map = nr_map; - printk(KERN_INFO "modified physical RAM map:\n"); - print_memory_map("modified"); -} diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index c45b4de..28a14eb 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -29,8 +29,6 @@ #include #include -struct e820map e820; - /* * PFN of last memory page. */ @@ -176,62 +174,6 @@ again: } return changed; } -/* - * This function checks if any part of the range is mapped - * with type. - */ -int -e820_any_mapped(unsigned long start, unsigned long end, unsigned type) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (type && ei->type != type) - continue; - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(e820_any_mapped); - -/* - * This function checks if the entire range is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init e820_all_mapped(unsigned long start, unsigned long end, - unsigned type) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - - /* if the region is at the beginning of we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* - * if start is now at or beyond end, we're done, full - * coverage - */ - if (start >= end) - return 1; - } - return 0; -} /* * Find a free area with specified alignment in a specific range. @@ -435,24 +377,6 @@ e820_register_active_regions(int nid, unsigned long start_pfn, } /* - * Add a memory region to the kernel e820 map. - */ -void __init add_memory_region(unsigned long start, unsigned long size, int type) -{ - int x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; -} - -/* * Find the hole size (in bytes) in the memory range. * @start: starting address of the memory range to scan * @end: ending address of the memory range to scan @@ -473,266 +397,6 @@ unsigned long __init e820_hole_size(unsigned long start, unsigned long end) return end - start - (ram << PAGE_SHIFT); } -static void __init e820_print_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(KERN_INFO " %s: %016Lx - %016Lx ", who, - (unsigned long long) e820.map[i].addr, - (unsigned long long) - (e820.map[i].addr + e820.map[i].size)); - switch (e820.map[i].type) { - case E820_RAM: - printk(KERN_CONT "(usable)\n"); - break; - case E820_RESERVED: - printk(KERN_CONT "(reserved)\n"); - break; - case E820_ACPI: - printk(KERN_CONT "(ACPI data)\n"); - break; - case E820_NVS: - printk(KERN_CONT "(ACPI NVS)\n"); - break; - default: - printk(KERN_CONT "type %u\n", e820.map[i].type); - break; - } - } -} - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. - * - */ -static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) -{ - struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ - }; - static struct change_member change_point_list[2*E820MAX] __initdata; - static struct change_member *change_point[2*E820MAX] __initdata; - static struct e820entry *overlap_list[E820MAX] __initdata; - static struct e820entry new_bios[E820MAX] __initdata; - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* - Visually we're performing the following - (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) - return -1; - - old_nr = *pnr_map; - - /* bail out if we find any unreasonable addresses in bios map */ - for (i = 0; i < old_nr; i++) - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) - return -1; - - /* create pointers for initial change-point information (for sorting) */ - for (i = 0; i < 2 * old_nr; i++) - change_point[i] = &change_point_list[i]; - - /* record all known change-points (starting and ending addresses), - omitting those that are for empty memory regions */ - chgidx = 0; - for (i = 0; i < old_nr; i++) { - if (biosmap[i].size != 0) { - change_point[chgidx]->addr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + - biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i = 1; i < chg_nr; i++) { - unsigned long long curaddr, lastaddr; - unsigned long long curpbaddr, lastpbaddr; - - curaddr = change_point[i]->addr; - lastaddr = change_point[i - 1]->addr; - curpbaddr = change_point[i]->pbios->addr; - lastpbaddr = change_point[i - 1]->pbios->addr; - - /* - * swap entries, when: - * - * curaddr > lastaddr or - * curaddr == lastaddr and curaddr == curpbaddr and - * lastaddr != lastpbaddr - */ - if (curaddr < lastaddr || - (curaddr == lastaddr && curaddr == curpbaddr && - lastaddr != lastpbaddr)) { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing = 1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries = 0; /* number of entries in the overlap table */ - new_bios_entry = 0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - - /* loop through change-points, determining affect on the new bios map */ - for (chgidx = 0; chgidx < chg_nr; chgidx++) { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == - change_point[chgidx]->pbios->addr) { - /* - * add map entry to overlap list (> 1 entry - * implies an overlap) - */ - overlap_list[overlap_entries++] = - change_point[chgidx]->pbios; - } else { - /* - * remove entry from list (order independent, - * so swap with last) - */ - for (i = 0; i < overlap_entries; i++) { - if (overlap_list[i] == - change_point[chgidx]->pbios) - overlap_list[i] = - overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* - * if there are overlapping entries, decide which - * "type" to use (larger value takes precedence -- - * 1=usable, 2,3,4,4+=unusable) - */ - current_type = 0; - for (i = 0; i < overlap_entries; i++) - if (overlap_list[i]->type > current_type) - current_type = overlap_list[i]->type; - /* - * continue building up new bios map based on this - * information - */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* - * move forward only if the new size - * was non-zero - */ - if (new_bios[new_bios_entry].size != 0) - /* - * no more space left for new - * bios entries ? - */ - if (++new_bios_entry >= E820MAX) - break; - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = - change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr = change_point[chgidx]->addr; - } - last_type = current_type; - } - } - /* retain count for new bios entries */ - new_nr = new_bios_entry; - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - */ -static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { - u64 start = biosmap->addr; - u64 size = biosmap->size; - u64 end = start + size; - u32 type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - add_memory_region(start, size, type); - } while (biosmap++, --nr_map); - return 0; -} - static void early_panic(char *msg) { early_printk(msg); @@ -829,114 +493,6 @@ void __init finish_e820_parsing(void) } } -u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type) -{ - int i; - u64 real_updated_size = 0; - - BUG_ON(old_type == new_type); - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 final_start, final_end; - if (ei->type != old_type) - continue; - /* totally covered? */ - if (ei->addr >= start && - (ei->addr + ei->size) <= (start + size)) { - ei->type = new_type; - real_updated_size += ei->size; - continue; - } - /* partially covered */ - final_start = max(start, ei->addr); - final_end = min(start + size, ei->addr + ei->size); - if (final_start >= final_end) - continue; - add_memory_region(final_start, final_end - final_start, - new_type); - real_updated_size += final_end - final_start; - } - return real_updated_size; -} - -void __init update_e820(void) -{ - u8 nr_map; - - nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr_map)) - return; - e820.nr_map = nr_map; - printk(KERN_INFO "modified physical RAM map:\n"); - e820_print_map("modified"); -} - -unsigned long pci_mem_start = 0xaeedbabe; -EXPORT_SYMBOL(pci_mem_start); - -/* - * Search for the biggest gap in the low 32 bits of the e820 - * memory space. We pass this space to PCI to assign MMIO resources - * for hotplug or unconfigured devices in. - * Hopefully the BIOS let enough space left. - */ -__init void e820_setup_gap(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long last; - int i; - int found = 0; - - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - found = 1; - } - } - if (start < last) - last = start; - } - - if (!found) { - gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " - "address range\n" - KERN_ERR "PCI: Unassigned devices with 32bit resource " - "registers may break!\n"); - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk(KERN_INFO - "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", - pci_mem_start, gapstart, gapsize); -} - int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) { int i; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index b54c79c..5faeab6 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -875,7 +875,7 @@ void __init setup_arch(char **cmdline_p) get_smp_config(); #endif - e820_register_memory(); + e820_setup_gap(); e820_mark_nosave_regions(); #ifdef CONFIG_VT diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 7004251..b5b519f 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -20,6 +20,20 @@ struct e820map { __u32 nr_map; struct e820entry map[E820MAX]; }; + +extern struct e820map e820; + +extern int e820_any_mapped(u64 start, u64 end, unsigned type); +extern int e820_all_mapped(u64 start, u64 end, unsigned type); +extern void add_memory_region(u64 start, u64 size, int type); +extern void e820_print_map(char *who); +extern int sanitize_e820_map(struct e820entry *biosmap, char *pnr_map); +extern int copy_e820_map(struct e820entry *biosmap, int nr_map); +extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, + unsigned new_type); +extern void update_e820(void); +extern void e820_setup_gap(void); + #endif /* __ASSEMBLY__ */ #define ISA_START_ADDRESS 0xa0000 diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index af0711b..9576b43 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -21,19 +21,8 @@ extern void setup_memory_map(void); extern void finish_e820_parsing(void); -extern struct e820map e820; -extern void update_e820(void); - -extern int e820_all_mapped(unsigned long start, unsigned long end, - unsigned type); -extern int e820_any_mapped(u64 start, u64 end, unsigned type); extern void propagate_e820_map(void); extern void register_bootmem_low_pages(unsigned long max_low_pfn); -extern void add_memory_region(unsigned long long start, - unsigned long long size, int type); -extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type); -extern void e820_register_memory(void); extern void limit_regions(unsigned long long size); extern void init_iomem_resources(struct resource *code_resource, struct resource *data_resource, @@ -47,6 +36,5 @@ static inline void e820_mark_nosave_regions(void) } #endif - #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index 6ae3e28..9fac77e 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h @@ -19,34 +19,22 @@ extern unsigned long find_e820_area(unsigned long start, unsigned long end, extern unsigned long find_e820_area_size(unsigned long start, unsigned long *sizep, unsigned long align); -extern void add_memory_region(unsigned long start, unsigned long size, - int type); -extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, - unsigned new_type); extern void setup_memory_region(void); extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); extern void e820_reserve_resources(void); extern void e820_mark_nosave_regions(void); -extern int e820_any_mapped(unsigned long start, unsigned long end, - unsigned type); -extern int e820_all_mapped(unsigned long start, unsigned long end, - unsigned type); extern int e820_any_non_reserved(unsigned long start, unsigned long end); extern int is_memory_any_valid(unsigned long start, unsigned long end); extern int e820_all_non_reserved(unsigned long start, unsigned long end); extern int is_memory_all_valid(unsigned long start, unsigned long end); extern unsigned long e820_hole_size(unsigned long start, unsigned long end); -extern void e820_setup_gap(void); extern void e820_register_active_regions(int nid, unsigned long start_pfn, unsigned long end_pfn); extern void finish_e820_parsing(void); -extern struct e820map e820; -extern void update_e820(void); - extern void reserve_early(unsigned long start, unsigned long end, char *name); extern void free_early(unsigned long start, unsigned long end); extern void early_res_to_bootmem(unsigned long start, unsigned long end); -- cgit v0.10.2 From 8004dd965b13b01a96def054d420f6df7ff22d53 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 May 2008 17:40:39 -0700 Subject: x86: amd opteron TOM2 mask val fix there is a typo in the mask value, need to remove that extra 0, to avoid 4bit clearing. Signed-off-by: Yinghal Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index a83f5cd7..509bd3d 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -233,7 +233,7 @@ void __init get_mtrr_state(void) mtrr_tom2 = high; mtrr_tom2 <<= 32; mtrr_tom2 |= low; - mtrr_tom2 &= 0xffffff8000000ULL; + mtrr_tom2 &= 0xffffff800000ULL; } if (mtrr_show) { int high_width; diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index 5c2799c..bfefdf0 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -384,7 +384,7 @@ static int __init early_fill_mp_bus_info(void) /* need to take out [0, TOM) for RAM*/ address = MSR_K8_TOP_MEM1; rdmsrl(address, val); - end = (val & 0xffffff8000000ULL); + end = (val & 0xffffff800000ULL); printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); if (end < (1ULL<<32)) update_range(range, 0, end - 1); @@ -478,7 +478,7 @@ static int __init early_fill_mp_bus_info(void) /* TOP_MEM2 */ address = MSR_K8_TOP_MEM2; rdmsrl(address, val); - end = (val & 0xffffff8000000ULL); + end = (val & 0xffffff800000ULL); printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); update_range(range, 1ULL<<32, end - 1); } -- cgit v0.10.2 From 3f03c54a34fa3da680b3341dd3ba965ef3394bd1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 9 May 2008 22:40:52 -0700 Subject: x86: mtrr cleanup for converting continuous to discrete layout - fix #2 disable the noisy print out. also use the one the less spare mtrr reg. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index e6c162c..0642201 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -730,6 +730,7 @@ struct var_mtrr_range_state { }; struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; +static int __initdata debug_print; static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, @@ -748,10 +749,12 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, nr_range = add_range_with_merge(range, nr_range, base, base + size - 1); } - printk(KERN_DEBUG "After WB checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + if (debug_print) { + printk(KERN_DEBUG "After WB checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + } /* take out UC ranges */ for (i = 0; i < num_var_ranges; i++) { @@ -775,17 +778,21 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, continue; nr_range++; } - printk(KERN_DEBUG "After UC checking\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", - range[i].start, range[i].end + 1); + if (debug_print) { + printk(KERN_DEBUG "After UC checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + range[i].start, range[i].end + 1); + } /* sort the ranges */ sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); - printk(KERN_DEBUG "After sorting\n"); - for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + if (debug_print) { + printk(KERN_DEBUG "After sorting\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + } /* clear those is not used */ for (i = nr_range; i < RANGE_NUM; i++) @@ -912,12 +919,13 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, align = max_align; sizek = 1 << align; - printk(KERN_DEBUG "Setting variable MTRR %d, base: %ldMB, " - "range: %ldMB, type %s\n", - reg, range_startk >> 10, sizek >> 10, - (type == MTRR_TYPE_UNCACHABLE)?"UC": - ((type == MTRR_TYPE_WRBACK)?"WB":"Other") - ); + if (debug_print) + printk(KERN_DEBUG "Setting variable MTRR %d, " + "base: %ldMB, range: %ldMB, type %s\n", + reg, range_startk >> 10, sizek >> 10, + (type == MTRR_TYPE_UNCACHABLE)?"UC": + ((type == MTRR_TYPE_WRBACK)?"WB":"Other") + ); save_var_mtrr(reg++, range_startk, sizek, type); range_startk += sizek; range_sizek -= sizek; @@ -963,7 +971,9 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); if (range0_sizek == state->range_sizek) { - printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", range0_basek<<10, + if (debug_print) + printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", + range0_basek<<10, (range0_basek + state->range_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, state->range_sizek, MTRR_TYPE_WRBACK); @@ -980,7 +990,9 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, } if (range0_sizek) { - printk(KERN_DEBUG "range0: %016lx - %016lx\n", range0_basek<<10, + if (debug_print) + printk(KERN_DEBUG "range0: %016lx - %016lx\n", + range0_basek<<10, (range0_basek + range0_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, range0_sizek, MTRR_TYPE_WRBACK); @@ -1016,13 +1028,15 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, second_sizek = 0; } - printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10); state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, MTRR_TYPE_WRBACK); if (hole_sizek) { - printk(KERN_DEBUG "hole: %016lx - %016lx\n", hole_basek<<10, - (hole_basek + hole_sizek)<<10); + if (debug_print) + printk(KERN_DEBUG "hole: %016lx - %016lx\n", + hole_basek<<10, (hole_basek + hole_sizek)<<10); state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, MTRR_TYPE_UNCACHABLE); @@ -1120,7 +1134,6 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, /* Write the last range */ if (var_state.range_sizek != 0) range_to_mtrr_with_hole(&var_state, 0, 0); - printk(KERN_DEBUG "DONE variable MTRRs\n"); num_reg = var_state.reg; /* Clear out the extra MTRR's */ @@ -1219,6 +1232,7 @@ static int __init mtrr_cleanup(unsigned address_bits) if (mtrr_chunk_size && mtrr_gran_size) { int num_reg; + debug_print = 1; /* convert ranges to var ranges state */ num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, mtrr_gran_size); @@ -1242,8 +1256,8 @@ static int __init mtrr_cleanup(unsigned address_bits) result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; - printk(KERN_INFO " %sgran_size: %ldM \tchunk_size: %ldM \t", - result[i].bad?" BAD ":"", result[i].gran_sizek >> 10, + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", + result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, result[i].chunk_sizek >> 10); printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", result[i].num_reg, result[i].bad?"-":"", @@ -1254,6 +1268,7 @@ static int __init mtrr_cleanup(unsigned address_bits) } printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " "will find optimal one\n"); + debug_print = 0; memset(result, 0, sizeof(result[0])); } @@ -1265,9 +1280,10 @@ static int __init mtrr_cleanup(unsigned address_bits) chunk_size <<= 1) { int num_reg; - printk(KERN_INFO + if (debug_print) + printk(KERN_INFO "\ngran_size: %lldM chunk_size_size: %lldM\n", - gran_size >> 20, chunk_size >> 20); + gran_size >> 20, chunk_size >> 20); if (i >= NUM_RESULT) continue; @@ -1310,10 +1326,10 @@ static int __init mtrr_cleanup(unsigned address_bits) /* print out all */ for (i = 0; i < NUM_RESULT; i++) { - printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", + printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", result[i].num_reg, result[i].bad?"-":"", result[i].lose_cover_sizek >> 10); } @@ -1322,7 +1338,7 @@ static int __init mtrr_cleanup(unsigned address_bits) if (nr_mtrr_spare_reg >= num_var_ranges) nr_mtrr_spare_reg = num_var_ranges - 1; num_reg_good = -1; - for (i = 1; i < num_var_ranges + 1 - nr_mtrr_spare_reg; i++) { + for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { if (!min_loss_pfn[i]) { num_reg_good = i; break; @@ -1344,10 +1360,10 @@ static int __init mtrr_cleanup(unsigned address_bits) if (index_good != -1) { printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); i = index_good; - printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", + printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", result[i].gran_sizek >> 10, result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose cover RAM: %ldM \n", + printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", result[i].num_reg, result[i].lose_cover_sizek >> 10); /* convert ranges to var ranges state */ @@ -1355,6 +1371,7 @@ static int __init mtrr_cleanup(unsigned address_bits) chunk_size <<= 10; gran_size = result[i].gran_sizek; gran_size <<= 10; + debug_print = 1; x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); set_var_mtrr_all(address_bits); return 1; -- cgit v0.10.2 From e3f8ba81fdce852abe36a33eed7b243c4a0acfac Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:04 -0700 Subject: x86 boot: include missing smp.h header The patch: x86: convert cpu_to_apicid to be a per cpu variable introduced a dependency of ipi.h on smp.h in x86 builds with an allnoconfig. Including smp.h in ipi.h fixes the build error: In file included from arch/x86/kernel/traps_64.c:48: include/asm/ipi.h: In function 'send_IPI_mask_sequence': include/asm/ipi.h:114: error: 'per_cpu__x86_cpu_to_apicid' undeclared (first use in this function) Signed-off-by: Paul Jackson Cc: Mike Travis Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index ecc80f3..196d63c 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -20,6 +20,7 @@ #include #include +#include /* * the following functions deal with sending IPIs between CPUs. -- cgit v0.10.2 From e9197bf0114661195bee35e7795cfc42164d9b2c Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:10 -0700 Subject: x86 boot: remove some unused extern function declarations Remove three extern declarations for routines that don't exist. Fix a typo in a comment. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index c5066d5..afb07ff 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -233,7 +233,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, else bootmap_start = round_up(start, PAGE_SIZE); /* - * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like + * SMP_CACHE_BYTES could be enough, but init_bootmem_node like * to use that to align to PAGE_SIZE */ bootmap = early_node_mem(nodeid, bootmap_start, end, diff --git a/include/linux/efi.h b/include/linux/efi.h index a5f359a..807373d 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -287,7 +287,6 @@ efi_guid_unparse(efi_guid_t *guid, char *out) extern void efi_init (void); extern void *efi_get_pal_addr (void); extern void efi_map_pal_code (void); -extern void efi_map_memmap(void); extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timespec *ts); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ @@ -295,14 +294,11 @@ extern u64 efi_get_iobase (void); extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); -extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, - u64 attr); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern unsigned long efi_get_time(void); extern int efi_set_rtc_mmss(unsigned long nowtime); -extern int is_available_memory(efi_memory_desc_t * md); extern struct efi_memory_map memmap; /** -- cgit v0.10.2 From cb5dd7c104c64d7ba8ff4dd3eec3d9b92c378937 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:16 -0700 Subject: x86 boot: add header comment to dmi.h stating what it is The "dmi.h" file did not state anywhere in the file what "DMI" was. For those who know, it's obvious. For the rest of us, I added a brief opening comment. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index c5e3ed7..455575b 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -8,6 +8,11 @@ #include #include +/* + * DMI stands for "Desktop Management Interface". It is part + * of and an antecedent to, SMBIOS, which stands for System + * Management BIOS. See further: http://www.dmtf.org/standards + */ static char dmi_empty_string[] = " "; static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s) -- cgit v0.10.2 From c801ed3860fe2f84281d4cae4c3e6ca87e81e890 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:23 -0700 Subject: x86 boot: simplify pageblock_bits enum declaration The use of #defines with '##' pre-processor concatenation is a useful way to form several symbol names with a common pattern. But when there is just a single name obtained from that #define, it's just obfuscation. Better to just write the plain symbol name, as is. The following patch is a result of my wasting ten minutes looking through the kernel to figure out what 'PB_migrate_end' meant, and forgetting what I came to do, by the time I figured out that the #define PB_range macro defined it. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e875905..e8c0612 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -25,13 +25,11 @@ #include -/* Macro to aid the definition of ranges of bits */ -#define PB_range(name, required_bits) \ - name, name ## _end = (name + required_bits) - 1 - /* Bit indices that affect a whole block of pages */ enum pageblock_bits { - PB_range(PB_migrate, 3), /* 3 bits required for migrate types */ + PB_migrate, + PB_migrate_end = PB_migrate + 3 - 1, + /* 3 bits required for migrate types */ NR_PAGEBLOCK_BITS }; -- cgit v0.10.2 From b25e31cec7788ccba5749d10f8f4343fffff4750 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:28 -0700 Subject: x86 boot: minor code format fixes in e820 and efi routines Standardize a few pointer declarations to not have the extra space after the '*' character. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 28a14eb..c10b4ae 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -404,7 +404,7 @@ static void early_panic(char *msg) } /* We're not void only for x86 32-bit compat */ -char * __init machine_specific_memory_setup(void) +char *__init machine_specific_memory_setup(void) { char *who = "BIOS-e820"; /* diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index d0060fd..21a7b75 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -103,7 +103,7 @@ void __init efi_reserve_bootmem(void) memmap.nr_map * memmap.desc_size); } -void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) { static unsigned pages_mapped __initdata; unsigned i, pages; -- cgit v0.10.2 From c3965bd15118742d72b4bc1a290d37b3f081eb98 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:34 -0700 Subject: x86 boot: proper use of ARRAY_SIZE instead of repeated E820MAX constant This patch is motivated by a subsequent patch which will allow for more memory map entries on EFI supported systems than can be passed via the x86 legacy BIOS E820 interface. The legacy interface is limited to E820MAX == 128 memory entries, and that "E820MAX" manifest constant was used as the size for several arrays and loops over those arrays. The primary change in this patch is to change code loop sizes over those arrays from using the constant E820MAX, to using the ARRAY_SIZE() macro evaluated for the array being looped. That way, a subsequent patch can change the size of some of these arrays, without breaking this code. This patch also adds a parameter to the sanitize_e820_map() routine, which had an implicit size for the array passed it of E820MAX entries. This new parameter explicitly passes the size of said array. Once again, this will allow a subsequent patch to change that array size for some calls to sanitize_e820_map() without breaking the code. As part of enhancing the sanitize_e820_map() interface this way, I further combined the unnecessarily distinct x86_32 and x86_64 declarations for this routine into a single, commonly used, declaration. This patch in itself should make no difference to the resulting kernel binary. [ mingo@elte.hu: merged to -tip ] Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index acad32e..53165c9 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -13,6 +13,7 @@ */ #include "boot.h" +#include #define SMAP 0x534d4150 /* ASCII "SMAP" */ @@ -53,7 +54,7 @@ static int detect_memory_e820(void) count++; desc++; - } while (next && count < E820MAX); + } while (next && count < ARRAY_SIZE(boot_params.e820_map)); return boot_params.e820_entries = count; } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 2cb686f..2396b9d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -95,7 +95,7 @@ void __init add_memory_region(u64 start, u64 size, int type) { int x = e820.nr_map; - if (x == E820MAX) { + if (x == ARRAY_SIZE(e820.map)) { printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); return; } @@ -142,7 +142,8 @@ void __init e820_print_map(char *who) * replaces the original e820 map with a new one, removing overlaps. * */ -int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) +int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, + char *pnr_map) { struct change_member { struct e820entry *pbios; /* pointer to original bios entry */ @@ -314,7 +315,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) * no more space left for new * bios entries ? */ - if (++new_bios_entry >= E820MAX) + if (++new_bios_entry >= max_nr_map) break; } if (current_type != 0) { @@ -403,7 +404,7 @@ void __init update_e820(void) u8 nr_map; nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr_map)) + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) return; e820.nr_map = nr_map; printk(KERN_INFO "modified physical RAM map:\n"); diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index c10b4ae..58dc6ee 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -413,7 +413,9 @@ char *__init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + sanitize_e820_map(boot_params.e820_map, + ARRAY_SIZE(boot_params.e820_map), + &boot_params.e820_entries); if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) early_panic("Cannot find a valid memory map"); printk(KERN_INFO "BIOS-provided physical RAM map:\n"); @@ -484,7 +486,7 @@ void __init finish_e820_parsing(void) if (userdef) { char nr = e820.nr_map; - if (sanitize_e820_map(e820.map, &nr) < 0) + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) early_panic("Invalid user supplied memory map"); e820.nr_map = nr; diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 0c28a07..38a856c 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -163,7 +163,9 @@ char * __init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + sanitize_e820_map(boot_params.e820_map, + ARRAY_SIZE(boot_params.e820_map), + &boot_params.e820_entries); if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) { unsigned long mem_size; diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 5ae5466..662b5c0 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -111,7 +111,9 @@ char *__init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + sanitize_e820_map(boot_params.e820_map, + ARRAY_SIZE(boot_params.e820_map), + &boot_params.e820_entries); if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) { unsigned long mem_size; diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index b5b519f..65c891d 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -27,7 +27,8 @@ extern int e820_any_mapped(u64 start, u64 end, unsigned type); extern int e820_all_mapped(u64 start, u64 end, unsigned type); extern void add_memory_region(u64 start, u64 size, int type); extern void e820_print_map(char *who); -extern int sanitize_e820_map(struct e820entry *biosmap, char *pnr_map); +extern int +sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, char *pnr_map); extern int copy_e820_map(struct e820entry *biosmap, int nr_map); extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type); diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index fa6763a..ffa0f54 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -56,7 +56,6 @@ char * __init machine_specific_memory_setup(void); char *memory_setup(void); int __init copy_e820_map(struct e820entry *biosmap, int nr_map); -int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map); void __init add_memory_region(unsigned long long start, unsigned long long size, int type); -- cgit v0.10.2 From 028b785888c523baccdf27af0cdbf1deb92edec0 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:40 -0700 Subject: x86 boot: extend some internal memory map arrays to handle larger EFI input Extend internal boot time memory tables to allow for up to three entries per node, which may be larger than the 128 E820MAX entries handled by the legacy BIOS E820 interface. The EFI interface, if present, is capable of passing memory map entries for these larger node counts. This patch requires an earlier patch that rewrote code depending on these array sizes from using E820MAX explicitly to size loops, to instead using ARRAY_SIZE() of the applicable array. Another patch following this one will provide the code to pick up additional memory entries passed via the EFI interface from the BIOS and insert them in the following, now enlarged, arrays. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 2396b9d..3f7777b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -149,10 +149,10 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, struct e820entry *pbios; /* pointer to original bios entry */ unsigned long long addr; /* address for this change point */ }; - static struct change_member change_point_list[2*E820MAX] __initdata; - static struct change_member *change_point[2*E820MAX] __initdata; - static struct e820entry *overlap_list[E820MAX] __initdata; - static struct e820entry new_bios[E820MAX] __initdata; +static struct change_member change_point_list[2*E820_X_MAX] __initdata; +static struct change_member *change_point[2*E820_X_MAX] __initdata; +static struct e820entry *overlap_list[E820_X_MAX] __initdata; +static struct e820entry new_bios[E820_X_MAX] __initdata; struct change_member *change_tmp; unsigned long current_type, last_type; unsigned long long last_addr; diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 65c891d..ab18457 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -2,6 +2,41 @@ #define __ASM_E820_H #define E820MAP 0x2d0 /* our map */ #define E820MAX 128 /* number of entries in E820MAP */ + +/* + * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the + * constrained space in the zeropage. If we have more nodes than + * that, and if we've booted off EFI firmware, then the EFI tables + * passed us from the EFI firmware can list more nodes. Size our + * internal memory map tables to have room for these additional + * nodes, based on up to three entries per node for which the + * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT), + * plus E820MAX, allowing space for the possible duplicate E820 + * entries that might need room in the same arrays, prior to the + * call to sanitize_e820_map() to remove duplicates. The allowance + * of three memory map entries per node is "enough" entries for + * the initial hardware platform motivating this mechanism to make + * use of additional EFI map entries. Future platforms may want + * to allow more than three entries per node or otherwise refine + * this size. + */ + +/* + * Odd: 'make headers_check' complains about numa.h if I try + * to collapse the next two #ifdef lines to a single line: + * #if defined(__KERNEL__) && defined(CONFIG_EFI) + */ +#ifdef __KERNEL__ +#ifdef CONFIG_EFI +#include +#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES) +#else /* ! CONFIG_EFI */ +#define E820_X_MAX E820MAX +#endif +#else /* ! __KERNEL__ */ +#define E820_X_MAX E820MAX +#endif + #define E820NR 0x1e8 /* # entries in E820MAP */ #define E820_RAM 1 @@ -18,7 +53,7 @@ struct e820entry { struct e820map { __u32 nr_map; - struct e820entry map[E820MAX]; + struct e820entry map[E820_X_MAX]; }; extern struct e820map e820; -- cgit v0.10.2 From 6e9bcc796b120d17b08dde7ab958b82ddb899889 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:46 -0700 Subject: x86 boot: change sanitize_e820_map parameter from byte to int to allow bigger memory maps The map size counter passed into, and back out of, sanitize_e820_map(), was an eight bit type (char or u8), as derived from its origins in legacy BIOS E820 structures. This patch changes that type to an 'int', to allow this sanitize routine to also be used on larger maps (larger than the 256 count that fits in a char). The legacy BIOS E820 interface of course does not change; that remains at 8 bits for this count, holding up to E820MAX == 128 entries. But the kernel internals can handle more when those additional memory map entries are passed from the BIOS via EFI interfaces. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 3f7777b..91abf5b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -143,7 +143,7 @@ void __init e820_print_map(char *who) * */ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, - char *pnr_map) + int *pnr_map) { struct change_member { struct e820entry *pbios; /* pointer to original bios entry */ @@ -204,6 +204,7 @@ static struct e820entry new_bios[E820_X_MAX] __initdata; return -1; old_nr = *pnr_map; + BUG_ON(old_nr > max_nr_map); /* bail out if we find any unreasonable addresses in bios map */ for (i = 0; i < old_nr; i++) @@ -401,7 +402,7 @@ u64 __init update_memory_range(u64 start, u64 size, unsigned old_type, void __init update_e820(void) { - u8 nr_map; + int nr_map; nr_map = e820.nr_map; if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 58dc6ee..354fbb2 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -407,15 +407,18 @@ static void early_panic(char *msg) char *__init machine_specific_memory_setup(void) { char *who = "BIOS-e820"; + int new_nr; /* * Try to copy the BIOS-supplied E820-map. * * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ + new_nr = boot_params.e820_entries; sanitize_e820_map(boot_params.e820_map, ARRAY_SIZE(boot_params.e820_map), - &boot_params.e820_entries); + &new_nr); + boot_params.e820_entries = new_nr; if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) early_panic("Cannot find a valid memory map"); printk(KERN_INFO "BIOS-provided physical RAM map:\n"); @@ -484,7 +487,7 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { if (userdef) { - char nr = e820.nr_map; + int nr = e820.nr_map; if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) early_panic("Invalid user supplied memory map"); diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 38a856c..56b4c39 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -153,6 +153,7 @@ late_initcall(print_ipi_mode); char * __init machine_specific_memory_setup(void) { char *who; + int new_nr; who = "BIOS-e820"; @@ -163,9 +164,11 @@ char * __init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ + new_nr = boot_params.e820_entries; sanitize_e820_map(boot_params.e820_map, ARRAY_SIZE(boot_params.e820_map), - &boot_params.e820_entries); + &new_nr); + boot_params.e820_entries = new_nr; if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) { unsigned long mem_size; diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 662b5c0..f4aca9f 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -62,6 +62,7 @@ void __init time_init_hook(void) char *__init machine_specific_memory_setup(void) { char *who; + int new_nr; who = "NOT VOYAGER"; @@ -111,9 +112,11 @@ char *__init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ + new_nr = boot_params.e820_entries; sanitize_e820_map(boot_params.e820_map, ARRAY_SIZE(boot_params.e820_map), - &boot_params.e820_entries); + &new_nr); + boot_params.e820_entries = new_nr; if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) { unsigned long mem_size; diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index ab18457..1eb13b8 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -63,7 +63,7 @@ extern int e820_all_mapped(u64 start, u64 end, unsigned type); extern void add_memory_region(u64 start, u64 size, int type); extern void e820_print_map(char *who); extern int -sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, char *pnr_map); +sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map); extern int copy_e820_map(struct e820entry *biosmap, int nr_map); extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, unsigned new_type); -- cgit v0.10.2 From 5b7eb2e9ef4e467a1248537b47a63bab265be3cc Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:52 -0700 Subject: x86 boot: longer comment explaining sanitize_e820_map routine Elaborate on the comment for sanitize_e820_map(), epxlaining more what it does, what it inputs, and what it returns. Rearrange the placement of this comment to fit kernel conventions, before the routine's code rather than buried inside it. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 91abf5b..41c480a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -139,9 +139,64 @@ void __init e820_print_map(char *who) * Sanitize the BIOS e820 map. * * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. + * replaces the original e820 map with a new one, removing overlaps, + * and resolving conflicting memory types in favor of highest + * numbered type. * + * The input parameter biosmap points to an array of 'struct + * e820entry' which on entry has elements in the range [0, *pnr_map) + * valid, and which has space for up to max_nr_map entries. + * On return, the resulting sanitized e820 map entries will be in + * overwritten in the same location, starting at biosmap. + * + * The integer pointed to by pnr_map must be valid on entry (the + * current number of valid entries located at biosmap) and will + * be updated on return, with the new number of valid entries + * (something no more than max_nr_map.) + * + * The return value from sanitize_e820_map() is zero if it + * successfully 'sanitized' the map entries passed in, and is -1 + * if it did nothing, which can happen if either of (1) it was + * only passed one map entry, or (2) any of the input map entries + * were invalid (start + size < start, meaning that the size was + * so big the described memory range wrapped around through zero.) + * + * Visually we're performing the following + * (1,2,3,4 = memory types)... + * + * Sample memory map (w/overlaps): + * ____22__________________ + * ______________________4_ + * ____1111________________ + * _44_____________________ + * 11111111________________ + * ____________________33__ + * ___________44___________ + * __________33333_________ + * ______________22________ + * ___________________2222_ + * _________111111111______ + * _____________________11_ + * _________________4______ + * + * Sanitized equivalent (no overlap): + * 1_______________________ + * _44_____________________ + * ___1____________________ + * ____22__________________ + * ______11________________ + * _________1______________ + * __________3_____________ + * ___________44___________ + * _____________33_________ + * _______________2________ + * ________________1_______ + * _________________4______ + * ___________________2____ + * ____________________33__ + * ______________________4_ */ + int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map) { @@ -162,43 +217,6 @@ static struct e820entry new_bios[E820_X_MAX] __initdata; int old_nr, new_nr, chg_nr; int i; - /* - Visually we're performing the following - (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - /* if there's only one memory region, don't bother */ if (*pnr_map < 2) return -1; -- cgit v0.10.2 From 69c9189320c46b14e5ae3ad4b3a0d35cc63cba20 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:58 -0700 Subject: x86 boot: add code to add BIOS provided EFI memory entries to kernel Add to the kernels boot memory map 'memmap' entries found in the EFI memory descriptors passed in from the BIOS. On EFI systems, up to E820MAX == 128 memory map entries can be passed via the legacy E820 interface (limited by the size of the 'zeropage'). These entries can be duplicated in the EFI descriptors also passed from the BIOS, and possibly more entries passed by the EFI interface, which does not have the E820MAX limit on number of memory map entries. This code doesn't worry about the likely duplicate, overlapping or (unlikely) conflicting entries between the EFI map and the E820 map. It just dumps all the EFI entries into the memmap[] array (which already has the E820 entries) and lets the existing routine sanitize_e820_map() sort the mess out. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 77d424c..4a1a26d 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -213,6 +213,31 @@ unsigned long efi_get_time(void) eft.minute, eft.second); } +/* + * Tell the kernel about the EFI memory map. This might include + * more than the max 128 entries that can fit in the e820 legacy + * (zeropage) memory map. + */ + +static void __init add_efi_memmap(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + int e820_type; + + if (md->attribute & EFI_MEMORY_WB) + e820_type = E820_RAM; + else + e820_type = E820_RESERVED; + add_memory_region(start, size, e820_type); + } + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); +} + #if EFI_DEBUG static void __init print_efi_memmap(void) { @@ -370,6 +395,7 @@ void __init efi_init(void) if (memmap.desc_size != sizeof(efi_memory_desc_t)) printk(KERN_WARNING "Kernel-defined memdesc" "doesn't match the one from EFI!\n"); + add_efi_memmap(); /* Setup for EFI runtime service */ reboot_type = BOOT_EFI; -- cgit v0.10.2 From a4c81cf684350797939416c99effb9d3ae46bca6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 18 May 2008 01:18:57 -0700 Subject: x86: extend e820 ealy_res support 32bit move early_res related from e820_64.c to e820.c make edba detection to be done in head32.c remove smp_alloc_memory, because we have fixed trampoline address now. Signed-off-by: Yinghai Lu arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 196 -------------------------------- arch/x86/kernel/head32.c | 76 ++++++++++++ arch/x86/kernel/setup_32.c | 109 +++--------------- arch/x86/kernel/smpboot.c | 17 -- arch/x86/kernel/trampoline.c | 2 arch/x86/mach-voyager/voyager_smp.c | 9 - include/asm-x86/e820.h | 6 + include/asm-x86/e820_64.h | 9 - include/asm-x86/smp.h | 1 arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 196 -------------------------------- arch/x86/kernel/head32.c | 76 ++++++++++++ arch/x86/kernel/setup_32.c | 109 +++--------------- arch/x86/kernel/smpboot.c | 17 -- arch/x86/kernel/trampoline.c | 2 arch/x86/mach-voyager/voyager_smp.c | 9 - include/asm-x86/e820.h | 6 + include/asm-x86/e820_64.h | 9 - include/asm-x86/smp.h | 1 arch/x86/kernel/e820.c | 214 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 196 -------------------------------- arch/x86/kernel/head32.c | 76 ++++++++++++ arch/x86/kernel/setup_32.c | 109 +++--------------- arch/x86/kernel/smpboot.c | 17 -- arch/x86/kernel/trampoline.c | 2 arch/x86/mach-voyager/voyager_smp.c | 9 - include/asm-x86/e820.h | 6 + include/asm-x86/e820_64.h | 9 - include/asm-x86/smp.h | 1 10 files changed, 320 insertions(+), 319 deletions(-) Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 41c480a..35da8cd 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -22,7 +22,9 @@ #include #include #include +#include #include +#include struct e820map e820; @@ -493,3 +495,215 @@ __init void e820_setup_gap(void) pci_mem_start, gapstart, gapsize); } + +/* + * Early reserved memory areas. + */ +#define MAX_EARLY_RES 20 + +struct early_res { + u64 start, end; + char name[16]; +}; +static struct early_res early_res[MAX_EARLY_RES] __initdata = { + { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ +#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, +#endif +#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) + /* + * But first pinch a few for the stack/trampoline stuff + * FIXME: Don't need the extra page at 4K, but need to fix + * trampoline before removing it. (see the GDT stuff) + */ + { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, + /* + * Has to be in very low memory so we can execute + * real-mode AP code. + */ + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, +#endif + {} +}; + +void __init reserve_early(u64 start, u64 end, char *name) +{ + int i; + struct early_res *r; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if (end > r->start && start < r->end) + panic("Overlapping early reservations %llx-%llx %s to %llx-%llx %s\n", + start, end - 1, name?name:"", r->start, + r->end - 1, r->name); + } + if (i >= MAX_EARLY_RES) + panic("Too many early reservations"); + r = &early_res[i]; + r->start = start; + r->end = end; + if (name) + strncpy(r->name, name, sizeof(r->name) - 1); +} + +void __init free_early(u64 start, u64 end) +{ + struct early_res *r; + int i, j; + + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if (start == r->start && end == r->end) + break; + } + if (i >= MAX_EARLY_RES || !early_res[i].end) + panic("free_early on not reserved area: %llx-%llx!", + start, end); + + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) + ; + + memmove(&early_res[i], &early_res[i + 1], + (j - 1 - i) * sizeof(struct early_res)); + + early_res[j - 1].end = 0; +} + +void __init early_res_to_bootmem(u64 start, u64 end) +{ + int i; + u64 final_start, final_end; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) + continue; + printk(KERN_INFO " early res: %d [%llx-%llx] %s\n", i, + final_start, final_end - 1, r->name); +#ifdef CONFIG_X86_64 + reserve_bootmem_generic(final_start, final_end - final_start); +#else + reserve_bootmem(final_start, final_end - final_start, + BOOTMEM_DEFAULT); +#endif + } +} + +/* Check for already reserved areas */ +static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) +{ + int i; + u64 addr = *addrp, last; + int changed = 0; +again: + last = addr + size; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + if (last >= r->start && addr < r->end) { + *addrp = addr = round_up(r->end, align); + changed = 1; + goto again; + } + } + return changed; +} + +/* Check for already reserved areas */ +static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) +{ + int i; + u64 addr = *addrp, last; + u64 size = *sizep; + int changed = 0; +again: + last = addr + size; + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + if (last > r->start && addr < r->start) { + size = r->start - addr; + changed = 1; + goto again; + } + if (last > r->end && addr < r->end) { + addr = round_up(r->end, align); + size = last - addr; + changed = 1; + goto again; + } + if (last <= r->end && addr >= r->start) { + (*sizep)++; + return 0; + } + } + if (changed) { + *addrp = addr; + *sizep = size; + } + return changed; +} + +/* + * Find a free area with specified alignment in a specific range. + */ +u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr, last; + u64 ei_last; + + if (ei->type != E820_RAM) + continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + continue; + while (bad_addr(&addr, size, align) && addr+size <= ei_last) + ; + last = addr + size; + if (last > ei_last) + continue; + if (last > end) + continue; + return addr; + } + return -1ULL; +} + +/* + * Find next free range after *start + */ +u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr, last; + u64 ei_last; + + if (ei->type != E820_RAM) + continue; + addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + continue; + *sizep = ei_last - addr; + while (bad_addr_size(&addr, sizep, align) && + addr + *sizep <= ei_last) + ; + last = addr + *sizep; + if (last > ei_last) + continue; + return addr; + } + return -1UL; + +} diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 354fbb2..07941b5 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -47,202 +47,6 @@ unsigned long max_pfn_mapped; static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; /* - * Early reserved memory areas. - */ -#define MAX_EARLY_RES 20 - -struct early_res { - unsigned long start, end; - char name[16]; -}; -static struct early_res early_res[MAX_EARLY_RES] __initdata = { - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ -#ifdef CONFIG_X86_TRAMPOLINE - { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, -#endif - {} -}; - -void __init reserve_early(unsigned long start, unsigned long end, char *name) -{ - int i; - struct early_res *r; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n", - start, end - 1, name?name:"", r->start, r->end - 1, r->name); - } - if (i >= MAX_EARLY_RES) - panic("Too many early reservations"); - r = &early_res[i]; - r->start = start; - r->end = end; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); -} - -void __init free_early(unsigned long start, unsigned long end) -{ - struct early_res *r; - int i, j; - - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; - if (start == r->start && end == r->end) - break; - } - if (i >= MAX_EARLY_RES || !early_res[i].end) - panic("free_early on not reserved area: %lx-%lx!", start, end); - - for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; -} - -void __init early_res_to_bootmem(unsigned long start, unsigned long end) -{ - int i; - unsigned long final_start, final_end; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) - continue; - printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, - final_start, final_end - 1, r->name); - reserve_bootmem_generic(final_start, final_end - final_start); - } -} - -/* Check for already reserved areas */ -static inline int __init -bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) -{ - int i; - unsigned long addr = *addrp, last; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last >= r->start && addr < r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; - } - } - return changed; -} - -/* Check for already reserved areas */ -static inline int __init -bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) -{ - int i; - unsigned long addr = *addrp, last; - unsigned long size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} - -/* - * Find a free area with specified alignment in a specific range. - */ -unsigned long __init find_e820_area(unsigned long start, unsigned long end, - unsigned long size, unsigned long align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr, last; - unsigned long ei_last; - - if (ei->type != E820_RAM) - continue; - addr = round_up(ei->addr, align); - ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - continue; - if (last > end) - continue; - return addr; - } - return -1UL; -} - -/* - * Find next free range after *start - */ -unsigned long __init find_e820_area_size(unsigned long start, - unsigned long *sizep, - unsigned long align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr, last; - unsigned long ei_last; - - if (ei->type != E820_RAM) - continue; - addr = round_up(ei->addr, align); - ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && - addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - continue; - return addr; - } - return -1UL; - -} -/* * Find the highest page frame number we have available */ unsigned long __init e820_end_of_ram(void) diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3db0590..c216d3c 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -8,7 +8,83 @@ #include #include +#include +#include +#include +#include + +#define BIOS_LOWMEM_KILOBYTES 0x413 + +/* + * The BIOS places the EBDA/XBDA at the top of conventional + * memory, and usually decreases the reported amount of + * conventional memory (int 0x12) too. This also contains a + * workaround for Dell systems that neglect to reserve EBDA. + * The same workaround also avoids a problem with the AMD768MPX + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in. + */ +static void __init reserve_ebda_region(void) +{ + unsigned int lowmem, ebda_addr; + + /* To determine the position of the EBDA and the */ + /* end of conventional memory, we need to look at */ + /* the BIOS data area. In a paravirtual environment */ + /* that area is absent. We'll just have to assume */ + /* that the paravirt case can handle memory setup */ + /* correctly, without our help. */ + if (paravirt_enabled()) + return; + + /* end of low (conventional) memory */ + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); + lowmem <<= 10; + + /* start of EBDA area */ + ebda_addr = get_bios_ebda(); + + /* Fixup: bios puts an EBDA in the top 64K segment */ + /* of conventional memory, but does not adjust lowmem. */ + if ((lowmem - ebda_addr) <= 0x10000) + lowmem = ebda_addr; + + /* Fixup: bios does not report an EBDA at all. */ + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ + if ((ebda_addr == 0) && (lowmem >= 0x9f000)) + lowmem = 0x9f000; + + /* Paranoia: should never happen, but... */ + if ((lowmem == 0) || (lowmem >= 0x100000)) + lowmem = 0x9f000; + + /* reserve all memory between lowmem and the 1MB mark */ + reserve_early(lowmem, 0x100000, "BIOS reserved"); +} + void __init i386_start_kernel(void) { + reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); + +#ifdef CONFIG_BLK_DEV_INITRD + /* Reserve INITRD */ + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_end = ramdisk_image + ramdisk_size; + reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); + } +#endif + reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE"); + + reserve_ebda_region(); + + /* + * At this point everything still needed from the boot loader + * or BIOS or kernel text should be early reserved or marked not + * RAM in e820. All other memory is free game. + */ + start_kernel(); } diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 5faeab6..fed482c 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -359,56 +359,6 @@ unsigned long __init find_max_low_pfn(void) return max_low_pfn; } -#define BIOS_LOWMEM_KILOBYTES 0x413 - -/* - * The BIOS places the EBDA/XBDA at the top of conventional - * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. - */ -static void __init reserve_ebda_region(void) -{ - unsigned int lowmem, ebda_addr; - - /* To determine the position of the EBDA and the */ - /* end of conventional memory, we need to look at */ - /* the BIOS data area. In a paravirtual environment */ - /* that area is absent. We'll just have to assume */ - /* that the paravirt case can handle memory setup */ - /* correctly, without our help. */ - if (paravirt_enabled()) - return; - - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); - - /* Fixup: bios puts an EBDA in the top 64K segment */ - /* of conventional memory, but does not adjust lowmem. */ - if ((lowmem - ebda_addr) <= 0x10000) - lowmem = ebda_addr; - - /* Fixup: bios does not report an EBDA at all. */ - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) - lowmem = 0x9f000; - - /* Paranoia: should never happen, but... */ - if ((lowmem == 0) || (lowmem >= 0x100000)) - lowmem = 0x9f000; - - /* reserve all memory between lowmem and the 1MB mark */ - reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT); -} - #ifndef CONFIG_NEED_MULTIPLE_NODES static void __init setup_bootmem_allocator(void); static unsigned long __init setup_memory(void) @@ -522,25 +472,32 @@ static void __init reserve_initrd(void) unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; unsigned long ramdisk_here; - initrd_start = 0; - if (!boot_params.hdr.type_of_loader || !ramdisk_image || !ramdisk_size) return; /* No initrd provided by bootloader */ + initrd_start = 0; + if (ramdisk_end < ramdisk_image) { + free_bootmem(ramdisk_image, ramdisk_size); printk(KERN_ERR "initrd wraps around end of memory, " "disabling initrd\n"); return; } if (ramdisk_size >= end_of_lowmem/2) { + free_bootmem(ramdisk_image, ramdisk_size); printk(KERN_ERR "initrd too large to handle, " "disabling initrd\n"); return; } + if (ramdisk_end <= end_of_lowmem) { /* All in lowmem, easy case */ - reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT); + /* + * don't need to reserve again, already reserved early + * in i386_start_kernel, and early_res_to_bootmem + * convert that to reserved in bootmem + */ initrd_start = ramdisk_image + PAGE_OFFSET; initrd_end = initrd_start+ramdisk_size; return; @@ -582,6 +539,8 @@ static void __init relocate_initrd(void) p = (char *)__va(ramdisk_image); memcpy(q, p, clen); q += clen; + /* need to free these low pages...*/ + free_bootmem(ramdisk_image, clen); ramdisk_image += clen; ramdisk_size -= clen; } @@ -600,47 +559,28 @@ static void __init relocate_initrd(void) ramdisk_image += clen; ramdisk_size -= clen; } + /* high pages is not converted by early_res_to_bootmem */ } #endif /* CONFIG_BLK_DEV_INITRD */ void __init setup_bootmem_allocator(void) { - unsigned long bootmap_size; + unsigned long bootmap_size, bootmap; /* * Initialize the boot-time allocator (with low memory only): */ - bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); - + bootmap_size = bootmem_bootmap_pages(max_low_pfn)<> PAGE_SHIFT, max_low_pfn); register_bootmem_low_pages(max_low_pfn); + early_res_to_bootmem(0, max_low_pfn<llc_shared_map; } -#ifdef CONFIG_X86_32 -/* - * We are called very early to get the low memory for the - * SMP bootup trampoline page. - */ -void __init smp_alloc_memory(void) -{ - trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); - /* - * Has to be in very low memory so we can execute - * real-mode AP code. - */ - if (__pa(trampoline_base) >= 0x9F000) - BUG(); -} -#endif - static void impress_friends(void) { int cpu; diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index abbf199..1106fac 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -2,7 +2,7 @@ #include -/* ready for x86_64, no harm for x86, since it will overwrite after alloc */ +/* ready for x86_64 and x86 */ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); /* diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 8acbf0c..7bbebbf 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1137,15 +1137,6 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, 0, 1, 1); } -/* used to set up the trampoline for other CPUs when the memory manager - * is sorted out */ -void __init smp_alloc_memory(void) -{ - trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); - if (__pa(trampoline_base) >= 0x93000) - BUG(); -} - /* send a reschedule CPI to one CPU by physical CPU number*/ static void voyager_smp_send_reschedule(int cpu) { diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 1eb13b8..5a58e2b 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -70,6 +70,12 @@ extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, extern void update_e820(void); extern void e820_setup_gap(void); +extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); +extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); +extern void reserve_early(u64 start, u64 end, char *name); +extern void free_early(u64 start, u64 end); +extern void early_res_to_bootmem(u64 start, u64 end); + #endif /* __ASSEMBLY__ */ #define ISA_START_ADDRESS 0xa0000 diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index 9fac77e..37f176a 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h @@ -14,11 +14,6 @@ #include #ifndef __ASSEMBLY__ -extern unsigned long find_e820_area(unsigned long start, unsigned long end, - unsigned long size, unsigned long align); -extern unsigned long find_e820_area_size(unsigned long start, - unsigned long *sizep, - unsigned long align); extern void setup_memory_region(void); extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); @@ -35,10 +30,6 @@ extern void e820_register_active_regions(int nid, unsigned long start_pfn, extern void finish_e820_parsing(void); -extern void reserve_early(unsigned long start, unsigned long end, char *name); -extern void free_early(unsigned long start, unsigned long end); -extern void early_res_to_bootmem(unsigned long start, unsigned long end); - #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 1ebaa5c..514e52b 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -201,7 +201,6 @@ extern void cpu_exit_clear(void); extern void cpu_uninit(void); #endif -extern void smp_alloc_memory(void); extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); #endif /* __ASSEMBLY__ */ -- cgit v0.10.2 From ba5b14cc0311af6ed20dc25aa98a1d5ea6f2e6c0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 21 May 2008 18:40:18 -0700 Subject: x86: extend e820 ealy_res support 32bit - fix use find_e820_area to find addess for new RAMDISK, instead of using ram blindly also print out low ram and bootmap info Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index fed482c..3c451d1 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -466,11 +466,11 @@ static bool do_relocate_initrd = false; static void __init reserve_initrd(void) { - unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; - unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; - unsigned long ramdisk_end = ramdisk_image + ramdisk_size; - unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; - unsigned long ramdisk_here; + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_end = ramdisk_image + ramdisk_size; + u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; + u64 ramdisk_here; if (!boot_params.hdr.type_of_loader || !ramdisk_image || !ramdisk_size) @@ -478,14 +478,8 @@ static void __init reserve_initrd(void) initrd_start = 0; - if (ramdisk_end < ramdisk_image) { - free_bootmem(ramdisk_image, ramdisk_size); - printk(KERN_ERR "initrd wraps around end of memory, " - "disabling initrd\n"); - return; - } if (ramdisk_size >= end_of_lowmem/2) { - free_bootmem(ramdisk_image, ramdisk_size); + free_early(ramdisk_image, ramdisk_image + ramdisk_size - 1); printk(KERN_ERR "initrd too large to handle, " "disabling initrd\n"); return; @@ -495,8 +489,7 @@ static void __init reserve_initrd(void) /* All in lowmem, easy case */ /* * don't need to reserve again, already reserved early - * in i386_start_kernel, and early_res_to_bootmem - * convert that to reserved in bootmem + * in i386_start_kernel */ initrd_start = ramdisk_image + PAGE_OFFSET; initrd_end = initrd_start+ramdisk_size; @@ -504,11 +497,14 @@ static void __init reserve_initrd(void) } /* We need to move the initrd down into lowmem */ - ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK; + ramdisk_here = find_e820_area(min_low_pfn<> PAGE_SHIFT, max_low_pfn); + printk(KERN_INFO " low ram: %08lx - %08lx\n", + min_low_pfn< Date: Mon, 12 May 2008 15:43:35 +0200 Subject: x86: move mp_ioapic_routing to mpparse and make it static mpparse is the only user of mp_ioapic_routing. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c49ebcc..6f20fa9 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -331,8 +331,6 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #ifdef CONFIG_X86_IO_APIC -struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; - static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 404683b..bb72133 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -1,5 +1,5 @@ /* - * Intel Multiprocessor Specification 1.1 and 1.4 +2 * Intel Multiprocessor Specification 1.1 and 1.4 * compliant MP-table parsing routines. * * (c) 1995 Alan Cox, Building #3 @@ -805,7 +805,7 @@ int es7000_plat; #define MP_ISA_BUS 0 -extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; +static struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; static int mp_find_ioapic(int gsi) { -- cgit v0.10.2 From a91eea6df383f26fc4fcbefcbdb5aee8facd17fd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:37 +0200 Subject: x86: fix shadow variables of global end_pnf in e820_64.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 07941b5..5a9bc92 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -51,21 +51,21 @@ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; */ unsigned long __init e820_end_of_ram(void) { - unsigned long end_pfn; + unsigned long last_pfn; - end_pfn = find_max_pfn_with_active_regions(); + last_pfn = find_max_pfn_with_active_regions(); - if (end_pfn > max_pfn_mapped) - max_pfn_mapped = end_pfn; + if (last_pfn > max_pfn_mapped) + max_pfn_mapped = last_pfn; if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT) max_pfn_mapped = MAXMEM>>PAGE_SHIFT; - if (end_pfn > end_user_pfn) - end_pfn = end_user_pfn; - if (end_pfn > max_pfn_mapped) - end_pfn = max_pfn_mapped; + if (last_pfn > end_user_pfn) + last_pfn = end_user_pfn; + if (last_pfn > max_pfn_mapped) + last_pfn = max_pfn_mapped; printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped); - return end_pfn; + return last_pfn; } /* @@ -124,12 +124,12 @@ void __init e820_mark_nosave_regions(void) } /* - * Finds an active region in the address range from start_pfn to end_pfn and + * Finds an active region in the address range from start_pfn to last_pfn and * returns its range in ei_startpfn and ei_endpfn for the e820 entry. */ static int __init e820_find_active_region(const struct e820entry *ei, unsigned long start_pfn, - unsigned long end_pfn, + unsigned long last_pfn, unsigned long *ei_startpfn, unsigned long *ei_endpfn) { @@ -146,14 +146,14 @@ static int __init e820_find_active_region(const struct e820entry *ei, /* Skip if map is outside the node */ if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || - *ei_startpfn >= end_pfn) + *ei_startpfn >= last_pfn) return 0; /* Check for overlaps */ if (*ei_startpfn < start_pfn) *ei_startpfn = start_pfn; - if (*ei_endpfn > end_pfn) - *ei_endpfn = end_pfn; + if (*ei_endpfn > last_pfn) + *ei_endpfn = last_pfn; /* Obey end_user_pfn to save on memmap */ if (*ei_startpfn >= end_user_pfn) @@ -167,7 +167,7 @@ static int __init e820_find_active_region(const struct e820entry *ei, /* Walk the e820 map and register active regions within a node */ void __init e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn) + unsigned long last_pfn) { unsigned long ei_startpfn; unsigned long ei_endpfn; @@ -175,7 +175,7 @@ e820_register_active_regions(int nid, unsigned long start_pfn, for (i = 0; i < e820.nr_map; i++) if (e820_find_active_region(&e820.map[i], - start_pfn, end_pfn, + start_pfn, last_pfn, &ei_startpfn, &ei_endpfn)) add_active_range(nid, ei_startpfn, ei_endpfn); } @@ -188,13 +188,13 @@ e820_register_active_regions(int nid, unsigned long start_pfn, unsigned long __init e820_hole_size(unsigned long start, unsigned long end) { unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long end_pfn = end >> PAGE_SHIFT; + unsigned long last_pfn = end >> PAGE_SHIFT; unsigned long ei_startpfn, ei_endpfn, ram = 0; int i; for (i = 0; i < e820.nr_map; i++) { if (e820_find_active_region(&e820.map[i], - start_pfn, end_pfn, + start_pfn, last_pfn, &ei_startpfn, &ei_endpfn)) ram += ei_endpfn - ei_startpfn; } -- cgit v0.10.2 From 4a139a7fdec8964cecf7a3564ee7ae327141d495 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 May 2008 15:43:37 +0200 Subject: x86: include pci.h in e820_64.c global pci_mem_start needs a declaration. include pci.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 5a9bc92..3b2e0b1 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include -- cgit v0.10.2 From 11a62a056093a7f25f1595fbd8bd5f93559572b6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 May 2008 15:43:38 +0200 Subject: x86: cleanup print out for mptable the new output is: MPTABLE: OEM ID: SUN MPTABLE: Product ID: 4600 M2 MPTABLE: APIC at: 0x instead of it all in one line with <6> and double Product ID... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index bb72133..5a18b2b 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -113,7 +113,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) #ifdef CONFIG_X86_NUMAQ mpc_oem_bus_info(m, str, translation_table[mpc_record]); #else - Dprintk("Bus #%d is %s\n", m->mpc_busid, str); + printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); #endif #if MAX_MP_BUSSES < 256 @@ -183,7 +183,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) static void __init MP_intsrc_info(struct mpc_config_intsrc *m) { mp_irqs[mp_irq_entries] = *m; - Dprintk("Int: type %d, pol %d, trig %d, bus %d," + printk(KERN_INFO "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, @@ -196,7 +196,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) { - Dprintk("Lint: type %d, pol %d, trig %d, bus %d," + printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC LINT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, @@ -309,16 +309,15 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) } memcpy(oem, mpc->mpc_oem, 8); oem[8] = 0; - printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); + printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem); memcpy(str, mpc->mpc_productid, 12); str[12] = 0; - printk("Product ID: %s ", str); #ifdef CONFIG_X86_32 mps_oem_check(mpc, oem, str); #endif - printk(KERN_INFO "MPTABLE: Product ID: %s ", str); + printk(KERN_INFO "MPTABLE: Product ID: %s\n", str); printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); @@ -689,7 +688,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; - Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); + printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { -- cgit v0.10.2 From 32c5061265caf201d6a2c0d02181e2b68769c22c Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 May 2008 19:02:51 +0400 Subject: x86: move es7000_plat out of mpparse.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5a18b2b..ff13423 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -793,15 +793,14 @@ void __init find_smp_config(void) ACPI-based MP Configuration -------------------------------------------------------------------------- */ -/* - * Keep this outside and initialized to 0, for !CONFIG_ACPI builds: - */ -int es7000_plat; - #ifdef CONFIG_ACPI #ifdef CONFIG_X86_IO_APIC +#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH) +extern int es7000_plat; +#endif + #define MP_ISA_BUS 0 static struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; @@ -928,11 +927,13 @@ void __init mp_config_acpi_legacy_irqs(void) set_bit(MP_ISA_BUS, mp_bus_not_pci); Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); +#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH) /* * Older generations of ES7000 have no legacy identity mappings */ if (es7000_plat == 1) return; +#endif /* * Locate the IOAPIC that manages the ISA IRQs (0-15). diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c index f5d6f7d..a41c77a 100644 --- a/arch/x86/mach-es7000/es7000plat.c +++ b/arch/x86/mach-es7000/es7000plat.c @@ -52,6 +52,8 @@ static struct mip_reg *host_reg; static int mip_port; static unsigned long mip_addr, host_addr; +int es7000_plat; + /* * GSI override for ES7000 platforms. */ diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index a2f04cd..9f7f63b 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -303,7 +303,6 @@ static inline void clflush(volatile void *__p) void disable_hlt(void); void enable_hlt(void); -extern int es7000_plat; void cpu_idle_wait(void); extern unsigned long arch_align_stack(unsigned long sp); -- cgit v0.10.2 From 11113f84c72bd832dc4e76122e613b0e623e2346 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 May 2008 19:02:57 +0400 Subject: x86: complete move ACPI from mpparse.c Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6f20fa9..b2ad09c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -846,6 +846,310 @@ static int __init acpi_parse_madt_lapic_entries(void) #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_IO_APIC +#define MP_ISA_BUS 0 + +#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH) +extern int es7000_plat; +#endif + +static struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; + +static int mp_find_ioapic(int gsi) +{ + int i = 0; + + /* Find the IOAPIC that manages this GSI. */ + for (i = 0; i < nr_ioapics; i++) { + if ((gsi >= mp_ioapic_routing[i].gsi_base) + && (gsi <= mp_ioapic_routing[i].gsi_end)) + return i; + } + + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); + return -1; +} + +static u8 __init uniq_ioapic_id(u8 id) +{ +#ifdef CONFIG_X86_32 + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return io_apic_get_unique_id(nr_ioapics, id); + else + return id; +#else + int i; + DECLARE_BITMAP(used, 256); + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { + struct mpc_config_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->mpc_apicid, used); + } + if (!test_bit(id, used)) + return id; + return find_first_zero_bit(used, 256); +#endif +} + +static int bad_ioapic(unsigned long address) +{ + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " + "(found %d)\n", MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!\n"); + } + if (!address) { + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" + " found in table, skipping!\n"); + return 1; + } + return 0; +} + +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) +{ + int idx = 0; + + if (bad_ioapic(address)) + return; + + idx = nr_ioapics; + + mp_ioapics[idx].mpc_type = MP_IOAPIC; + mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; + mp_ioapics[idx].mpc_apicaddr = address; + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); +#ifdef CONFIG_X86_32 + mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); +#else + mp_ioapics[idx].mpc_apicver = 0; +#endif + /* + * Build basic GSI lookup table to facilitate gsi->io_apic lookups + * and to prevent reprogramming of IOAPIC pins (PCI GSIs). + */ + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; + mp_ioapic_routing[idx].gsi_base = gsi_base; + mp_ioapic_routing[idx].gsi_end = gsi_base + + io_apic_get_redir_entries(idx); + + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); + + nr_ioapics++; +} + +void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) +{ + int ioapic = -1; + int pin = -1; + + /* + * Convert 'gsi' to 'ioapic.pin'. + */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return; + pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + + /* + * TBD: This check is for faulty timer entries, where the override + * erroneously sets the trigger to level, resulting in a HUGE + * increase of timer interrupts! + */ + if ((bus_irq == 0) && (trigger == 3)) + trigger = 1; + + mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC; + mp_irqs[mp_irq_entries].mpc_irqtype = mp_INT; + mp_irqs[mp_irq_entries].mpc_irqflag = (trigger << 2) | polarity; + mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; + mp_irqs[mp_irq_entries].mpc_srcbusirq = bus_irq; /* IRQ */ + mp_irqs[mp_irq_entries].mpc_dstapic = + mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ + mp_irqs[mp_irq_entries].mpc_dstirq = pin; /* INTIN# */ + + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); + +} + +void __init mp_config_acpi_legacy_irqs(void) +{ + int i = 0; + int ioapic = -1; + +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) + /* + * Fabricate the legacy ISA bus (bus #31). + */ + mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; +#endif + set_bit(MP_ISA_BUS, mp_bus_not_pci); + Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); + +#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH) + /* + * Older generations of ES7000 have no legacy identity mappings + */ + if (es7000_plat == 1) + return; +#endif + + /* + * Locate the IOAPIC that manages the ISA IRQs (0-15). + */ + ioapic = mp_find_ioapic(0); + if (ioapic < 0) + return; + + mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC; + mp_irqs[mp_irq_entries].mpc_irqflag = 0; /* Conforming */ + mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; +#ifdef CONFIG_X86_IO_APIC + mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; +#endif + /* + * Use the default configuration for the IRQs 0-15. Unless + * overridden by (MADT) interrupt source override entries. + */ + for (i = 0; i < 16; i++) { + int idx; + + for (idx = 0; idx < mp_irq_entries; idx++) { + struct mpc_config_intsrc *irq = mp_irqs + idx; + + /* Do we already have a mapping for this ISA IRQ? */ + if (irq->mpc_srcbus == MP_ISA_BUS + && irq->mpc_srcbusirq == i) + break; + + /* Do we already have a mapping for this IOAPIC pin */ + if ((irq->mpc_dstapic == + mp_irqs[mp_irq_entries].mpc_dstapic) && + (irq->mpc_dstirq == i)) + break; + } + + if (idx != mp_irq_entries) { + printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); + continue; /* IRQ already used */ + } + + mp_irqs[mp_irq_entries].mpc_irqtype = mp_INT; + mp_irqs[mp_irq_entries].mpc_srcbusirq = i; /* Identity mapped */ + mp_irqs[mp_irq_entries].mpc_dstirq = i; + + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); + } +} + +int mp_register_gsi(u32 gsi, int triggering, int polarity) +{ + int ioapic; + int ioapic_pin; +#ifdef CONFIG_X86_32 +#define MAX_GSI_NUM 4096 +#define IRQ_COMPRESSION_START 64 + + static int pci_irq = IRQ_COMPRESSION_START; + /* + * Mapping between Global System Interrupts, which + * represent all possible interrupts, and IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; +#else + + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) + return gsi; +#endif + + /* Don't set up the ACPI SCI because it's already set up */ + if (acpi_gbl_FADT.sci_interrupt == gsi) + return gsi; + + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) { + printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); + return gsi; + } + + ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + +#ifdef CONFIG_X86_32 + if (ioapic_renumber_irq) + gsi = ioapic_renumber_irq(ioapic, gsi); +#endif + + /* + * Avoid pin reprogramming. PRTs typically include entries + * with redundant pin->gsi mappings (but unique PCI devices); + * we only program the IOAPIC on the first. + */ + if (ioapic_pin > MP_MAX_IOAPIC_PIN) { + printk(KERN_ERR "Invalid reference to IOAPIC pin " + "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, + ioapic_pin); + return gsi; + } + if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { + Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", + mp_ioapic_routing[ioapic].apic_id, ioapic_pin); +#ifdef CONFIG_X86_32 + return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); +#else + return gsi; +#endif + } + + set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); +#ifdef CONFIG_X86_32 + /* + * For GSI >= 64, use IRQ compression + */ + if ((gsi >= IRQ_COMPRESSION_START) + && (triggering == ACPI_LEVEL_SENSITIVE)) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. + */ + int irq = gsi; + if (gsi < MAX_GSI_NUM) { + /* + * Retain the VIA chipset work-around (gsi > 15), but + * avoid a problem where the 8254 timer (IRQ0) is setup + * via an override (so it's not on pin 0 of the ioapic), + * and at the same time, the pin 0 interrupt is a PCI + * type. The gsi > 15 test could cause these two pins + * to be shared as IRQ0, and they are not shareable. + * So test for this condition, and if necessary, avoid + * the pin collision. + */ + gsi = pci_irq++; + /* + * Don't assign IRQ used by ACPI SCI + */ + if (gsi == acpi_gbl_FADT.sci_interrupt) + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } else { + printk(KERN_ERR "GSI %u is too high\n", gsi); + return gsi; + } + } +#endif + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, + triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, + polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + return gsi; +} + /* * Parse IOAPIC related entries in MADT * returns 0 on success, < 0 on error diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index ff13423..d05b70c 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -1,5 +1,5 @@ /* -2 * Intel Multiprocessor Specification 1.1 and 1.4 + * Intel Multiprocessor Specification 1.1 and 1.4 * compliant MP-table parsing routines. * * (c) 1995 Alan Cox, Building #3 @@ -788,300 +788,3 @@ void __init find_smp_config(void) { __find_smp_config(1); } - -/* -------------------------------------------------------------------------- - ACPI-based MP Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -#ifdef CONFIG_X86_IO_APIC - -#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH) -extern int es7000_plat; -#endif - -#define MP_ISA_BUS 0 - -static struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; - -static int mp_find_ioapic(int gsi) -{ - int i = 0; - - /* Find the IOAPIC that manages this GSI. */ - for (i = 0; i < nr_ioapics; i++) { - if ((gsi >= mp_ioapic_routing[i].gsi_base) - && (gsi <= mp_ioapic_routing[i].gsi_end)) - return i; - } - - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); - return -1; -} - -static u8 __init uniq_ioapic_id(u8 id) -{ -#ifdef CONFIG_X86_32 - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); - else - return id; -#else - int i; - DECLARE_BITMAP(used, 256); - bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { - struct mpc_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mpc_apicid, used); - } - if (!test_bit(id, used)) - return id; - return find_first_zero_bit(used, 256); -#endif -} - -void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) -{ - int idx = 0; - - if (bad_ioapic(address)) - return; - - idx = nr_ioapics; - - mp_ioapics[idx].mpc_type = MP_IOAPIC; - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mpc_apicaddr = address; - - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); -#ifdef CONFIG_X86_32 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); -#else - mp_ioapics[idx].mpc_apicver = 0; -#endif - /* - * Build basic GSI lookup table to facilitate gsi->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI GSIs). - */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; - mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); - - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); - - nr_ioapics++; -} - -void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) -{ - struct mpc_config_intsrc intsrc; - int ioapic = -1; - int pin = -1; - - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = gsi - mp_ioapic_routing[ioapic].gsi_base; - - /* - * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE - * increase of timer interrupts! - */ - if ((bus_irq == 0) && (trigger == 3)) - trigger = 1; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_irqflag = (trigger << 2) | polarity; - intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ - intsrc.mpc_dstirq = pin; /* INTIN# */ - - MP_intsrc_info(&intsrc); -} - -void __init mp_config_acpi_legacy_irqs(void) -{ - struct mpc_config_intsrc intsrc; - int i = 0; - int ioapic = -1; - -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) - /* - * Fabricate the legacy ISA bus (bus #31). - */ - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; -#endif - set_bit(MP_ISA_BUS, mp_bus_not_pci); - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); - -#if defined(CONFIG_X86_ES7000) || defined(CONFIG_X86_GENERICARCH) - /* - * Older generations of ES7000 have no legacy identity mappings - */ - if (es7000_plat == 1) - return; -#endif - - /* - * Locate the IOAPIC that manages the ISA IRQs (0-15). - */ - ioapic = mp_find_ioapic(0); - if (ioapic < 0) - return; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* Conforming */ - intsrc.mpc_srcbus = MP_ISA_BUS; -#ifdef CONFIG_X86_IO_APIC - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; -#endif - /* - * Use the default configuration for the IRQs 0-15. Unless - * overridden by (MADT) interrupt source override entries. - */ - for (i = 0; i < 16; i++) { - int idx; - - for (idx = 0; idx < mp_irq_entries; idx++) { - struct mpc_config_intsrc *irq = mp_irqs + idx; - - /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mpc_srcbus == MP_ISA_BUS - && irq->mpc_srcbusirq == i) - break; - - /* Do we already have a mapping for this IOAPIC pin */ - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && - (irq->mpc_dstirq == i)) - break; - } - - if (idx != mp_irq_entries) { - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); - continue; /* IRQ already used */ - } - - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_srcbusirq = i; /* Identity mapped */ - intsrc.mpc_dstirq = i; - - MP_intsrc_info(&intsrc); - } -} - -int mp_register_gsi(u32 gsi, int triggering, int polarity) -{ - int ioapic; - int ioapic_pin; -#ifdef CONFIG_X86_32 -#define MAX_GSI_NUM 4096 -#define IRQ_COMPRESSION_START 64 - - static int pci_irq = IRQ_COMPRESSION_START; - /* - * Mapping between Global System Interrupts, which - * represent all possible interrupts, and IRQs - * assigned to actual devices. - */ - static int gsi_to_irq[MAX_GSI_NUM]; -#else - - if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) - return gsi; -#endif - - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return gsi; - - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) { - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); - return gsi; - } - - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; - -#ifdef CONFIG_X86_32 - if (ioapic_renumber_irq) - gsi = ioapic_renumber_irq(ioapic, gsi); -#endif - - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->gsi mappings (but unique PCI devices); - * we only program the IOAPIC on the first. - */ - if (ioapic_pin > MP_MAX_IOAPIC_PIN) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, - ioapic_pin); - return gsi; - } - if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { - Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", - mp_ioapic_routing[ioapic].apic_id, ioapic_pin); -#ifdef CONFIG_X86_32 - return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); -#else - return gsi; -#endif - } - - set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); -#ifdef CONFIG_X86_32 - /* - * For GSI >= 64, use IRQ compression - */ - if ((gsi >= IRQ_COMPRESSION_START) - && (triggering == ACPI_LEVEL_SENSITIVE)) { - /* - * For PCI devices assign IRQs in order, avoiding gaps - * due to unused I/O APIC pins. - */ - int irq = gsi; - if (gsi < MAX_GSI_NUM) { - /* - * Retain the VIA chipset work-around (gsi > 15), but - * avoid a problem where the 8254 timer (IRQ0) is setup - * via an override (so it's not on pin 0 of the ioapic), - * and at the same time, the pin 0 interrupt is a PCI - * type. The gsi > 15 test could cause these two pins - * to be shared as IRQ0, and they are not shareable. - * So test for this condition, and if necessary, avoid - * the pin collision. - */ - gsi = pci_irq++; - /* - * Don't assign IRQ used by ACPI SCI - */ - if (gsi == acpi_gbl_FADT.sci_interrupt) - gsi = pci_irq++; - gsi_to_irq[irq] = gsi; - } else { - printk(KERN_ERR "GSI %u is too high\n", gsi); - return gsi; - } - } -#endif - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 0 : 1); - return gsi; -} - -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ -- cgit v0.10.2 From 5f8951487ddbacbc949e9ffae574f94791f9b4dd Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 May 2008 19:03:04 +0400 Subject: x86: make mp_ioapic_routing definition local Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b2ad09c..f75c203 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -852,7 +852,12 @@ static int __init acpi_parse_madt_lapic_entries(void) extern int es7000_plat; #endif -static struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; +static struct { + int apic_id; + int gsi_base; + int gsi_end; + DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); +} mp_ioapic_routing[MAX_IO_APICS]; static int mp_find_ioapic(int gsi) { diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index d593e14..2ef96f0 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -112,13 +112,6 @@ extern int nr_ioapic_registers[MAX_IO_APICS]; #define MP_MAX_IOAPIC_PIN 127 -struct mp_ioapic_routing { - int apic_id; - int gsi_base; - int gsi_end; - DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); -}; - /* I/O APIC entries */ extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; -- cgit v0.10.2 From ec2cd0a22e2715f776a934e01c4f8ea098324fe1 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 May 2008 19:03:10 +0400 Subject: x86: make struct config_ioapic not MPspec specific Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f75c203..a26dd91 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -887,8 +887,8 @@ static u8 __init uniq_ioapic_id(u8 id) DECLARE_BITMAP(used, 256); bitmap_zero(used, 256); for (i = 0; i < nr_ioapics; i++) { - struct mpc_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mpc_apicid, used); + struct mp_config_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->mp_apicid, used); } if (!test_bit(id, used)) return id; @@ -920,29 +920,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) idx = nr_ioapics; - mp_ioapics[idx].mpc_type = MP_IOAPIC; - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mpc_apicaddr = address; + mp_ioapics[idx].mp_type = MP_IOAPIC; + mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; + mp_ioapics[idx].mp_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); + mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); #ifdef CONFIG_X86_32 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); + mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); #else - mp_ioapics[idx].mpc_apicver = 0; + mp_ioapics[idx].mp_apicver = 0; #endif /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " + "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, + mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); nr_ioapics++; @@ -975,7 +975,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; mp_irqs[mp_irq_entries].mpc_srcbusirq = bus_irq; /* IRQ */ mp_irqs[mp_irq_entries].mpc_dstapic = - mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ + mp_ioapics[ioapic].mp_apicid; /* APIC ID */ mp_irqs[mp_irq_entries].mpc_dstirq = pin; /* INTIN# */ if (++mp_irq_entries == MAX_IRQ_SOURCES) @@ -1016,7 +1016,7 @@ void __init mp_config_acpi_legacy_irqs(void) mp_irqs[mp_irq_entries].mpc_irqflag = 0; /* Conforming */ mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; #ifdef CONFIG_X86_IO_APIC - mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; + mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mp_apicid; #endif /* * Use the default configuration for the IRQs 0-15. Unless diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 4b99b1b..fa1be1d 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1202,7 +1202,7 @@ void __init init_apic_mappings(void) for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; + ioapic_phys = mp_ioapics[i].mp_apicaddr; if (!ioapic_phys) { printk(KERN_ERR "WARNING: bogus zero IO-APIC " diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index a40d54f..5af1b71 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -72,7 +72,7 @@ int sis_apic_bug = -1; int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ @@ -110,7 +110,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -802,7 +802,7 @@ static int find_irq_entry(int apic, int pin, int type) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || + (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid || mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && mp_irqs[i].mpc_dstirq == pin) return i; @@ -844,7 +844,7 @@ static int __init find_isa_irq_apic(int irq, int type) if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic) return apic; } } @@ -872,7 +872,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) int lbus = mp_irqs[i].mpc_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic || mp_irqs[i].mpc_dstapic == MP_APIC_ALL) break; @@ -1250,12 +1250,12 @@ static void __init setup_IO_APIC_irqs(void) if (first_notcon) { apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", - mp_ioapics[apic].mpc_apicid, + mp_ioapics[apic].mp_apicid, pin); first_notcon = 0; } else apic_printk(APIC_VERBOSE, ", %d-%d", - mp_ioapics[apic].mpc_apicid, pin); + mp_ioapics[apic].mp_apicid, pin); continue; } @@ -1357,7 +1357,7 @@ void __init print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -1376,7 +1376,7 @@ void __init print_IO_APIC(void) reg_03.raw = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); @@ -1749,14 +1749,14 @@ static void __init setup_ioapic_ids_from_mpc(void) reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mp_ioapics[apic].mpc_apicid; + old_id = mp_ioapics[apic].mp_apicid; - if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { + if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mpc_apicid); + apic, mp_ioapics[apic].mp_apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; + mp_ioapics[apic].mp_apicid = reg_00.bits.ID; } /* @@ -1765,9 +1765,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mpc_apicid)) { + mp_ioapics[apic].mp_apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mpc_apicid); + apic, mp_ioapics[apic].mp_apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -1776,13 +1776,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].mpc_apicid = i; + mp_ioapics[apic].mp_apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].mpc_apicid); + mp_ioapics[apic].mp_apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -1791,11 +1791,11 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].mpc_apicid) + if (old_id != mp_ioapics[apic].mp_apicid) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mpc_dstapic == old_id) mp_irqs[i].mpc_dstapic - = mp_ioapics[apic].mpc_apicid; + = mp_ioapics[apic].mp_apicid; /* * Read the right value from the MPC table and @@ -1803,9 +1803,9 @@ static void __init setup_ioapic_ids_from_mpc(void) */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mpc_apicid); + mp_ioapics[apic].mp_apicid); - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; + reg_00.bits.ID = mp_ioapics[apic].mp_apicid; spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); @@ -1816,7 +1816,7 @@ static void __init setup_ioapic_ids_from_mpc(void) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) + if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); @@ -2355,8 +2355,8 @@ static int ioapic_resume(struct sys_device *dev) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2789,7 +2789,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, + mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, edge_level, active_high_low); ioapic_register_intr(irq, entry.vector, edge_level); diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index ef1a8df..4555ad8 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -104,7 +104,7 @@ DEFINE_SPINLOCK(vector_lock); int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ @@ -140,7 +140,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -454,7 +454,7 @@ static int find_irq_entry(int apic, int pin, int type) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || + (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid || mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && mp_irqs[i].mpc_dstirq == pin) return i; @@ -496,7 +496,7 @@ static int __init find_isa_irq_apic(int irq, int type) if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic) return apic; } } @@ -524,7 +524,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) int lbus = mp_irqs[i].mpc_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic || mp_irqs[i].mpc_dstapic == MP_APIC_ALL) break; @@ -846,7 +846,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, + apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); /* @@ -887,10 +887,10 @@ static void __init setup_IO_APIC_irqs(void) idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); + apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); first_notcon = 0; } else - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); + apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); continue; } if (!first_notcon) { @@ -965,7 +965,7 @@ void __apicdebuginit print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -983,7 +983,7 @@ void __apicdebuginit print_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); @@ -1841,8 +1841,8 @@ static int ioapic_resume(struct sys_device *dev) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2336,7 +2336,7 @@ void __init ioapic_init_mappings(void) ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; + ioapic_phys = mp_ioapics[i].mp_apicaddr; } else { ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d05b70c..9f1e5bf 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -176,7 +176,11 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) if (bad_ioapic(m->mpc_apicaddr)) return; - mp_ioapics[nr_ioapics] = *m; + mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr; + mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid; + mp_ioapics[nr_ioapics].mp_type = m->mpc_type; + mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver; + mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags; nr_ioapics++; } @@ -426,7 +430,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) intsrc.mpc_type = MP_INTSRC; intsrc.mpc_irqflag = 0; /* conforming */ intsrc.mpc_srcbus = 0; - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; + intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid; intsrc.mpc_irqtype = mp_INT; diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 2ef96f0..ade76c0 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -112,8 +112,16 @@ extern int nr_ioapic_registers[MAX_IO_APICS]; #define MP_MAX_IOAPIC_PIN 127 +struct mp_config_ioapic { + unsigned long mp_apicaddr; + unsigned int mp_apicid; + unsigned char mp_type; + unsigned char mp_apicver; + unsigned char mp_flags; +}; + /* I/O APIC entries */ -extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; +extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; /* # of MP IRQ source entries */ extern int mp_irq_entries; -- cgit v0.10.2 From 2fddb6e28e903a3ab1704cc5aac01be5a59dc05b Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 May 2008 19:03:17 +0400 Subject: x86: make config_irqsrc not MPspec specific Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index a26dd91..276ec05 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -969,14 +969,14 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC; - mp_irqs[mp_irq_entries].mpc_irqtype = mp_INT; - mp_irqs[mp_irq_entries].mpc_irqflag = (trigger << 2) | polarity; - mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; - mp_irqs[mp_irq_entries].mpc_srcbusirq = bus_irq; /* IRQ */ - mp_irqs[mp_irq_entries].mpc_dstapic = + mp_irqs[mp_irq_entries].mp_type = MP_INTSRC; + mp_irqs[mp_irq_entries].mp_irqtype = mp_INT; + mp_irqs[mp_irq_entries].mp_irqflag = (trigger << 2) | polarity; + mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS; + mp_irqs[mp_irq_entries].mp_srcbusirq = bus_irq; /* IRQ */ + mp_irqs[mp_irq_entries].mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ - mp_irqs[mp_irq_entries].mpc_dstirq = pin; /* INTIN# */ + mp_irqs[mp_irq_entries].mp_dstirq = pin; /* INTIN# */ if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); @@ -1012,12 +1012,11 @@ void __init mp_config_acpi_legacy_irqs(void) if (ioapic < 0) return; - mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC; - mp_irqs[mp_irq_entries].mpc_irqflag = 0; /* Conforming */ - mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS; -#ifdef CONFIG_X86_IO_APIC - mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mp_apicid; -#endif + mp_irqs[mp_irq_entries].mp_type = MP_INTSRC; + mp_irqs[mp_irq_entries].mp_irqflag = 0; /* Conforming */ + mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS; + mp_irqs[mp_irq_entries].mp_dstapic = mp_ioapics[ioapic].mp_apicid; + /* * Use the default configuration for the IRQs 0-15. Unless * overridden by (MADT) interrupt source override entries. @@ -1026,17 +1025,17 @@ void __init mp_config_acpi_legacy_irqs(void) int idx; for (idx = 0; idx < mp_irq_entries; idx++) { - struct mpc_config_intsrc *irq = mp_irqs + idx; + struct mp_config_intsrc *irq = mp_irqs + idx; /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mpc_srcbus == MP_ISA_BUS - && irq->mpc_srcbusirq == i) + if (irq->mp_srcbus == MP_ISA_BUS + && irq->mp_srcbusirq == i) break; /* Do we already have a mapping for this IOAPIC pin */ - if ((irq->mpc_dstapic == - mp_irqs[mp_irq_entries].mpc_dstapic) && - (irq->mpc_dstirq == i)) + if ((irq->mp_dstapic == + mp_irqs[mp_irq_entries].mp_dstapic) && + (irq->mp_dstirq == i)) break; } @@ -1045,9 +1044,9 @@ void __init mp_config_acpi_legacy_irqs(void) continue; /* IRQ already used */ } - mp_irqs[mp_irq_entries].mpc_irqtype = mp_INT; - mp_irqs[mp_irq_entries].mpc_srcbusirq = i; /* Identity mapped */ - mp_irqs[mp_irq_entries].mpc_dstirq = i; + mp_irqs[mp_irq_entries].mp_irqtype = mp_INT; + mp_irqs[mp_irq_entries].mp_srcbusirq = i; /* Identity mapped */ + mp_irqs[mp_irq_entries].mp_dstirq = i; if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 5af1b71..ea68c3e 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -76,7 +76,7 @@ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; @@ -801,10 +801,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && - mp_irqs[i].mpc_dstirq == pin) + if (mp_irqs[i].mp_irqtype == type && + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && + mp_irqs[i].mp_dstirq == pin) return i; return -1; @@ -818,13 +818,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) - return mp_irqs[i].mpc_dstirq; + return mp_irqs[i].mp_dstirq; } return -1; } @@ -834,17 +834,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) return apic; } } @@ -869,23 +869,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mpc_irqtype && + !mp_irqs[i].mp_irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -952,7 +952,7 @@ static int EISA_ELCR(unsigned int irq) * EISA conforming in the MP table, that means its trigger type must * be read in from the ELCR */ -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) /* PCI interrupts are always polarity one level triggered, @@ -969,13 +969,13 @@ static int EISA_ELCR(unsigned int irq) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mpc_irqflag & 3) + switch (mp_irqs[idx].mp_irqflag & 3) { case 0: /* conforms, ie. bus-type dependent polarity */ { @@ -1012,13 +1012,13 @@ static int MPBIOS_polarity(int idx) static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { case 0: /* conforms, ie. bus-type dependent */ { @@ -1097,16 +1097,16 @@ static inline int irq_trigger(int idx) static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mpc_dstirq != pin) + if (mp_irqs[idx].mp_dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) - irq = mp_irqs[idx].mpc_srcbusirq; + irq = mp_irqs[idx].mp_srcbusirq; else { /* * PCI IRQs are mapped in order @@ -1793,8 +1793,8 @@ static void __init setup_ioapic_ids_from_mpc(void) */ if (old_id != mp_ioapics[apic].mp_apicid) for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_dstapic == old_id) - mp_irqs[i].mpc_dstapic + if (mp_irqs[i].mp_dstapic == old_id) + mp_irqs[i].mp_dstapic = mp_ioapics[apic].mp_apicid; /* @@ -2810,8 +2810,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == mp_INT && - mp_irqs[i].mpc_srcbusirq == bus_irq) + if (mp_irqs[i].mp_irqtype == mp_INT && + mp_irqs[i].mp_srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 4555ad8..e7f1476 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -108,7 +108,7 @@ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; @@ -453,10 +453,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mp_apicid || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && - mp_irqs[i].mpc_dstirq == pin) + if (mp_irqs[i].mp_irqtype == type && + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && + mp_irqs[i].mp_dstirq == pin) return i; return -1; @@ -470,13 +470,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) - return mp_irqs[i].mpc_dstirq; + return mp_irqs[i].mp_dstirq; } return -1; } @@ -486,17 +486,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) return apic; } } @@ -521,23 +521,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; + int lbus = mp_irqs[i].mp_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mpc_dstapic || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mpc_irqtype && + !mp_irqs[i].mp_irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -565,13 +565,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mpc_irqflag & 3) + switch (mp_irqs[idx].mp_irqflag & 3) { case 0: /* conforms, ie. bus-type dependent polarity */ if (test_bit(bus, mp_bus_not_pci)) @@ -607,13 +607,13 @@ static int MPBIOS_polarity(int idx) static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { case 0: /* conforms, ie. bus-type dependent */ if (test_bit(bus, mp_bus_not_pci)) @@ -660,16 +660,16 @@ static inline int irq_trigger(int idx) static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mpc_srcbus; + int bus = mp_irqs[idx].mp_srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mpc_dstirq != pin) + if (mp_irqs[idx].mp_dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mpc_srcbusirq; + irq = mp_irqs[idx].mp_srcbusirq; } else { /* * PCI IRQs are mapped in order @@ -2242,8 +2242,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == mp_INT && - mp_irqs[i].mpc_srcbusirq == bus_irq) + if (mp_irqs[i].mp_irqtype == mp_INT && + mp_irqs[i].mp_srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 9f1e5bf..59f051d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -186,12 +186,18 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) static void __init MP_intsrc_info(struct mpc_config_intsrc *m) { - mp_irqs[mp_irq_entries] = *m; printk(KERN_INFO "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); + mp_irqs[mp_irq_entries].mp_dstapic = m->mpc_dstapic; + mp_irqs[mp_irq_entries].mp_type = m->mpc_type; + mp_irqs[mp_irq_entries].mp_irqtype = m->mpc_irqtype; + mp_irqs[mp_irq_entries].mp_irqflag = m->mpc_irqflag; + mp_irqs[mp_irq_entries].mp_srcbus = m->mpc_srcbus; + mp_irqs[mp_irq_entries].mp_srcbusirq = m->mpc_srcbusirq; + mp_irqs[mp_irq_entries].mp_dstirq = m->mpc_dstirq; if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); } diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index ade76c0..86d8c3b 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -120,6 +120,16 @@ struct mp_config_ioapic { unsigned char mp_flags; }; +struct mp_config_intsrc { + unsigned int mp_dstapic; + unsigned char mp_type; + unsigned char mp_irqtype; + unsigned short mp_irqflag; + unsigned char mp_srcbus; + unsigned char mp_srcbusirq; + unsigned char mp_dstirq; +}; + /* I/O APIC entries */ extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; @@ -127,7 +137,7 @@ extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; extern int mp_irq_entries; /* MP IRQ source entries */ -extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; -- cgit v0.10.2 From 59f4519ad7ade61123e90085b0dadb2ea197bd87 Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Date: Sun, 18 May 2008 05:24:41 +0200 Subject: x86: initialize all fields of mp_irqs[mp_irq_entries] Commit "x86: make config_irqsrc not MPspec specific" introduced some uses of uninitialized fields in mp_config_acpi_legacy_irqs(). I need the following patch to get sched-devel/master to boot. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 276ec05..6170c6a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1012,11 +1012,6 @@ void __init mp_config_acpi_legacy_irqs(void) if (ioapic < 0) return; - mp_irqs[mp_irq_entries].mp_type = MP_INTSRC; - mp_irqs[mp_irq_entries].mp_irqflag = 0; /* Conforming */ - mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS; - mp_irqs[mp_irq_entries].mp_dstapic = mp_ioapics[ioapic].mp_apicid; - /* * Use the default configuration for the IRQs 0-15. Unless * overridden by (MADT) interrupt source override entries. @@ -1044,6 +1039,10 @@ void __init mp_config_acpi_legacy_irqs(void) continue; /* IRQ already used */ } + mp_irqs[mp_irq_entries].mp_type = MP_INTSRC; + mp_irqs[mp_irq_entries].mp_irqflag = 0; /* Conforming */ + mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS; + mp_irqs[mp_irq_entries].mp_dstapic = mp_ioapics[ioapic].mp_apicid; mp_irqs[mp_irq_entries].mp_irqtype = mp_INT; mp_irqs[mp_irq_entries].mp_srcbusirq = i; /* Identity mapped */ mp_irqs[mp_irq_entries].mp_dstirq = i; -- cgit v0.10.2 From aafbdf71f1d3aeffd679b1a86e1b28f71515856c Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 22 May 2008 12:26:15 +0400 Subject: x86: fix mpparse/acpi interaction Sitsofe Wheeler reported boot problems on linux-next. It looks like the same issue as found by Soeren Sandman in 7575217f656a93, "x86: initialize all fields of mp_irqs[mp_irq_entries]". But his fix is also not complete, as dstapic is used before it assigned. Reported-by: Sitsofe Wheeler Bisected-by: Sitsofe Wheeler Signed-off-by: Alexey Starikovskiy Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6170c6a..2ddfaba 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1019,6 +1019,11 @@ void __init mp_config_acpi_legacy_irqs(void) for (i = 0; i < 16; i++) { int idx; + mp_irqs[mp_irq_entries].mp_type = MP_INTSRC; + mp_irqs[mp_irq_entries].mp_irqflag = 0; /* Conforming */ + mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS; + mp_irqs[mp_irq_entries].mp_dstapic = mp_ioapics[ioapic].mp_apicid; + for (idx = 0; idx < mp_irq_entries; idx++) { struct mp_config_intsrc *irq = mp_irqs + idx; @@ -1039,10 +1044,6 @@ void __init mp_config_acpi_legacy_irqs(void) continue; /* IRQ already used */ } - mp_irqs[mp_irq_entries].mp_type = MP_INTSRC; - mp_irqs[mp_irq_entries].mp_irqflag = 0; /* Conforming */ - mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS; - mp_irqs[mp_irq_entries].mp_dstapic = mp_ioapics[ioapic].mp_apicid; mp_irqs[mp_irq_entries].mp_irqtype = mp_INT; mp_irqs[mp_irq_entries].mp_srcbusirq = i; /* Identity mapped */ mp_irqs[mp_irq_entries].mp_dstirq = i; -- cgit v0.10.2 From bf62f3981c7076714e3b9f5fa6989a806cad02bf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 20 May 2008 20:10:58 -0700 Subject: x86: move e820_mark_nosave_regions to e820.c and make e820_mark_nosave_regions to take limit_pfn to use max_low_pfn for 32bit and end_pfn for 64bit Signed-off-by: Yinghai Lu Cc: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 35da8cd..0cd9132 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -495,6 +496,37 @@ __init void e820_setup_gap(void) pci_mem_start, gapstart, gapsize); } +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +/** + * Find the ranges of physical addresses that do not correspond to + * e820 RAM areas and mark the corresponding pages as nosave for + * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). + * + * This function requires the e820 map to be sorted and without any + * overlapping entries and assumes the first e820 area to be RAM. + */ +void __init e820_mark_nosave_regions(unsigned long limit_pfn) +{ + int i; + unsigned long pfn; + + pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); + for (i = 1; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (pfn < PFN_UP(ei->addr)) + register_nosave_region(pfn, PFN_UP(ei->addr)); + + pfn = PFN_DOWN(ei->addr + ei->size); + if (ei->type != E820_RAM) + register_nosave_region(PFN_UP(ei->addr), pfn); + + if (pfn >= limit_pfn) + break; + } +} +#endif /* * Early reserved memory areas. diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index dfe2575..db760d4 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -208,37 +207,6 @@ void __init init_iomem_resources(struct resource *code_resource, } } -#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) -/** - * e820_mark_nosave_regions - Find the ranges of physical addresses that do not - * correspond to e820 RAM areas and mark the corresponding pages as nosave for - * hibernation. - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. - */ -void __init e820_mark_nosave_regions(void) -{ - int i; - unsigned long pfn; - - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (pfn < PFN_UP(ei->addr)) - register_nosave_region(pfn, PFN_UP(ei->addr)); - - pfn = PFN_DOWN(ei->addr + ei->size); - if (ei->type != E820_RAM) - register_nosave_region(PFN_UP(ei->addr), pfn); - - if (pfn >= max_low_pfn) - break; - } -} -#endif - /* * Find the highest page frame number we have available */ diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 3b2e0b1..5e063e7 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -94,37 +93,6 @@ void __init e820_reserve_resources(void) } /* - * Find the ranges of physical addresses that do not correspond to - * e820 RAM areas and mark the corresponding pages as nosave for software - * suspend and suspend to RAM. - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. - */ -void __init e820_mark_nosave_regions(void) -{ - int i; - unsigned long paddr; - - paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (paddr < ei->addr) - register_nosave_region(PFN_DOWN(paddr), - PFN_UP(ei->addr)); - - paddr = round_down(ei->addr + ei->size, PAGE_SIZE); - if (ei->type != E820_RAM) - register_nosave_region(PFN_UP(ei->addr), - PFN_DOWN(paddr)); - - if (paddr >= (end_pfn << PAGE_SHIFT)) - break; - } -} - -/* * Finds an active region in the address range from start_pfn to last_pfn and * returns its range in ei_startpfn and ei_endpfn for the e820 entry. */ diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 3c451d1..e1173aec 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -820,7 +820,7 @@ void __init setup_arch(char **cmdline_p) #endif e820_setup_gap(); - e820_mark_nosave_regions(); + e820_mark_nosave_regions(max_low_pfn); #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6dff128..975a5da 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -516,7 +516,7 @@ void __init setup_arch(char **cmdline_p) * We trust e820 completely. No explicit ROM probing in memory. */ e820_reserve_resources(); - e820_mark_nosave_regions(); + e820_mark_nosave_regions(end_pfn); /* request I/O space for devices used on all i[345]86 PCs */ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 5a58e2b..4266a2c 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -70,6 +70,15 @@ extern u64 update_memory_range(u64 start, u64 size, unsigned old_type, extern void update_e820(void); extern void e820_setup_gap(void); +#if defined(CONFIG_X86_64) || \ + (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) +extern void e820_mark_nosave_regions(unsigned long limit_pfn); +#else +static inline void e820_mark_nosave_regions(unsigned long limit_pfn) +{ +} +#endif + extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); extern void reserve_early(u64 start, u64 end, char *name); diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index 9576b43..7ace825 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -28,13 +28,5 @@ extern void init_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) -extern void e820_mark_nosave_regions(void); -#else -static inline void e820_mark_nosave_regions(void) -{ -} -#endif - #endif/*!__ASSEMBLY__*/ #endif/*__E820_HEADER*/ diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h index 37f176a..917963c 100644 --- a/include/asm-x86/e820_64.h +++ b/include/asm-x86/e820_64.h @@ -18,7 +18,6 @@ extern void setup_memory_region(void); extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); extern void e820_reserve_resources(void); -extern void e820_mark_nosave_regions(void); extern int e820_any_non_reserved(unsigned long start, unsigned long end); extern int is_memory_any_valid(unsigned long start, unsigned long end); extern int e820_all_non_reserved(unsigned long start, unsigned long end); -- cgit v0.10.2 From b3e2416465bc9140bfd209f247e6a789f68f0d19 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 23 May 2008 01:54:44 +0400 Subject: x86: Set pic_mode only if local apic code is present diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 59f051d..e61523a 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -609,7 +609,7 @@ static void __init __get_smp_config(unsigned early) printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) if (mpf->mpf_feature2 & (1 << 7)) { printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); pic_mode = 1; -- cgit v0.10.2 From f3918352909f839a7b0dbf9b3f81d2e183c46f88 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 23 May 2008 01:54:51 +0400 Subject: x86: move pic_mode to apic_32.c diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index fa1be1d..848c603 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -76,6 +76,8 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); */ int apic_verbosity; +int pic_mode; + static unsigned int calibration_result; static int lapic_next_event(unsigned long delta, diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index e61523a..b72c046 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -48,8 +48,6 @@ int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; static int mp_current_pci_id; -int pic_mode; - /* * Intel MP BIOS table parsing routines: */ diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c index 57484e9..b7d063e 100644 --- a/arch/x86/mach-visws/mpparse.c +++ b/arch/x86/mach-visws/mpparse.c @@ -11,8 +11,6 @@ /* Have we found an MP table */ int smp_found_config; -int pic_mode; - extern unsigned int __cpuinitdata maxcpus; /* -- cgit v0.10.2 From bab4b27c00c4880737c18bb91138b1a7dd94164c Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 19 May 2008 19:47:03 +0400 Subject: x86: move smp_found_config diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 848c603..c304759 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -78,6 +78,9 @@ int apic_verbosity; int pic_mode; +/* Have we found an MP table */ +int smp_found_config; + static unsigned int calibration_result; static int lapic_next_event(unsigned long delta, diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5910020..54087f9 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); */ int apic_verbosity; +/* Have we found an MP table */ +int smp_found_config; + static struct resource lapic_resource = { .name = "Local APIC", .flags = IORESOURCE_MEM | IORESOURCE_BUSY, diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b72c046..d67cd76 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -32,9 +32,6 @@ #include #endif -/* Have we found an MP table */ -int smp_found_config; - /* * Various Linux-internal data structures created from the * MP-table. @@ -639,7 +636,9 @@ static void __init __get_smp_config(unsigned early) * override the defaults. */ if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { +#ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; +#endif printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. " @@ -706,8 +705,9 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, !mpf_checksum((unsigned char *)bp, 16) && ((mpf->mpf_specification == 1) || (mpf->mpf_specification == 4))) { - +#ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; +#endif mpf_found = mpf; #ifdef CONFIG_X86_32 printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c index b7d063e..a2fb78c 100644 --- a/arch/x86/mach-visws/mpparse.c +++ b/arch/x86/mach-visws/mpparse.c @@ -8,9 +8,6 @@ #include "cobalt.h" #include "mach_apic.h" -/* Have we found an MP table */ -int smp_found_config; - extern unsigned int __cpuinitdata maxcpus; /* @@ -74,7 +71,9 @@ void __init find_smp_config(void) if (ncpus > maxcpus) ncpus = maxcpus; +#ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; +#endif while (ncpus--) MP_processor_info(mp++); diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 7bbebbf..8dedd01 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -59,11 +59,6 @@ __u32 voyager_quad_processors = 0; * activity count. Finally exported by i386_ksyms.c */ static int voyager_extended_cpus = 1; -/* Have we found an SMP box - used by time.c to do the profiling - interrupt for timeslicing; do not set to 1 until the per CPU timer - interrupt is active */ -int smp_found_config = 0; - /* Used for the invalidate map that's also checked in the spinlock */ static volatile unsigned long smp_invalidate_needed; -- cgit v0.10.2 From ce6444d39fdea29dcf145d2d95fe9cdc850aa53c Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 19 May 2008 19:47:09 +0400 Subject: x86: mp_bus_id_to_pci_bus is not needed diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index ea68c3e..17b2e8f 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -864,7 +864,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " "slot:%d, pin:%d.\n", bus, slot, pin); - if (mp_bus_id_to_pci_bus[bus] == -1) { + if (test_bit(bus, mp_bus_not_pci)) { printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index e7f1476..9637675 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -516,7 +516,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", bus, slot, pin); - if (mp_bus_id_to_pci_bus[bus] == -1) { + if (test_bit(bus, mp_bus_not_pci)) { apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d67cd76..83dfd66 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -41,13 +41,6 @@ int mp_bus_id_to_type[MAX_MP_BUSSES]; #endif DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 }; - -static int mp_current_pci_id; - -/* - * Intel MP BIOS table parsing routines: - */ /* * Checksum an MP configuration block. @@ -101,7 +94,6 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) static void __init MP_bus_info(struct mpc_config_bus *m) { char str[7]; - memcpy(str, m->mpc_bustype, 6); str[6] = 0; @@ -130,8 +122,6 @@ static void __init MP_bus_info(struct mpc_config_bus *m) mpc_oem_pci_bus(m, translation_table[mpc_record]); #endif clear_bit(m->mpc_busid, mp_bus_not_pci); - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; - mp_current_pci_id++; #if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h index 57a991b..b785ddd 100644 --- a/include/asm-x86/mpspec.h +++ b/include/asm-x86/mpspec.h @@ -32,8 +32,6 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES]; extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES]; - extern unsigned int boot_cpu_physical_apicid; extern int smp_found_config; extern int mpc_default_type; -- cgit v0.10.2 From 8732fc4b237fca3bd3cb0ec87ca8fb90271b0baf Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 19 May 2008 19:47:16 +0400 Subject: x86: move mp_bus_not_pci from mpparse.c diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 17b2e8f..2285b81 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -81,6 +81,12 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +int mp_bus_id_to_type[MAX_MP_BUSSES]; +#endif + +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + static int disable_timer_pin_1 __initdata; /* diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9637675..339cf6f 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -113,6 +113,8 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 83dfd66..e8e041e 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -33,16 +33,6 @@ #endif /* - * Various Linux-internal data structures created from the - * MP-table. - */ -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) -int mp_bus_id_to_type[MAX_MP_BUSSES]; -#endif - -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); - -/* * Checksum an MP configuration block. */ -- cgit v0.10.2 From 136ef671df04dc157afa0d4b96c7bd23ba072c9c Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Tue, 20 May 2008 00:29:59 +0400 Subject: x86: allow MPPARSE to be deselected in SMP configs diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ac1e31b..d5b364b 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -127,7 +127,7 @@ config 4KSTACKS config X86_FIND_SMP_CONFIG def_bool y - depends on X86_LOCAL_APIC || X86_VOYAGER + depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS depends on X86_32 config X86_MPPARSE diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index e1173aec..c04e8c9 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -814,7 +814,7 @@ void __init setup_arch(char **cmdline_p) "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); #endif #endif -#ifdef CONFIG_X86_LOCAL_APIC +#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS) if (smp_found_config) get_smp_config(); #endif diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 975a5da..89e6cca 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -456,10 +456,12 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_reserve_bootmem(); +#ifdef CONFIG_X86_MPPARSE /* * Find and reserve possible boot-time SMP configuration: */ find_smp_config(); +#endif #ifdef CONFIG_BLK_DEV_INITRD if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; @@ -502,11 +504,13 @@ void __init setup_arch(char **cmdline_p) init_cpu_to_node(); +#ifdef CONFIG_X86_MPPARSE /* * get boot-time SMP configuration: */ if (smp_found_config) get_smp_config(); +#endif init_apic_mappings(); ioapic_init_mappings(); -- cgit v0.10.2 From b764a15f679942a7bc9d4f9645299e1defcc5b43 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 22 May 2008 21:22:04 +0100 Subject: x86: Switch apm to unlocked_kernel This pushes the lock a fair way down and the final kill looks like it should be an easy project for someone who wants to have a shot at it. Signed-off-by: Alan Cox Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index bf9290e..00e6d13 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -228,6 +228,7 @@ #include #include #include +#include #include #include @@ -1149,7 +1150,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender) as->event_tail = 0; } as->events[as->event_head] = event; - if ((!as->suser) || (!as->writer)) + if (!as->suser || !as->writer) continue; switch (event) { case APM_SYS_SUSPEND: @@ -1396,7 +1397,7 @@ static void apm_mainloop(void) static int check_apm_user(struct apm_user *as, const char *func) { - if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) { + if (as == NULL || as->magic != APM_BIOS_MAGIC) { printk(KERN_ERR "apm: %s passed bad filp\n", func); return 1; } @@ -1459,18 +1460,19 @@ static unsigned int do_poll(struct file *fp, poll_table *wait) return 0; } -static int do_ioctl(struct inode *inode, struct file *filp, - u_int cmd, u_long arg) +static long do_ioctl(struct file *filp, u_int cmd, u_long arg) { struct apm_user *as; + int ret; as = filp->private_data; if (check_apm_user(as, "ioctl")) return -EIO; - if ((!as->suser) || (!as->writer)) + if (!as->suser || !as->writer) return -EPERM; switch (cmd) { case APM_IOC_STANDBY: + lock_kernel(); if (as->standbys_read > 0) { as->standbys_read--; as->standbys_pending--; @@ -1479,8 +1481,10 @@ static int do_ioctl(struct inode *inode, struct file *filp, queue_event(APM_USER_STANDBY, as); if (standbys_pending <= 0) standby(); + unlock_kernel(); break; case APM_IOC_SUSPEND: + lock_kernel(); if (as->suspends_read > 0) { as->suspends_read--; as->suspends_pending--; @@ -1488,16 +1492,17 @@ static int do_ioctl(struct inode *inode, struct file *filp, } else queue_event(APM_USER_SUSPEND, as); if (suspends_pending <= 0) { - return suspend(1); + ret = suspend(1); } else { as->suspend_wait = 1; wait_event_interruptible(apm_suspend_waitqueue, as->suspend_wait == 0); - return as->suspend_result; + ret = as->suspend_result; } - break; + unlock_kernel(); + return ret; default: - return -EINVAL; + return -ENOTTY; } return 0; } @@ -1860,7 +1865,7 @@ static const struct file_operations apm_bios_fops = { .owner = THIS_MODULE, .read = do_read, .poll = do_poll, - .ioctl = do_ioctl, + .unlocked_ioctl = do_ioctl, .open = do_open, .release = do_release, }; -- cgit v0.10.2 From 886dd58258e6ddebe20e7aebef7b167a24bad7ee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 15:44:38 +0200 Subject: debugging: make stacktrace independent from DEBUG_KERNEL Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d2099f4..9c17fb9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -419,7 +419,6 @@ config DEBUG_LOCKING_API_SELFTESTS config STACKTRACE bool - depends on DEBUG_KERNEL depends on STACKTRACE_SUPPORT config DEBUG_KOBJECT -- cgit v0.10.2 From c6531cce6e6e4b99bcda46b6268d6f2d9e30aea4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:14 +0200 Subject: sched: do not trace sched_clock The tracer uses sched_clock, so do not trace it. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/kernel/sched.c b/kernel/sched.c index e2e985e..6590a82 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -884,12 +884,12 @@ static unsigned long long __cpu_clock(int cpu) * For kernel-internal use: high-speed (but slightly incorrect) per-cpu * clock constructed from sched_clock(): */ -unsigned long long cpu_clock(int cpu) +unsigned long long notrace cpu_clock(int cpu) { unsigned long long prev_cpu_time, time, delta_time; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); prev_cpu_time = per_cpu(prev_cpu_time, cpu); time = __cpu_clock(cpu) + per_cpu(time_offset, cpu); delta_time = time-prev_cpu_time; @@ -898,7 +898,7 @@ unsigned long long cpu_clock(int cpu) time = __sync_cpu_clock(time, cpu); per_cpu(prev_cpu_time, cpu) = time; } - local_irq_restore(flags); + raw_local_irq_restore(flags); return time; } -- cgit v0.10.2 From 2e4db9d2c5db9c9dd3e1d1ec3263bc4a990ff8df Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Mon, 19 May 2008 14:23:44 -0700 Subject: x86: remove duplicate declaration of unknown_nmi_panic Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 1e36302..8455bf3 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -46,7 +46,6 @@ extern void nmi_watchdog_default(void); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; -extern int unknown_nmi_panic; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int avail_to_resrv_perfctr_nmi(unsigned int); extern int reserve_perfctr_nmi(unsigned int); -- cgit v0.10.2 From 85cc35fa7255d113b5383a9c8536c363274bb475 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 May 2008 21:21:36 +0200 Subject: x86: fix mpparse fallout UP builds with LOCAL_APIC=y and IO_APIC=n fail with a missing reference to mp_bus_not_pci. Distangle the mpparse code some more and move the ioapic specific bus check into a separate function. This code needs sume urgent un#ifdef surgery all over the place. Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index e8e041e..9f3792d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -81,6 +81,7 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) generic_processor_info(apicid, m->mpc_apicver); } +#ifdef CONFIG_X86_IO_APIC static void __init MP_bus_info(struct mpc_config_bus *m) { char str[7]; @@ -122,6 +123,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) } else printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); } +#endif #ifdef CONFIG_X86_IO_APIC @@ -336,7 +338,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) { struct mpc_config_bus *m = (struct mpc_config_bus *)mpt; +#ifdef CONFIG_X86_IO_APIC MP_bus_info(m); +#endif mpt += sizeof(*m); count += sizeof(*m); break; @@ -472,40 +476,11 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) MP_intsrc_info(&intsrc); } -#endif -static inline void __init construct_default_ISA_mptable(int mpc_default_type) +static void construct_ioapic_table(int mpc_default_type) { - struct mpc_config_processor processor; - struct mpc_config_bus bus; -#ifdef CONFIG_X86_IO_APIC struct mpc_config_ioapic ioapic; -#endif - struct mpc_config_lintsrc lintsrc; - int linttypes[2] = { mp_ExtINT, mp_NMI }; - int i; - - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* - * 2 CPUs, numbered 0 & 1. - */ - processor.mpc_type = MP_PROCESSOR; - /* Either an integrated APIC or a discrete 82489DX. */ - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; - processor.mpc_cpuflag = CPU_ENABLED; - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - for (i = 0; i < 2; i++) { - processor.mpc_apicid = i; - MP_processor_info(&processor); - } + struct mpc_config_bus bus; bus.mpc_type = MP_BUS; bus.mpc_busid = 0; @@ -534,7 +509,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) MP_bus_info(&bus); } -#ifdef CONFIG_X86_IO_APIC ioapic.mpc_type = MP_IOAPIC; ioapic.mpc_apicid = 2; ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; @@ -546,7 +520,42 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) * We set up most of the low 16 IO-APIC pins according to MPS rules. */ construct_default_ioirq_mptable(mpc_default_type); +} +#else +static inline void construct_ioapic_table(int mpc_default_type) { } #endif + +static inline void __init construct_default_ISA_mptable(int mpc_default_type) +{ + struct mpc_config_processor processor; + struct mpc_config_lintsrc lintsrc; + int linttypes[2] = { mp_ExtINT, mp_NMI }; + int i; + + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* + * 2 CPUs, numbered 0 & 1. + */ + processor.mpc_type = MP_PROCESSOR; + /* Either an integrated APIC or a discrete 82489DX. */ + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + processor.mpc_cpuflag = CPU_ENABLED; + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; + for (i = 0; i < 2; i++) { + processor.mpc_apicid = i; + MP_processor_info(&processor); + } + + construct_ioapic_table(mpc_default_type); + lintsrc.mpc_type = MP_LINTSRC; lintsrc.mpc_irqflag = 0; /* conforming */ lintsrc.mpc_srcbusid = 0; -- cgit v0.10.2 From ddca03c98a7f7ad5ab09967ff52d4ed60358c896 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:31 +0400 Subject: x86: nmi - unify die_nmi() interface By slightly changing 32bit mode die_nmi() we may unify the interface and make it common for both (32/64bit) modes Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 69bdae5..bd04a28 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -320,8 +320,6 @@ void touch_nmi_watchdog(void) } EXPORT_SYMBOL(touch_nmi_watchdog); -extern void die_nmi(struct pt_regs *, const char *msg); - notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { @@ -375,7 +373,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ - die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); + die_nmi("BUG: NMI Watchdog detected LOCKUP", + regs, 0); } else { __get_cpu_var(last_irq_sum) = sum; local_set(&__get_cpu_var(alert_counter), 0); @@ -406,7 +405,7 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) char buf[64]; sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(regs, buf); + die_nmi(buf, regs, 0); return 0; } diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index bde6f63..f31e665 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -755,9 +755,9 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static DEFINE_SPINLOCK(nmi_print_lock); -void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) +void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) { - if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; spin_lock(&nmi_print_lock); @@ -766,10 +766,12 @@ void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) * to get a message out: */ bust_spinlocks(1); - printk(KERN_EMERG "%s", msg); + printk(KERN_EMERG "%s", str); printk(" on CPU%d, ip %08lx, registers:\n", smp_processor_id(), regs->ip); show_registers(regs); + if (do_panic) + panic("Non maskable interrupt"); console_silent(); spin_unlock(&nmi_print_lock); bust_spinlocks(0); diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 8455bf3..7cd5b6a 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -38,12 +38,12 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev) #ifdef CONFIG_X86_64 extern void default_do_nmi(struct pt_regs *); -extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern void nmi_watchdog_default(void); #else #define nmi_watchdog_default() do {} while (0) #endif +extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); -- cgit v0.10.2 From e56b3a12c45e439169f2d1f7194be397667320a6 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:32 +0400 Subject: x86: nmi - die_nmi() output message unification Make 64bit die_nmi() to produce the same message as 32bit mode has Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 33cb8ec..1a98705 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -358,8 +358,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) */ local_inc(&__get_cpu_var(alert_counter)); if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) - die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs, - panic_on_timeout); + die_nmi("NMI Watchdog detected LOCKUP", + regs, panic_on_timeout); } else { __get_cpu_var(last_irq_sum) = sum; local_set(&__get_cpu_var(alert_counter), 0); diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index adff76e..6a048ce 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -614,7 +614,9 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) * We are in trouble anyway, lets at least try * to get a message out. */ - printk(str, smp_processor_id()); + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->ip); show_registers(regs); if (kexec_should_crash(current)) crash_kexec(regs); -- cgit v0.10.2 From c6425b9f143a75bbcd0a7684b4df40e20d0b2f32 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:33 +0400 Subject: x86: move do_nmi(), stop_nmi() and restart_nmi() to traps_64.c traps_32.c already holds these functions so do the same for traps_64.c Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 1a98705..c1ea671 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -30,7 +30,6 @@ int unknown_nmi_panic; int nmi_watchdog_enabled; -int panic_on_unrecovered_nmi; static cpumask_t backtrace_mask = CPU_MASK_NONE; @@ -383,30 +382,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) return rc; } -static unsigned ignore_nmis; - -asmlinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_enter(); - add_pda(__nmi_count,1); - if (!ignore_nmis) - default_do_nmi(regs); - nmi_exit(); -} - -void stop_nmi(void) -{ - acpi_nmi_disable(); - ignore_nmis++; -} - -void restart_nmi(void) -{ - ignore_nmis--; - acpi_nmi_enable(); -} - #ifdef CONFIG_SYSCTL static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 6a048ce..e4a3807 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -76,7 +76,9 @@ asmlinkage void alignment_check(void); asmlinkage void machine_check(void); asmlinkage void spurious_interrupt_bug(void); +int panic_on_unrecovered_nmi; static unsigned int code_bytes = 64; +static unsigned ignore_nmis; static inline void conditional_sti(struct pt_regs *regs) { @@ -867,6 +869,28 @@ asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) io_check_error(reason, regs); } +asmlinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) +{ + nmi_enter(); + add_pda(__nmi_count, 1); + if (!ignore_nmis) + default_do_nmi(regs); + nmi_exit(); +} + +void stop_nmi(void) +{ + acpi_nmi_disable(); + ignore_nmis++; +} + +void restart_nmi(void) +{ + ignore_nmis--; + acpi_nmi_enable(); +} + /* runs on IST stack. */ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) { -- cgit v0.10.2 From d1b946b97d71423f365fa797d1428e1847c0bec1 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:34 +0400 Subject: x86: nmi_32.c - add "panic" option Allow to pass "panic" option in 32bit mode Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index bd04a28..4437fe1 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -42,6 +42,7 @@ static cpumask_t backtrace_mask = CPU_MASK_NONE; * 0: the lapic NMI watchdog is disabled, but can be enabled */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ +static int panic_on_timeout; unsigned int nmi_watchdog = NMI_DEFAULT; static unsigned int nmi_hz = HZ; @@ -140,6 +141,14 @@ static int __init setup_nmi_watchdog(char *str) { int nmi; + if (!strncmp(str, "panic", 5)) { + panic_on_timeout = 1; + str = strchr(str, ','); + if (!str) + return 1; + ++str; + } + get_option(&str, &nmi); if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) @@ -374,7 +383,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) * die_nmi will return ONLY if NOTIFY_STOP happens.. */ die_nmi("BUG: NMI Watchdog detected LOCKUP", - regs, 0); + regs, panic_on_timeout); } else { __get_cpu_var(last_irq_sum) = sum; local_set(&__get_cpu_var(alert_counter), 0); -- cgit v0.10.2 From 4b82b277707a39b97271439c475f186f63ec4692 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:35 +0400 Subject: x86: nmi_32.c - add nmi_watchdog_default helper Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 4437fe1..7714e84 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -51,6 +51,17 @@ static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata = 0; +/* Run after command line and cpu_init init, but before all other checks */ +void nmi_watchdog_default(void) +{ + if (nmi_watchdog != NMI_DEFAULT) + return; + if (lapic_watchdog_ok()) + nmi_watchdog = NMI_LOCAL_APIC; + else + nmi_watchdog = NMI_IO_APIC; +} + #ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all @@ -437,12 +448,8 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, return -EIO; } - if (nmi_watchdog == NMI_DEFAULT) { - if (lapic_watchdog_ok()) - nmi_watchdog = NMI_LOCAL_APIC; - else - nmi_watchdog = NMI_IO_APIC; - } + /* if nmi_watchdog is not set yet, then set it */ + nmi_watchdog_default(); if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog_enabled) diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 7cd5b6a..1e8f34d 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -38,11 +38,9 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev) #ifdef CONFIG_X86_64 extern void default_do_nmi(struct pt_regs *); -extern void nmi_watchdog_default(void); -#else -#define nmi_watchdog_default() do {} while (0) #endif +extern void nmi_watchdog_default(void); extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; -- cgit v0.10.2 From ad63ba169d45ccb6594eb35a6c31c421fe70ff45 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:36 +0400 Subject: x86: nmi_32/64.c - use apic_write_around instead of apic_write apic_write_around will be expanded to apic_write in 64bit mode anyway. Only a few CPUs (well, old CPUs to be precise) requires such an action. In general it should not hurt and could be cleaned up for apic_write (just in case) Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 7714e84..ec5842b 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -248,7 +248,7 @@ void acpi_nmi_enable(void) static void __acpi_nmi_disable(void *__unused) { - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); + apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); } /* diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index c1ea671..f546e31 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -215,7 +215,7 @@ late_initcall(init_lapic_nmi_sysfs); static void __acpi_nmi_enable(void *__unused) { - apic_write(APIC_LVT0, APIC_DM_NMI); + apic_write_around(APIC_LVT0, APIC_DM_NMI); } /* @@ -229,7 +229,7 @@ void acpi_nmi_enable(void) static void __acpi_nmi_disable(void *__unused) { - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); + apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); } /* -- cgit v0.10.2 From 6c8decdf14b17d42f6eb817d310642f4d6598a19 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:37 +0400 Subject: x86: nmi_32.c - unknown_nmi_panic_callback should always panic Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index ec5842b..d0b0684 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -425,7 +425,7 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) char buf[64]; sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(buf, regs, 0); + die_nmi(buf, regs, 1); /* Always panic here */ return 0; } -- cgit v0.10.2 From 96f9dcb10755e96eae706b9e869c0dc25adb3d74 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:38 +0400 Subject: x86: nmi_64.c - use for_each_possible_cpu helper Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index f546e31..d98b21c 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -98,7 +98,7 @@ int __init check_nmi_watchdog(void) smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); #endif - for (cpu = 0; cpu < NR_CPUS; cpu++) + for_each_possible_cpu(cpu) prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count; local_irq_enable(); mdelay((20*1000)/nmi_hz); // wait 20 ticks -- cgit v0.10.2 From 7c2ba83f9a479eee6f302147767a30f3187fbd4b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:39 +0400 Subject: x86: nmi_32.c cleanup - use for_each_online_cpu helper Since cpu_online_map is touched (by for_each_online_cpu) at moment when cpu_callin_map is already filled up we can get rid of its checking at all Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index d0b0684..c6d0777 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -108,13 +108,7 @@ int __init check_nmi_watchdog(void) local_irq_enable(); mdelay((20*1000)/nmi_hz); // wait 20 ticks - for_each_possible_cpu(cpu) { -#ifdef CONFIG_SMP - /* Check cpu_callin_map here because that is set - after the timer is started. */ - if (!cpu_isset(cpu, cpu_callin_map)) - continue; -#endif + for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { -- cgit v0.10.2 From fd5cea02de100197a4c26d9e103508cf09b50a82 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:40 +0400 Subject: x86: nmi_32/64.c - add helper functions to hide arch specific data Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index c6d0777..a94dbed 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -51,6 +51,26 @@ static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata = 0; +static inline unsigned int get_nmi_count(int cpu) +{ + return nmi_count(cpu); +} + +static inline int mce_in_progress(void) +{ + return 0; +} + +/* + * Take the local apic timer and PIT/HPET into account. We don't + * know which one is active, when we have highres/dyntick on + */ +static inline unsigned int get_timer_irqs(int cpu) +{ + return per_cpu(irq_stat, cpu).apic_timer_irqs + + per_cpu(irq_stat, cpu).irq0_irqs; +} + /* Run after command line and cpu_init init, but before all other checks */ void nmi_watchdog_default(void) { @@ -104,19 +124,19 @@ int __init check_nmi_watchdog(void) #endif for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = nmi_count(cpu); + prev_nmi_count[cpu] = get_nmi_count(cpu); local_irq_enable(); mdelay((20*1000)/nmi_hz); // wait 20 ticks for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { printk(KERN_WARNING "WARNING: CPU#%d: NMI " "appears to be stuck (%d->%d)!\n", cpu, prev_nmi_count[cpu], - nmi_count(cpu)); + get_nmi_count(cpu)); per_cpu(wd_enabled, cpu) = 0; atomic_dec(&nmi_active); } @@ -355,6 +375,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) touched = 1; } + sum = get_timer_irqs(cpu); + + if (__get_cpu_var(nmi_touch)) { + __get_cpu_var(nmi_touch) = 0; + touched = 1; + } + if (cpu_isset(cpu, backtrace_mask)) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ @@ -365,16 +392,9 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) cpu_clear(cpu, backtrace_mask); } - /* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ - sum = per_cpu(irq_stat, cpu).apic_timer_irqs + - per_cpu(irq_stat, cpu).irq0_irqs; - if (__get_cpu_var(nmi_touch)) { - __get_cpu_var(nmi_touch) = 0; + /* Could check oops_in_progress here too, but it's safer not to */ + if (mce_in_progress()) touched = 1; - } /* if the none of the timers isn't firing, this cpu isn't doing much */ if (!touched && __get_cpu_var(last_irq_sum) == sum) { diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index d98b21c..c680244 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -47,6 +47,30 @@ static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); +static int endflag __initdata = 0; + +static inline unsigned int get_nmi_count(int cpu) +{ + return cpu_pda(cpu)->__nmi_count; +} + +static inline int mce_in_progress(void) +{ +#ifdef CONFIG_X86_MCE + return atomic_read(&mce_entry) > 0; +#endif + return 0; +} + +/* + * Take the local apic timer and PIT/HPET into account. We don't + * know which one is active, when we have highres/dyntick on + */ +static inline unsigned int get_timer_irqs(int cpu) +{ + return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); +} + /* Run after command line and cpu_init init, but before all other checks */ void nmi_watchdog_default(void) { @@ -55,8 +79,6 @@ void nmi_watchdog_default(void) nmi_watchdog = NMI_NONE; } -static int endflag __initdata = 0; - #ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all @@ -99,19 +121,19 @@ int __init check_nmi_watchdog(void) #endif for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count; + prev_nmi_count[cpu] = get_nmi_count(cpu); local_irq_enable(); mdelay((20*1000)/nmi_hz); // wait 20 ticks for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) { + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { printk(KERN_WARNING "WARNING: CPU#%d: NMI " "appears to be stuck (%d->%d)!\n", cpu, prev_nmi_count[cpu], - cpu_pda(cpu)->__nmi_count); + get_nmi_count(cpu)); per_cpu(wd_enabled, cpu) = 0; atomic_dec(&nmi_active); } @@ -327,7 +349,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) touched = 1; } - sum = read_pda(apic_timer_irqs) + read_pda(irq0_irqs); + sum = get_timer_irqs(cpu); + if (__get_cpu_var(nmi_touch)) { __get_cpu_var(nmi_touch) = 0; touched = 1; @@ -343,12 +366,9 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) cpu_clear(cpu, backtrace_mask); } -#ifdef CONFIG_X86_MCE - /* Could check oops_in_progress here too, but it's safer - not too */ - if (atomic_read(&mce_entry) > 0) + if (mce_in_progress()) touched = 1; -#endif + /* if the apic timer isn't firing, this cpu isn't doing much */ if (!touched && __get_cpu_var(last_irq_sum) == sum) { /* -- cgit v0.10.2 From 1798bc22b2790bf2a956588e6b17c36ef79ceff7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 24 May 2008 19:36:41 +0400 Subject: x86: nmi_32/64.c - merge down nmi_32.c and nmi_64.c to nmi.c Signed-off-by: Cyrill Gorcunov Cc: hpa@zytor.com Cc: mingo@redhat.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3..d5dd666 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -53,7 +53,7 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o +obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c new file mode 100644 index 0000000..69a839f --- /dev/null +++ b/arch/x86/kernel/nmi.c @@ -0,0 +1,533 @@ +/* + * NMI watchdog support on APIC systems + * + * Started by Ingo Molnar + * + * Fixes: + * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. + * Mikael Pettersson : Power Management for local APIC NMI watchdog. + * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. + * Pavel Machek and + * Mikael Pettersson : PM converted to driver model. Disable/enable API. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +int unknown_nmi_panic; +int nmi_watchdog_enabled; + +static cpumask_t backtrace_mask = CPU_MASK_NONE; + +/* nmi_active: + * >0: the lapic NMI watchdog is active, but can be disabled + * <0: the lapic NMI watchdog has not been set up, and cannot + * be enabled + * 0: the lapic NMI watchdog is disabled, but can be enabled + */ +atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ +static int panic_on_timeout; + +unsigned int nmi_watchdog = NMI_DEFAULT; + +static unsigned int nmi_hz = HZ; +static DEFINE_PER_CPU(short, wd_enabled); +static int endflag __initdata = 0; + +static inline unsigned int get_nmi_count(int cpu) +{ +#ifdef CONFIG_X86_64 + return cpu_pda(cpu)->__nmi_count; +#else + return nmi_count(cpu); +#endif +} + +static inline int mce_in_progress(void) +{ +#if defined(CONFIX_X86_64) && defined(CONFIG_X86_MCE) + return atomic_read(&mce_entry) > 0; +#endif + return 0; +} + +/* + * Take the local apic timer and PIT/HPET into account. We don't + * know which one is active, when we have highres/dyntick on + */ +static inline unsigned int get_timer_irqs(int cpu) +{ +#ifdef CONFIG_X86_64 + return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); +#else + return per_cpu(irq_stat, cpu).apic_timer_irqs + + per_cpu(irq_stat, cpu).irq0_irqs; +#endif +} + +/* Run after command line and cpu_init init, but before all other checks */ +void nmi_watchdog_default(void) +{ + if (nmi_watchdog != NMI_DEFAULT) + return; +#ifdef CONFIG_X86_64 + nmi_watchdog = NMI_NONE; +#else + if (lapic_watchdog_ok()) + nmi_watchdog = NMI_LOCAL_APIC; + else + nmi_watchdog = NMI_IO_APIC; +#endif +} + +#ifdef CONFIG_SMP +/* + * The performance counters used by NMI_LOCAL_APIC don't trigger when + * the CPU is idle. To make sure the NMI watchdog really ticks on all + * CPUs during the test make them busy. + */ +static __init void nmi_cpu_busy(void *data) +{ + local_irq_enable_in_hardirq(); + /* + * Intentionally don't use cpu_relax here. This is + * to make sure that the performance counter really ticks, + * even if there is a simulator or similar that catches the + * pause instruction. On a real HT machine this is fine because + * all other CPUs are busy with "useless" delay loops and don't + * care if they get somewhat less cycles. + */ + while (endflag == 0) + mb(); +} +#endif + +int __init check_nmi_watchdog(void) +{ + unsigned int *prev_nmi_count; + int cpu; + + if (nmi_watchdog == NMI_NONE || nmi_watchdog == NMI_DISABLED) + return 0; + + if (!atomic_read(&nmi_active)) + return 0; + + prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); + if (!prev_nmi_count) + goto error; + + printk(KERN_INFO "Testing NMI watchdog ... "); + +#ifdef CONFIG_SMP + if (nmi_watchdog == NMI_LOCAL_APIC) + smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); +#endif + + for_each_possible_cpu(cpu) + prev_nmi_count[cpu] = get_nmi_count(cpu); + local_irq_enable(); + mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ + + for_each_online_cpu(cpu) { + if (!per_cpu(wd_enabled, cpu)) + continue; + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { + printk(KERN_WARNING "WARNING: CPU#%d: NMI " + "appears to be stuck (%d->%d)!\n", + cpu, + prev_nmi_count[cpu], + get_nmi_count(cpu)); + per_cpu(wd_enabled, cpu) = 0; + atomic_dec(&nmi_active); + } + } + endflag = 1; + if (!atomic_read(&nmi_active)) { + kfree(prev_nmi_count); + atomic_set(&nmi_active, -1); + goto error; + } + printk("OK.\n"); + + /* + * now that we know it works we can reduce NMI frequency to + * something more reasonable; makes a difference in some configs + */ + if (nmi_watchdog == NMI_LOCAL_APIC) + nmi_hz = lapic_adjust_nmi_hz(1); + + kfree(prev_nmi_count); + return 0; + +error: +#ifdef CONFIG_X86_32 + timer_ack = !cpu_has_tsc; +#endif + return -1; +} + +static int __init setup_nmi_watchdog(char *str) +{ + int nmi; + + if (!strncmp(str, "panic", 5)) { + panic_on_timeout = 1; + str = strchr(str, ','); + if (!str) + return 1; + ++str; + } + + get_option(&str, &nmi); + + if (nmi >= NMI_INVALID || nmi < NMI_NONE) + return 0; + + nmi_watchdog = nmi; + return 1; +} +__setup("nmi_watchdog=", setup_nmi_watchdog); + +/* + * Suspend/resume support + */ +#ifdef CONFIG_PM + +static int nmi_pm_active; /* nmi_active before suspend */ + +static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) +{ + /* only CPU0 goes here, other CPUs should be offline */ + nmi_pm_active = atomic_read(&nmi_active); + stop_apic_nmi_watchdog(NULL); + BUG_ON(atomic_read(&nmi_active) != 0); + return 0; +} + +static int lapic_nmi_resume(struct sys_device *dev) +{ + /* only CPU0 goes here, other CPUs should be offline */ + if (nmi_pm_active > 0) { + setup_apic_nmi_watchdog(NULL); + touch_nmi_watchdog(); + } + return 0; +} + +static struct sysdev_class nmi_sysclass = { + .name = "lapic_nmi", + .resume = lapic_nmi_resume, + .suspend = lapic_nmi_suspend, +}; + +static struct sys_device device_lapic_nmi = { + .id = 0, + .cls = &nmi_sysclass, +}; + +static int __init init_lapic_nmi_sysfs(void) +{ + int error; + + /* + * should really be a BUG_ON but b/c this is an + * init call, it just doesn't work. -dcz + */ + if (nmi_watchdog != NMI_LOCAL_APIC) + return 0; + + if (atomic_read(&nmi_active) < 0) + return 0; + + error = sysdev_class_register(&nmi_sysclass); + if (!error) + error = sysdev_register(&device_lapic_nmi); + return error; +} + +/* must come after the local APIC's device_initcall() */ +late_initcall(init_lapic_nmi_sysfs); + +#endif /* CONFIG_PM */ + +static void __acpi_nmi_enable(void *__unused) +{ + apic_write_around(APIC_LVT0, APIC_DM_NMI); +} + +/* + * Enable timer based NMIs on all CPUs: + */ +void acpi_nmi_enable(void) +{ + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) + on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); +} + +static void __acpi_nmi_disable(void *__unused) +{ + apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); +} + +/* + * Disable timer based NMIs on all CPUs: + */ +void acpi_nmi_disable(void) +{ + if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) + on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); +} + +void setup_apic_nmi_watchdog(void *unused) +{ + if (__get_cpu_var(wd_enabled)) + return; + + /* cheap hack to support suspend/resume */ + /* if cpu0 is not active neither should the other cpus */ + if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) + return; + + switch (nmi_watchdog) { + case NMI_LOCAL_APIC: + /* enable it before to avoid race with handler */ + __get_cpu_var(wd_enabled) = 1; + if (lapic_watchdog_init(nmi_hz) < 0) { + __get_cpu_var(wd_enabled) = 0; + return; + } + /* FALL THROUGH */ + case NMI_IO_APIC: + __get_cpu_var(wd_enabled) = 1; + atomic_inc(&nmi_active); + } +} + +void stop_apic_nmi_watchdog(void *unused) +{ + /* only support LOCAL and IO APICs for now */ + if (nmi_watchdog != NMI_LOCAL_APIC && + nmi_watchdog != NMI_IO_APIC) + return; + if (__get_cpu_var(wd_enabled) == 0) + return; + if (nmi_watchdog == NMI_LOCAL_APIC) + lapic_watchdog_stop(); + __get_cpu_var(wd_enabled) = 0; + atomic_dec(&nmi_active); +} + +/* + * the best way to detect whether a CPU has a 'hard lockup' problem + * is to check it's local APIC timer IRQ counts. If they are not + * changing then that CPU has some problem. + * + * as these watchdog NMI IRQs are generated on every CPU, we only + * have to check the current processor. + * + * since NMIs don't listen to _any_ locks, we have to be extremely + * careful not to rely on unsafe variables. The printk might lock + * up though, so we have to break up any console locks first ... + * [when there will be more tty-related locks, break them up here too!] + */ + +static DEFINE_PER_CPU(unsigned, last_irq_sum); +static DEFINE_PER_CPU(local_t, alert_counter); +static DEFINE_PER_CPU(int, nmi_touch); + +void touch_nmi_watchdog(void) +{ + if (nmi_watchdog > 0) { + unsigned cpu; + + /* + * Tell other CPUs to reset their alert counters. We cannot + * do it ourselves because the alert count increase is not + * atomic. + */ + for_each_present_cpu(cpu) { + if (per_cpu(nmi_touch, cpu) != 1) + per_cpu(nmi_touch, cpu) = 1; + } + } + + /* + * Tickle the softlockup detector too: + */ + touch_softlockup_watchdog(); +} +EXPORT_SYMBOL(touch_nmi_watchdog); + +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) +{ + /* + * Since current_thread_info()-> is always on the stack, and we + * always switch the stack NMI-atomically, it's safe to use + * smp_processor_id(). + */ + unsigned int sum; + int touched = 0; + int cpu = smp_processor_id(); + int rc = 0; + + /* check for other users first */ + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) + == NOTIFY_STOP) { + rc = 1; + touched = 1; + } + + sum = get_timer_irqs(cpu); + + if (__get_cpu_var(nmi_touch)) { + __get_cpu_var(nmi_touch) = 0; + touched = 1; + } + + if (cpu_isset(cpu, backtrace_mask)) { + static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + + spin_lock(&lock); + printk("NMI backtrace for cpu %d\n", cpu); + dump_stack(); + spin_unlock(&lock); + cpu_clear(cpu, backtrace_mask); + } + + /* Could check oops_in_progress here too, but it's safer not to */ + if (mce_in_progress()) + touched = 1; + + /* if the none of the timers isn't firing, this cpu isn't doing much */ + if (!touched && __get_cpu_var(last_irq_sum) == sum) { + /* + * Ayiee, looks like this CPU is stuck ... + * wait a few IRQs (5 seconds) before doing the oops ... + */ + local_inc(&__get_cpu_var(alert_counter)); + if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) + /* + * die_nmi will return ONLY if NOTIFY_STOP happens.. + */ + die_nmi("BUG: NMI Watchdog detected LOCKUP", + regs, panic_on_timeout); + } else { + __get_cpu_var(last_irq_sum) = sum; + local_set(&__get_cpu_var(alert_counter), 0); + } + + /* see if the nmi watchdog went off */ + if (!__get_cpu_var(wd_enabled)) + return rc; + switch (nmi_watchdog) { + case NMI_LOCAL_APIC: + rc |= lapic_wd_event(nmi_hz); + break; + case NMI_IO_APIC: + /* + * don't know how to accurately check for this. + * just assume it was a watchdog timer interrupt + * This matches the old behaviour. + */ + rc = 1; + break; + } + return rc; +} + +#ifdef CONFIG_SYSCTL + +static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) +{ + unsigned char reason = get_nmi_reason(); + char buf[64]; + + sprintf(buf, "NMI received for unknown reason %02x\n", reason); + die_nmi(buf, regs, 1); /* Always panic here */ + return 0; +} + +/* + * proc handler for /proc/sys/kernel/nmi + */ +int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int old_state; + + nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; + old_state = nmi_watchdog_enabled; + proc_dointvec(table, write, file, buffer, length, ppos); + if (!!old_state == !!nmi_watchdog_enabled) + return 0; + + if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { + printk(KERN_WARNING + "NMI watchdog is permanently disabled\n"); + return -EIO; + } + + /* if nmi_watchdog is not set yet, then set it */ + nmi_watchdog_default(); + + if (nmi_watchdog == NMI_LOCAL_APIC) { + if (nmi_watchdog_enabled) + enable_lapic_nmi_watchdog(); + else + disable_lapic_nmi_watchdog(); + } else { + printk(KERN_WARNING + "NMI watchdog doesn't know what hardware to touch\n"); + return -EIO; + } + return 0; +} + +#endif /* CONFIG_SYSCTL */ + +int do_nmi_callback(struct pt_regs *regs, int cpu) +{ +#ifdef CONFIG_SYSCTL + if (unknown_nmi_panic) + return unknown_nmi_panic_callback(regs, cpu); +#endif + return 0; +} + +void __trigger_all_cpu_backtrace(void) +{ + int i; + + backtrace_mask = cpu_online_map; + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpus_empty(backtrace_mask)) + break; + mdelay(1); + } +} + +EXPORT_SYMBOL(nmi_active); +EXPORT_SYMBOL(nmi_watchdog); + diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c deleted file mode 100644 index a94dbed..0000000 --- a/arch/x86/kernel/nmi_32.c +++ /dev/null @@ -1,506 +0,0 @@ -/* - * NMI watchdog support on APIC systems - * - * Started by Ingo Molnar - * - * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. Disable/enable API. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mach_traps.h" - -int unknown_nmi_panic; -int nmi_watchdog_enabled; - -static cpumask_t backtrace_mask = CPU_MASK_NONE; - -/* nmi_active: - * >0: the lapic NMI watchdog is active, but can be disabled - * <0: the lapic NMI watchdog has not been set up, and cannot - * be enabled - * 0: the lapic NMI watchdog is disabled, but can be enabled - */ -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -static int panic_on_timeout; - -unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; - -static DEFINE_PER_CPU(short, wd_enabled); - -static int endflag __initdata = 0; - -static inline unsigned int get_nmi_count(int cpu) -{ - return nmi_count(cpu); -} - -static inline int mce_in_progress(void) -{ - return 0; -} - -/* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ -static inline unsigned int get_timer_irqs(int cpu) -{ - return per_cpu(irq_stat, cpu).apic_timer_irqs + - per_cpu(irq_stat, cpu).irq0_irqs; -} - -/* Run after command line and cpu_init init, but before all other checks */ -void nmi_watchdog_default(void) -{ - if (nmi_watchdog != NMI_DEFAULT) - return; - if (lapic_watchdog_ok()) - nmi_watchdog = NMI_LOCAL_APIC; - else - nmi_watchdog = NMI_IO_APIC; -} - -#ifdef CONFIG_SMP -/* The performance counters used by NMI_LOCAL_APIC don't trigger when - * the CPU is idle. To make sure the NMI watchdog really ticks on all - * CPUs during the test make them busy. - */ -static __init void nmi_cpu_busy(void *data) -{ - local_irq_enable_in_hardirq(); - /* Intentionally don't use cpu_relax here. This is - to make sure that the performance counter really ticks, - even if there is a simulator or similar that catches the - pause instruction. On a real HT machine this is fine because - all other CPUs are busy with "useless" delay loops and don't - care if they get somewhat less cycles. */ - while (endflag == 0) - mb(); -} -#endif - -int __init check_nmi_watchdog(void) -{ - unsigned int *prev_nmi_count; - int cpu; - - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) - return 0; - - if (!atomic_read(&nmi_active)) - return 0; - - prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); - if (!prev_nmi_count) - goto error; - - printk(KERN_INFO "Testing NMI watchdog ... "); - -#ifdef CONFIG_SMP - if (nmi_watchdog == NMI_LOCAL_APIC) - smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); -#endif - - for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = get_nmi_count(cpu); - local_irq_enable(); - mdelay((20*1000)/nmi_hz); // wait 20 ticks - - for_each_online_cpu(cpu) { - if (!per_cpu(wd_enabled, cpu)) - continue; - if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk(KERN_WARNING "WARNING: CPU#%d: NMI " - "appears to be stuck (%d->%d)!\n", - cpu, - prev_nmi_count[cpu], - get_nmi_count(cpu)); - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); - } - } - endflag = 1; - if (!atomic_read(&nmi_active)) { - kfree(prev_nmi_count); - atomic_set(&nmi_active, -1); - goto error; - } - printk("OK.\n"); - - /* now that we know it works we can reduce NMI frequency to - something more reasonable; makes a difference in some configs */ - if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = lapic_adjust_nmi_hz(1); - - kfree(prev_nmi_count); - return 0; -error: - timer_ack = !cpu_has_tsc; - - return -1; -} - -static int __init setup_nmi_watchdog(char *str) -{ - int nmi; - - if (!strncmp(str, "panic", 5)) { - panic_on_timeout = 1; - str = strchr(str, ','); - if (!str) - return 1; - ++str; - } - - get_option(&str, &nmi); - - if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) - return 0; - - nmi_watchdog = nmi; - return 1; -} - -__setup("nmi_watchdog=", setup_nmi_watchdog); - - -/* Suspend/resume support */ - -#ifdef CONFIG_PM - -static int nmi_pm_active; /* nmi_active before suspend */ - -static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - /* only CPU0 goes here, other CPUs should be offline */ - nmi_pm_active = atomic_read(&nmi_active); - stop_apic_nmi_watchdog(NULL); - BUG_ON(atomic_read(&nmi_active) != 0); - return 0; -} - -static int lapic_nmi_resume(struct sys_device *dev) -{ - /* only CPU0 goes here, other CPUs should be offline */ - if (nmi_pm_active > 0) { - setup_apic_nmi_watchdog(NULL); - touch_nmi_watchdog(); - } - return 0; -} - - -static struct sysdev_class nmi_sysclass = { - .name = "lapic_nmi", - .resume = lapic_nmi_resume, - .suspend = lapic_nmi_suspend, -}; - -static struct sys_device device_lapic_nmi = { - .id = 0, - .cls = &nmi_sysclass, -}; - -static int __init init_lapic_nmi_sysfs(void) -{ - int error; - - /* should really be a BUG_ON but b/c this is an - * init call, it just doesn't work. -dcz - */ - if (nmi_watchdog != NMI_LOCAL_APIC) - return 0; - - if (atomic_read(&nmi_active) < 0) - return 0; - - error = sysdev_class_register(&nmi_sysclass); - if (!error) - error = sysdev_register(&device_lapic_nmi); - return error; -} -/* must come after the local APIC's device_initcall() */ -late_initcall(init_lapic_nmi_sysfs); - -#endif /* CONFIG_PM */ - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write_around(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); -} - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); -} - -void setup_apic_nmi_watchdog(void *unused) -{ - if (__get_cpu_var(wd_enabled)) - return; - - /* cheap hack to support suspend/resume */ - /* if cpu0 is not active neither should the other cpus */ - if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) - return; - - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */ - if (lapic_watchdog_init(nmi_hz) < 0) { - __get_cpu_var(wd_enabled) = 0; - return; - } - /* FALL THROUGH */ - case NMI_IO_APIC: - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - } -} - -void stop_apic_nmi_watchdog(void *unused) -{ - /* only support LOCAL and IO APICs for now */ - if ((nmi_watchdog != NMI_LOCAL_APIC) && - (nmi_watchdog != NMI_IO_APIC)) - return; - if (__get_cpu_var(wd_enabled) == 0) - return; - if (nmi_watchdog == NMI_LOCAL_APIC) - lapic_watchdog_stop(); - __get_cpu_var(wd_enabled) = 0; - atomic_dec(&nmi_active); -} - -/* - * the best way to detect whether a CPU has a 'hard lockup' problem - * is to check it's local APIC timer IRQ counts. If they are not - * changing then that CPU has some problem. - * - * as these watchdog NMI IRQs are generated on every CPU, we only - * have to check the current processor. - * - * since NMIs don't listen to _any_ locks, we have to be extremely - * careful not to rely on unsafe variables. The printk might lock - * up though, so we have to break up any console locks first ... - * [when there will be more tty-related locks, break them up - * here too!] - */ - -static DEFINE_PER_CPU(unsigned, last_irq_sum); -static DEFINE_PER_CPU(local_t, alert_counter); -static DEFINE_PER_CPU(int, nmi_touch); - -void touch_nmi_watchdog(void) -{ - if (nmi_watchdog > 0) { - unsigned cpu; - - /* - * Tell other CPUs to reset their alert counters. We cannot - * do it ourselves because the alert count increase is not - * atomic. - */ - for_each_present_cpu(cpu) { - if (per_cpu(nmi_touch, cpu) != 1) - per_cpu(nmi_touch, cpu) = 1; - } - } - - /* - * Tickle the softlockup detector too: - */ - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -notrace __kprobes int -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) -{ - - /* - * Since current_thread_info()-> is always on the stack, and we - * always switch the stack NMI-atomically, it's safe to use - * smp_processor_id(). - */ - unsigned int sum; - int touched = 0; - int cpu = smp_processor_id(); - int rc = 0; - - /* check for other users first */ - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) { - rc = 1; - touched = 1; - } - - sum = get_timer_irqs(cpu); - - if (__get_cpu_var(nmi_touch)) { - __get_cpu_var(nmi_touch) = 0; - touched = 1; - } - - if (cpu_isset(cpu, backtrace_mask)) { - static DEFINE_SPINLOCK(lock); /* Serialise the printks */ - - spin_lock(&lock); - printk("NMI backtrace for cpu %d\n", cpu); - dump_stack(); - spin_unlock(&lock); - cpu_clear(cpu, backtrace_mask); - } - - /* Could check oops_in_progress here too, but it's safer not to */ - if (mce_in_progress()) - touched = 1; - - /* if the none of the timers isn't firing, this cpu isn't doing much */ - if (!touched && __get_cpu_var(last_irq_sum) == sum) { - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi("BUG: NMI Watchdog detected LOCKUP", - regs, panic_on_timeout); - } else { - __get_cpu_var(last_irq_sum) = sum; - local_set(&__get_cpu_var(alert_counter), 0); - } - /* see if the nmi watchdog went off */ - if (!__get_cpu_var(wd_enabled)) - return rc; - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - rc |= lapic_wd_event(nmi_hz); - break; - case NMI_IO_APIC: - /* don't know how to accurately check for this. - * just assume it was a watchdog timer interrupt - * This matches the old behaviour. - */ - rc = 1; - break; - } - return rc; -} - -#ifdef CONFIG_SYSCTL - -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) -{ - unsigned char reason = get_nmi_reason(); - char buf[64]; - - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(buf, regs, 1); /* Always panic here */ - return 0; -} - -/* - * proc handler for /proc/sys/kernel/nmi - */ -int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; - old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, file, buffer, length, ppos); - if (!!old_state == !!nmi_watchdog_enabled) - return 0; - - if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { - printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); - return -EIO; - } - - /* if nmi_watchdog is not set yet, then set it */ - nmi_watchdog_default(); - - if (nmi_watchdog == NMI_LOCAL_APIC) { - if (nmi_watchdog_enabled) - enable_lapic_nmi_watchdog(); - else - disable_lapic_nmi_watchdog(); - } else { - printk( KERN_WARNING - "NMI watchdog doesn't know what hardware to touch\n"); - return -EIO; - } - return 0; -} - -#endif - -int do_nmi_callback(struct pt_regs *regs, int cpu) -{ -#ifdef CONFIG_SYSCTL - if (unknown_nmi_panic) - return unknown_nmi_panic_callback(regs, cpu); -#endif - return 0; -} - -void __trigger_all_cpu_backtrace(void) -{ - int i; - - backtrace_mask = cpu_online_map; - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpus_empty(backtrace_mask)) - break; - mdelay(1); - } -} - -EXPORT_SYMBOL(nmi_active); -EXPORT_SYMBOL(nmi_watchdog); diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c deleted file mode 100644 index c680244..0000000 --- a/arch/x86/kernel/nmi_64.c +++ /dev/null @@ -1,477 +0,0 @@ -/* - * NMI watchdog support on APIC systems - * - * Started by Ingo Molnar - * - * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. Disable/enable API. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -int unknown_nmi_panic; -int nmi_watchdog_enabled; - -static cpumask_t backtrace_mask = CPU_MASK_NONE; - -/* nmi_active: - * >0: the lapic NMI watchdog is active, but can be disabled - * <0: the lapic NMI watchdog has not been set up, and cannot - * be enabled - * 0: the lapic NMI watchdog is disabled, but can be enabled - */ -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -static int panic_on_timeout; - -unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; - -static DEFINE_PER_CPU(short, wd_enabled); - -static int endflag __initdata = 0; - -static inline unsigned int get_nmi_count(int cpu) -{ - return cpu_pda(cpu)->__nmi_count; -} - -static inline int mce_in_progress(void) -{ -#ifdef CONFIG_X86_MCE - return atomic_read(&mce_entry) > 0; -#endif - return 0; -} - -/* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ -static inline unsigned int get_timer_irqs(int cpu) -{ - return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); -} - -/* Run after command line and cpu_init init, but before all other checks */ -void nmi_watchdog_default(void) -{ - if (nmi_watchdog != NMI_DEFAULT) - return; - nmi_watchdog = NMI_NONE; -} - -#ifdef CONFIG_SMP -/* The performance counters used by NMI_LOCAL_APIC don't trigger when - * the CPU is idle. To make sure the NMI watchdog really ticks on all - * CPUs during the test make them busy. - */ -static __init void nmi_cpu_busy(void *data) -{ - local_irq_enable_in_hardirq(); - /* Intentionally don't use cpu_relax here. This is - to make sure that the performance counter really ticks, - even if there is a simulator or similar that catches the - pause instruction. On a real HT machine this is fine because - all other CPUs are busy with "useless" delay loops and don't - care if they get somewhat less cycles. */ - while (endflag == 0) - mb(); -} -#endif - -int __init check_nmi_watchdog(void) -{ - unsigned int *prev_nmi_count; - int cpu; - - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) - return 0; - - if (!atomic_read(&nmi_active)) - return 0; - - prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); - if (!prev_nmi_count) - return -1; - - printk(KERN_INFO "Testing NMI watchdog ... "); - -#ifdef CONFIG_SMP - if (nmi_watchdog == NMI_LOCAL_APIC) - smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); -#endif - - for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = get_nmi_count(cpu); - local_irq_enable(); - mdelay((20*1000)/nmi_hz); // wait 20 ticks - - for_each_online_cpu(cpu) { - if (!per_cpu(wd_enabled, cpu)) - continue; - if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk(KERN_WARNING "WARNING: CPU#%d: NMI " - "appears to be stuck (%d->%d)!\n", - cpu, - prev_nmi_count[cpu], - get_nmi_count(cpu)); - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); - } - } - endflag = 1; - if (!atomic_read(&nmi_active)) { - kfree(prev_nmi_count); - atomic_set(&nmi_active, -1); - return -1; - } - printk("OK.\n"); - - /* now that we know it works we can reduce NMI frequency to - something more reasonable; makes a difference in some configs */ - if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = lapic_adjust_nmi_hz(1); - - kfree(prev_nmi_count); - return 0; -} - -static int __init setup_nmi_watchdog(char *str) -{ - int nmi; - - if (!strncmp(str,"panic",5)) { - panic_on_timeout = 1; - str = strchr(str, ','); - if (!str) - return 1; - ++str; - } - - get_option(&str, &nmi); - - if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) - return 0; - - nmi_watchdog = nmi; - return 1; -} - -__setup("nmi_watchdog=", setup_nmi_watchdog); - -#ifdef CONFIG_PM - -static int nmi_pm_active; /* nmi_active before suspend */ - -static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - /* only CPU0 goes here, other CPUs should be offline */ - nmi_pm_active = atomic_read(&nmi_active); - stop_apic_nmi_watchdog(NULL); - BUG_ON(atomic_read(&nmi_active) != 0); - return 0; -} - -static int lapic_nmi_resume(struct sys_device *dev) -{ - /* only CPU0 goes here, other CPUs should be offline */ - if (nmi_pm_active > 0) { - setup_apic_nmi_watchdog(NULL); - touch_nmi_watchdog(); - } - return 0; -} - -static struct sysdev_class nmi_sysclass = { - .name = "lapic_nmi", - .resume = lapic_nmi_resume, - .suspend = lapic_nmi_suspend, -}; - -static struct sys_device device_lapic_nmi = { - .id = 0, - .cls = &nmi_sysclass, -}; - -static int __init init_lapic_nmi_sysfs(void) -{ - int error; - - /* should really be a BUG_ON but b/c this is an - * init call, it just doesn't work. -dcz - */ - if (nmi_watchdog != NMI_LOCAL_APIC) - return 0; - - if (atomic_read(&nmi_active) < 0) - return 0; - - error = sysdev_class_register(&nmi_sysclass); - if (!error) - error = sysdev_register(&device_lapic_nmi); - return error; -} -/* must come after the local APIC's device_initcall() */ -late_initcall(init_lapic_nmi_sysfs); - -#endif /* CONFIG_PM */ - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write_around(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); -} - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); -} - -void setup_apic_nmi_watchdog(void *unused) -{ - if (__get_cpu_var(wd_enabled)) - return; - - /* cheap hack to support suspend/resume */ - /* if cpu0 is not active neither should the other cpus */ - if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) - return; - - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - __get_cpu_var(wd_enabled) = 1; - if (lapic_watchdog_init(nmi_hz) < 0) { - __get_cpu_var(wd_enabled) = 0; - return; - } - /* FALL THROUGH */ - case NMI_IO_APIC: - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - } -} - -void stop_apic_nmi_watchdog(void *unused) -{ - /* only support LOCAL and IO APICs for now */ - if ((nmi_watchdog != NMI_LOCAL_APIC) && - (nmi_watchdog != NMI_IO_APIC)) - return; - if (__get_cpu_var(wd_enabled) == 0) - return; - if (nmi_watchdog == NMI_LOCAL_APIC) - lapic_watchdog_stop(); - __get_cpu_var(wd_enabled) = 0; - atomic_dec(&nmi_active); -} - -/* - * the best way to detect whether a CPU has a 'hard lockup' problem - * is to check it's local APIC timer IRQ counts. If they are not - * changing then that CPU has some problem. - * - * as these watchdog NMI IRQs are generated on every CPU, we only - * have to check the current processor. - */ - -static DEFINE_PER_CPU(unsigned, last_irq_sum); -static DEFINE_PER_CPU(local_t, alert_counter); -static DEFINE_PER_CPU(int, nmi_touch); - -void touch_nmi_watchdog(void) -{ - if (nmi_watchdog > 0) { - unsigned cpu; - - /* - * Tell other CPUs to reset their alert counters. We cannot - * do it ourselves because the alert count increase is not - * atomic. - */ - for_each_present_cpu(cpu) { - if (per_cpu(nmi_touch, cpu) != 1) - per_cpu(nmi_touch, cpu) = 1; - } - } - - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -notrace __kprobes int -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) -{ - unsigned int sum; - int touched = 0; - int cpu = smp_processor_id(); - int rc = 0; - - /* check for other users first */ - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) { - rc = 1; - touched = 1; - } - - sum = get_timer_irqs(cpu); - - if (__get_cpu_var(nmi_touch)) { - __get_cpu_var(nmi_touch) = 0; - touched = 1; - } - - if (cpu_isset(cpu, backtrace_mask)) { - static DEFINE_SPINLOCK(lock); /* Serialise the printks */ - - spin_lock(&lock); - printk("NMI backtrace for cpu %d\n", cpu); - dump_stack(); - spin_unlock(&lock); - cpu_clear(cpu, backtrace_mask); - } - - if (mce_in_progress()) - touched = 1; - - /* if the apic timer isn't firing, this cpu isn't doing much */ - if (!touched && __get_cpu_var(last_irq_sum) == sum) { - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) - die_nmi("NMI Watchdog detected LOCKUP", - regs, panic_on_timeout); - } else { - __get_cpu_var(last_irq_sum) = sum; - local_set(&__get_cpu_var(alert_counter), 0); - } - - /* see if the nmi watchdog went off */ - if (!__get_cpu_var(wd_enabled)) - return rc; - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - rc |= lapic_wd_event(nmi_hz); - break; - case NMI_IO_APIC: - /* don't know how to accurately check for this. - * just assume it was a watchdog timer interrupt - * This matches the old behaviour. - */ - rc = 1; - break; - } - return rc; -} - -#ifdef CONFIG_SYSCTL - -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) -{ - unsigned char reason = get_nmi_reason(); - char buf[64]; - - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(buf, regs, 1); /* Always panic here */ - return 0; -} - -/* - * proc handler for /proc/sys/kernel/nmi - */ -int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; - old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, file, buffer, length, ppos); - if (!!old_state == !!nmi_watchdog_enabled) - return 0; - - if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { - printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); - return -EIO; - } - - /* if nmi_watchdog is not set yet, then set it */ - nmi_watchdog_default(); - - if (nmi_watchdog == NMI_LOCAL_APIC) { - if (nmi_watchdog_enabled) - enable_lapic_nmi_watchdog(); - else - disable_lapic_nmi_watchdog(); - } else { - printk( KERN_WARNING - "NMI watchdog doesn't know what hardware to touch\n"); - return -EIO; - } - return 0; -} - -#endif - -int do_nmi_callback(struct pt_regs *regs, int cpu) -{ -#ifdef CONFIG_SYSCTL - if (unknown_nmi_panic) - return unknown_nmi_panic_callback(regs, cpu); -#endif - return 0; -} - -void __trigger_all_cpu_backtrace(void) -{ - int i; - - backtrace_mask = cpu_online_map; - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpus_empty(backtrace_mask)) - break; - mdelay(1); - } -} - -EXPORT_SYMBOL(nmi_active); -EXPORT_SYMBOL(nmi_watchdog); -- cgit v0.10.2 From 4b6011bc6ec71660859139ac8d28b7d0badd681c Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Sun, 25 May 2008 21:16:36 +0200 Subject: x86: Remove obsolete LOCK macro from include/asm-x86/atomic_64.h Commit d167a518 "[PATCH] x86_64: x86_64 version of the smp alternative patch." has left the LOCK macro in include/asm-x86_64/atomic.h, which is now include/asm-x86/atomic_64.h. Its scope should be local to the file, other architectures don't provide it, I couldn't find an in-tree user of it and allyesconfig, allmodconfig and allnoconfig build fine without it, so this patch removes it. Signed-off-by: Sven Wegener Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h index 3e0cd7d..fe589c1 100644 --- a/include/asm-x86/atomic_64.h +++ b/include/asm-x86/atomic_64.h @@ -11,12 +11,6 @@ * resource counting etc.. */ -#ifdef CONFIG_SMP -#define LOCK "lock ; " -#else -#define LOCK "" -#endif - /* * Make sure gcc doesn't try to be clever and move things around * on us. We need to use _exactly_ the address the user gave us, -- cgit v0.10.2 From 1a20d3ecf5c2a6435df2b756435b1eb1c657d45b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 26 May 2008 13:36:53 -0700 Subject: x86: string_32.h: workaround for broken gcc 4.0 gcc 4.0 fails to allocate %eax for the pattern operand in the rep store instructions used by memset; force it to do so by declaring a register variable. Signed-off-by: H. Peter Anvin diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h index 8d0c593..193578c 100644 --- a/include/asm-x86/string_32.h +++ b/include/asm-x86/string_32.h @@ -267,11 +267,18 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, asm volatile("rep ; stosl" \ x \ : "=&c" (d0), "=&D" (d1) \ - : "a" (pattern), "0" (count/4), "1" ((long)s) \ + : "a" (eax), "0" (count/4), "1" ((long)s) \ : "memory") { int d0, d1; +#if __GNUC__ == 4 && __GNUC_MINOR__ == 0 + /* Workaround for broken gcc 4.0 */ + register unsigned long eax asm("%eax") = pattern; +#else + unsigned long eax = pattern; +#endif + switch (count % 4) { case 0: COMMON(""); -- cgit v0.10.2 From 19384c0314342222b18d4c7f09cdce1ca74dfd2a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 00:22:16 -0400 Subject: ftrace: limit use of check pages The check_pages function is called often enough that it can cause problems with trace outputs or even bringing the system to a halt. This patch limits the check_pages to the places that are most likely to have problems. The check is made at the flip between the global array and the max save array, as well as when the size of the buffers changes and the self tests. This patch also removes the BUG_ON from check_pages and replaces it with a WARN_ON and disabling of the tracer. Signed-off-by: Steven Rostedt Cc: pq@iki.fi Cc: proski@gnu.org Cc: sandmann@redhat.com Cc: a.p.zijlstra@chello.nl Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3271916..0567f51 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -249,24 +249,32 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(current); } +#define CHECK_COND(cond) \ + if (unlikely(cond)) { \ + tracing_disabled = 1; \ + WARN_ON(1); \ + return -1; \ + } + /** * check_pages - integrity check of trace buffers * * As a safty measure we check to make sure the data pages have not - * been corrupted. TODO: configure to disable this because it adds - * a bit of overhead. + * been corrupted. */ -void check_pages(struct trace_array_cpu *data) +int check_pages(struct trace_array_cpu *data) { struct page *page, *tmp; - BUG_ON(data->trace_pages.next->prev != &data->trace_pages); - BUG_ON(data->trace_pages.prev->next != &data->trace_pages); + CHECK_COND(data->trace_pages.next->prev != &data->trace_pages); + CHECK_COND(data->trace_pages.prev->next != &data->trace_pages); list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) { - BUG_ON(page->lru.next->prev != &page->lru); - BUG_ON(page->lru.prev->next != &page->lru); + CHECK_COND(page->lru.next->prev != &page->lru); + CHECK_COND(page->lru.prev->next != &page->lru); } + + return 0; } /** @@ -280,7 +288,6 @@ void *head_page(struct trace_array_cpu *data) { struct page *page; - check_pages(data); if (list_empty(&data->trace_pages)) return NULL; @@ -2566,7 +2573,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, { unsigned long val; char buf[64]; - int ret; + int i, ret; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2635,8 +2642,15 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, trace_free_page(); } + /* check integrity */ + for_each_tracing_cpu(i) + check_pages(global_trace.data[i]); + filp->f_pos += cnt; + /* If check pages failed, return ENOMEM */ + if (tracing_disabled) + cnt = -ENOMEM; out: max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 3877dd9..18c5423 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -28,6 +28,7 @@ trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data) page = list_entry(data->trace_pages.next, struct page, lru); entries = page_address(page); + check_pages(data); if (head_page(data) != entries) goto failed; -- cgit v0.10.2 From 4902f8849da6d2805bd291551a6dfd48f1b4f604 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 00:22:18 -0400 Subject: ftrace: move ftrace_special to trace.c Move the ftrace_special out of sched_switch to trace.c. Signed-off-by: Steven Rostedt Cc: pq@iki.fi Cc: proski@gnu.org Cc: sandmann@redhat.com Cc: a.p.zijlstra@chello.nl Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0567f51..583fe24 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -941,6 +941,30 @@ tracing_sched_wakeup_trace(struct trace_array *tr, trace_wake_up(); } +void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + + if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + __trace_special(tr, data, arg1, arg2, arg3); + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + #ifdef CONFIG_FTRACE static void function_trace_call(unsigned long ip, unsigned long parent_ip) @@ -2941,8 +2965,6 @@ __init static int tracer_alloc_buffers(void) int ret = -ENOMEM; int i; - global_trace.ctrl = tracer_enabled; - /* TODO: make the number of buffers hot pluggable with CPUS */ tracing_nr_buffers = num_possible_cpus(); tracing_buffer_mask = cpu_possible_map; @@ -3012,6 +3034,7 @@ __init static int tracer_alloc_buffers(void) current_trace = &no_tracer; /* All seems OK, enable tracing */ + global_trace.ctrl = tracer_enabled; tracing_disabled = 0; return 0; diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index d25ffa5..798ec0d 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -125,30 +125,6 @@ wake_up_callback(void *probe_data, void *call_data, wakeup_func(probe_data, __rq, task, curr); } -void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ - struct trace_array *tr = ctx_trace; - struct trace_array_cpu *data; - unsigned long flags; - long disabled; - int cpu; - - if (!tracer_enabled) - return; - - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); - - if (likely(disabled == 1)) - __trace_special(tr, data, arg1, arg2, arg3); - - atomic_dec(&data->disabled); - local_irq_restore(flags); -} - static void sched_switch_reset(struct trace_array *tr) { int cpu; -- cgit v0.10.2 From 7e18d8e701b6798a5df11e0a16881a60ab1018b6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 00:22:19 -0400 Subject: ftrace: add function tracing to wake up tracing This patch adds function tracing to the functions that are called on the CPU of the task being traced. Signed-off-by: Steven Rostedt Cc: pq@iki.fi Cc: proski@gnu.org Cc: sandmann@redhat.com Cc: a.p.zijlstra@chello.nl Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 5d2fb48..bf7e91c 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -30,6 +30,69 @@ static DEFINE_SPINLOCK(wakeup_lock); static void __wakeup_reset(struct trace_array *tr); +#ifdef CONFIG_FTRACE +/* + * irqsoff uses its own tracer function to keep the overhead down: + */ +static void +wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = wakeup_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int resched; + int cpu; + + if (likely(!wakeup_task)) + return; + + resched = need_resched(); + preempt_disable_notrace(); + + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (unlikely(disabled != 1)) + goto out; + + spin_lock_irqsave(&wakeup_lock, flags); + + if (unlikely(!wakeup_task)) + goto unlock; + + /* + * The task can't disappear because it needs to + * wake up first, and we have the wakeup_lock. + */ + if (task_cpu(wakeup_task) != cpu) + goto unlock; + + trace_function(tr, data, ip, parent_ip, flags); + + unlock: + spin_unlock_irqrestore(&wakeup_lock, flags); + + out: + atomic_dec(&data->disabled); + + /* + * To prevent recursion from the scheduler, if the + * resched flag was set before we entered, then + * don't reschedule. + */ + if (resched) + preempt_enable_no_resched_notrace(); + else + preempt_enable_notrace(); +} + +static struct ftrace_ops trace_ops __read_mostly = +{ + .func = wakeup_tracer_call, +}; +#endif /* CONFIG_FTRACE */ + /* * Should this new latency be reported/recorded? */ @@ -73,7 +136,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, if (next != wakeup_task) return; - /* The task we are waitng for is waking up */ + /* The task we are waiting for is waking up */ data = tr->data[wakeup_cpu]; /* disable local data, not wakeup_cpu data */ @@ -290,6 +353,7 @@ static void start_wakeup_tracer(struct trace_array *tr) smp_wmb(); tracer_enabled = 1; + register_ftrace_function(&trace_ops); return; fail_deprobe_wake_new: @@ -305,6 +369,7 @@ fail_deprobe: static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; + unregister_ftrace_function(&trace_ops); marker_probe_unregister("kernel_sched_schedule", sched_switch_callback, &wakeup_trace); -- cgit v0.10.2 From da89a7a2536c46e76a1a4351a70a8b8417e5fed1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 00:22:20 -0400 Subject: ftrace: remove printks from irqsoff trace Printing out new max latencies was fine for the old RT tracer. But for mainline it is a bit messy. We also need to test if the run queue is locked before we can do the print. This means that we may not be printing out latencies if the run queue is locked on another CPU. This produces inconsistencies in the output. This patch simply removes the print altogether. Signed-off-by: Steven Rostedt Cc: pq@iki.fi Cc: proski@gnu.org Cc: sandmann@redhat.com Cc: a.p.zijlstra@chello.nl Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 761f3ec..421d6fe 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -165,22 +165,6 @@ check_critical_timing(struct trace_array *tr, update_max_tr_single(tr, current, cpu); - if (!runqueue_is_locked()) { - if (tracing_thresh) { - printk(KERN_INFO "(%16s-%-5d|#%d): %lu us critical" - " section violates %lu us threshold.\n", - current->comm, current->pid, - raw_smp_processor_id(), - latency, nsecs_to_usecs(tracing_thresh)); - } else { - printk(KERN_INFO "(%16s-%-5d|#%d): new %lu us" - " maximum-latency critical section.\n", - current->comm, current->pid, - raw_smp_processor_id(), - latency); - } - } - max_sequence++; out_unlock: -- cgit v0.10.2 From 41c52c0db9607e59f90da7da5309489fa06e887f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 11:46:33 -0400 Subject: ftrace: set_ftrace_notrace feature While debugging latencies in the RT kernel, I found that it would be nice to be able to filter away functions from the trace than just to filter on functions. I added a new interface to the debugfs tracing directory called set_ftrace_notrace When dynamic frace is enabled, this lets you filter away functions that will not be recorded in the trace. It is similar to adding 'notrace' to those functions but by doing it without recompiling the kernel. Here's how set_ftrace_filter and set_ftrace_notrace interact. Remember, if set_ftrace_filter is set, it removes all functions from the trace execpt for those listed in the set_ftrace_filter. set_ftrace_notrace will prevent those functions from being traced. If you were to set one function in both set_ftrace_filter and set_ftrace_notrace and that function was the same, then you would end up with an empty trace. the set of functions to trace is: set_ftrace_filter == empty then all functions not in set_ftrace_notrace else set of the set_ftrace_filter and not in set of set_ftrace_notrace. Signed-off-by: Steven Rostedt Signed-off-by: Thomas Gleixner diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 922e23d..ffbbd54 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -48,6 +48,7 @@ enum { FTRACE_FL_FAILED = (1 << 1), FTRACE_FL_FILTER = (1 << 2), FTRACE_FL_ENABLED = (1 << 3), + FTRACE_FL_NOTRACE = (1 << 4), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 89bd9a6..2552454 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -170,7 +170,7 @@ static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); static DEFINE_SPINLOCK(ftrace_shutdown_lock); static DEFINE_MUTEX(ftraced_lock); -static DEFINE_MUTEX(ftrace_filter_lock); +static DEFINE_MUTEX(ftrace_regex_lock); struct ftrace_page { struct ftrace_page *next; @@ -337,13 +337,12 @@ static void __ftrace_replace_code(struct dyn_ftrace *rec, unsigned char *old, unsigned char *new, int enable) { - unsigned long ip; + unsigned long ip, fl; int failed; ip = rec->ip; if (ftrace_filtered && enable) { - unsigned long fl; /* * If filtering is on: * @@ -356,13 +355,16 @@ __ftrace_replace_code(struct dyn_ftrace *rec, * If this record is not set to be filtered * and it is not enabled do nothing. * + * If this record is set not to trace then + * do nothing. + * * If this record is not set to be filtered and * it is enabled, disable it. */ fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED); if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || - (fl == 0)) + (fl == 0) || (rec->flags & FTRACE_FL_NOTRACE)) return; /* @@ -380,9 +382,17 @@ __ftrace_replace_code(struct dyn_ftrace *rec, } } else { - if (enable) + if (enable) { + /* + * If this record is set not to trace and is + * not enabled, do nothing. + */ + fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED); + if (fl == FTRACE_FL_NOTRACE) + return; + new = ftrace_call_replace(ip, FTRACE_ADDR); - else + } else old = ftrace_call_replace(ip, FTRACE_ADDR); if (enable) { @@ -721,6 +731,7 @@ static int __init ftrace_dyn_table_alloc(void) enum { FTRACE_ITER_FILTER = (1 << 0), FTRACE_ITER_CONT = (1 << 1), + FTRACE_ITER_NOTRACE = (1 << 2), }; #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ @@ -754,7 +765,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos) rec = &iter->pg->records[iter->idx++]; if ((rec->flags & FTRACE_FL_FAILED) || ((iter->flags & FTRACE_ITER_FILTER) && - !(rec->flags & FTRACE_FL_FILTER))) { + !(rec->flags & FTRACE_FL_FILTER)) || + ((iter->flags & FTRACE_ITER_NOTRACE) && + !(rec->flags & FTRACE_FL_NOTRACE))) { rec = NULL; goto retry; } @@ -847,22 +860,24 @@ int ftrace_avail_release(struct inode *inode, struct file *file) return 0; } -static void ftrace_filter_reset(void) +static void ftrace_filter_reset(int enable) { struct ftrace_page *pg; struct dyn_ftrace *rec; + unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; unsigned i; /* keep kstop machine from running */ preempt_disable(); - ftrace_filtered = 0; + if (enable) + ftrace_filtered = 0; pg = ftrace_pages_start; while (pg) { for (i = 0; i < pg->index; i++) { rec = &pg->records[i]; if (rec->flags & FTRACE_FL_FAILED) continue; - rec->flags &= ~FTRACE_FL_FILTER; + rec->flags &= ~type; } pg = pg->next; } @@ -870,7 +885,7 @@ static void ftrace_filter_reset(void) } static int -ftrace_filter_open(struct inode *inode, struct file *file) +ftrace_regex_open(struct inode *inode, struct file *file, int enable) { struct ftrace_iterator *iter; int ret = 0; @@ -882,15 +897,16 @@ ftrace_filter_open(struct inode *inode, struct file *file) if (!iter) return -ENOMEM; - mutex_lock(&ftrace_filter_lock); + mutex_lock(&ftrace_regex_lock); if ((file->f_mode & FMODE_WRITE) && !(file->f_flags & O_APPEND)) - ftrace_filter_reset(); + ftrace_filter_reset(enable); if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; iter->pos = -1; - iter->flags = FTRACE_ITER_FILTER; + iter->flags = enable ? FTRACE_ITER_FILTER : + FTRACE_ITER_NOTRACE; ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { @@ -900,13 +916,25 @@ ftrace_filter_open(struct inode *inode, struct file *file) kfree(iter); } else file->private_data = iter; - mutex_unlock(&ftrace_filter_lock); + mutex_unlock(&ftrace_regex_lock); return ret; } +static int +ftrace_filter_open(struct inode *inode, struct file *file) +{ + return ftrace_regex_open(inode, file, 1); +} + +static int +ftrace_notrace_open(struct inode *inode, struct file *file) +{ + return ftrace_regex_open(inode, file, 0); +} + static ssize_t -ftrace_filter_read(struct file *file, char __user *ubuf, +ftrace_regex_read(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) { if (file->f_mode & FMODE_READ) @@ -916,7 +944,7 @@ ftrace_filter_read(struct file *file, char __user *ubuf, } static loff_t -ftrace_filter_lseek(struct file *file, loff_t offset, int origin) +ftrace_regex_lseek(struct file *file, loff_t offset, int origin) { loff_t ret; @@ -936,13 +964,14 @@ enum { }; static void -ftrace_match(unsigned char *buff, int len) +ftrace_match(unsigned char *buff, int len, int enable) { char str[KSYM_SYMBOL_LEN]; char *search = NULL; struct ftrace_page *pg; struct dyn_ftrace *rec; int type = MATCH_FULL; + unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; unsigned i, match = 0, search_len = 0; for (i = 0; i < len; i++) { @@ -966,7 +995,8 @@ ftrace_match(unsigned char *buff, int len) /* keep kstop machine from running */ preempt_disable(); - ftrace_filtered = 1; + if (enable) + ftrace_filtered = 1; pg = ftrace_pages_start; while (pg) { for (i = 0; i < pg->index; i++) { @@ -997,7 +1027,7 @@ ftrace_match(unsigned char *buff, int len) break; } if (matched) - rec->flags |= FTRACE_FL_FILTER; + rec->flags |= flag; } pg = pg->next; } @@ -1005,8 +1035,8 @@ ftrace_match(unsigned char *buff, int len) } static ssize_t -ftrace_filter_write(struct file *file, const char __user *ubuf, - size_t cnt, loff_t *ppos) +ftrace_regex_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos, int enable) { struct ftrace_iterator *iter; char ch; @@ -1016,7 +1046,7 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, if (!cnt || cnt < 0) return 0; - mutex_lock(&ftrace_filter_lock); + mutex_lock(&ftrace_regex_lock); if (file->f_mode & FMODE_READ) { struct seq_file *m = file->private_data; @@ -1045,7 +1075,6 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, cnt--; } - if (isspace(ch)) { file->f_pos += read; ret = read; @@ -1072,7 +1101,7 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, if (isspace(ch)) { iter->filtered++; iter->buffer[iter->buffer_idx] = 0; - ftrace_match(iter->buffer, iter->buffer_idx); + ftrace_match(iter->buffer, iter->buffer_idx, enable); iter->buffer_idx = 0; } else iter->flags |= FTRACE_ITER_CONT; @@ -1082,11 +1111,39 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, ret = read; out: - mutex_unlock(&ftrace_filter_lock); + mutex_unlock(&ftrace_regex_lock); return ret; } +static ssize_t +ftrace_filter_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return ftrace_regex_write(file, ubuf, cnt, ppos, 1); +} + +static ssize_t +ftrace_notrace_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return ftrace_regex_write(file, ubuf, cnt, ppos, 0); +} + +static void +ftrace_set_regex(unsigned char *buf, int len, int reset, int enable) +{ + if (unlikely(ftrace_disabled)) + return; + + mutex_lock(&ftrace_regex_lock); + if (reset) + ftrace_filter_reset(enable); + if (buf) + ftrace_match(buf, len, enable); + mutex_unlock(&ftrace_regex_lock); +} + /** * ftrace_set_filter - set a function to filter on in ftrace * @buf - the string that holds the function filter text. @@ -1098,24 +1155,31 @@ ftrace_filter_write(struct file *file, const char __user *ubuf, */ void ftrace_set_filter(unsigned char *buf, int len, int reset) { - if (unlikely(ftrace_disabled)) - return; + ftrace_set_regex(buf, len, reset, 1); +} - mutex_lock(&ftrace_filter_lock); - if (reset) - ftrace_filter_reset(); - if (buf) - ftrace_match(buf, len); - mutex_unlock(&ftrace_filter_lock); +/** + * ftrace_set_notrace - set a function to not trace in ftrace + * @buf - the string that holds the function notrace text. + * @len - the length of the string. + * @reset - non zero to reset all filters before applying this filter. + * + * Notrace Filters denote which functions should not be enabled when tracing + * is enabled. If @buf is NULL and reset is set, all functions will be enabled + * for tracing. + */ +void ftrace_set_notrace(unsigned char *buf, int len, int reset) +{ + ftrace_set_regex(buf, len, reset, 0); } static int -ftrace_filter_release(struct inode *inode, struct file *file) +ftrace_regex_release(struct inode *inode, struct file *file, int enable) { struct seq_file *m = (struct seq_file *)file->private_data; struct ftrace_iterator *iter; - mutex_lock(&ftrace_filter_lock); + mutex_lock(&ftrace_regex_lock); if (file->f_mode & FMODE_READ) { iter = m->private; @@ -1126,7 +1190,7 @@ ftrace_filter_release(struct inode *inode, struct file *file) if (iter->buffer_idx) { iter->filtered++; iter->buffer[iter->buffer_idx] = 0; - ftrace_match(iter->buffer, iter->buffer_idx); + ftrace_match(iter->buffer, iter->buffer_idx, enable); } mutex_lock(&ftrace_sysctl_lock); @@ -1137,10 +1201,22 @@ ftrace_filter_release(struct inode *inode, struct file *file) mutex_unlock(&ftrace_sysctl_lock); kfree(iter); - mutex_unlock(&ftrace_filter_lock); + mutex_unlock(&ftrace_regex_lock); return 0; } +static int +ftrace_filter_release(struct inode *inode, struct file *file) +{ + return ftrace_regex_release(inode, file, 1); +} + +static int +ftrace_notrace_release(struct inode *inode, struct file *file) +{ + return ftrace_regex_release(inode, file, 0); +} + static struct file_operations ftrace_avail_fops = { .open = ftrace_avail_open, .read = seq_read, @@ -1150,12 +1226,20 @@ static struct file_operations ftrace_avail_fops = { static struct file_operations ftrace_filter_fops = { .open = ftrace_filter_open, - .read = ftrace_filter_read, + .read = ftrace_regex_read, .write = ftrace_filter_write, - .llseek = ftrace_filter_lseek, + .llseek = ftrace_regex_lseek, .release = ftrace_filter_release, }; +static struct file_operations ftrace_notrace_fops = { + .open = ftrace_notrace_open, + .read = ftrace_regex_read, + .write = ftrace_notrace_write, + .llseek = ftrace_regex_lseek, + .release = ftrace_notrace_release, +}; + /** * ftrace_force_update - force an update to all recording ftrace functions * @@ -1239,6 +1323,12 @@ static __init int ftrace_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs " "'set_ftrace_filter' entry\n"); + + entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer, + NULL, &ftrace_notrace_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'set_ftrace_notrace' entry\n"); return 0; } -- cgit v0.10.2 From 41bc8144d02028133bcd1d545023c6f49e8b2411 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 11:49:22 -0400 Subject: ftrace: fix up cmdline recording The new work with converting the trace hooks over to markers broke the command line recording of ftrace. This patch fixes it again. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 583fe24..0feae23 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -652,9 +652,6 @@ static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; static int cmdline_idx; static DEFINE_SPINLOCK(trace_cmdline_lock); -/* trace in all context switches */ -atomic_t trace_record_cmdline_enabled __read_mostly; - /* temporary disable recording */ atomic_t trace_record_cmdline_disabled __read_mostly; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c460e85..6b8bd88 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -218,6 +218,8 @@ void trace_function(struct trace_array *tr, void tracing_start_function_trace(void); void tracing_stop_function_trace(void); +void tracing_start_cmdline_record(void); +void tracing_stop_cmdline_record(void); int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); @@ -226,8 +228,6 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_max_latency; extern unsigned long tracing_thresh; -extern atomic_t trace_record_cmdline_enabled; - void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 0a08465..7ee7dcd 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -29,14 +29,14 @@ static void function_reset(struct trace_array *tr) static void start_function_trace(struct trace_array *tr) { function_reset(tr); - atomic_inc(&trace_record_cmdline_enabled); + tracing_start_cmdline_record(); tracing_start_function_trace(); } static void stop_function_trace(struct trace_array *tr) { tracing_stop_function_trace(); - atomic_dec(&trace_record_cmdline_enabled); + tracing_stop_cmdline_record(); } static void function_trace_init(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 798ec0d..c16935d 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -29,6 +29,9 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev, long disabled; int cpu; + tracing_record_cmdline(prev); + tracing_record_cmdline(next); + if (!tracer_enabled) return; @@ -63,8 +66,6 @@ sched_switch_callback(void *probe_data, void *call_data, prev = va_arg(*args, typeof(prev)); next = va_arg(*args, typeof(next)); - tracing_record_cmdline(prev); - /* * If tracer_switch_func only points to the local * switch func, it still needs the ptr passed to it. @@ -213,18 +214,26 @@ void tracing_stop_sched_switch(void) tracing_sched_unregister(); } +void tracing_start_cmdline_record(void) +{ + tracing_start_sched_switch(); +} + +void tracing_stop_cmdline_record(void) +{ + tracing_stop_sched_switch(); +} + static void start_sched_trace(struct trace_array *tr) { sched_switch_reset(tr); - atomic_inc(&trace_record_cmdline_enabled); tracer_enabled = 1; - tracing_start_sched_switch(); + tracing_start_cmdline_record(); } static void stop_sched_trace(struct trace_array *tr) { - tracing_stop_sched_switch(); - atomic_dec(&trace_record_cmdline_enabled); + tracing_stop_cmdline_record(); tracer_enabled = 0; } -- cgit v0.10.2 From ccbfac2923c9febaeaf07a50054027a92b502718 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 14:31:07 -0400 Subject: ftrace: powerpc clean ups This patch cleans up the ftrace code in PowerPC based on the comments from Michael Ellerman. Signed-off-by: Steven Rostedt Cc: Michael Ellerman Cc: proski@gnu.org Cc: a.p.zijlstra@chello.nl Cc: Pekka Paalanen Cc: Steven Rostedt Cc: linuxppc-dev@ozlabs.org Cc: Soeren Sandmann Pedersen Cc: paulus@samba.org Signed-off-by: Thomas Gleixner diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0e62218..3b1dd29 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1129,18 +1129,11 @@ _GLOBAL(_mcount) stw r5, 8(r1) LOAD_REG_ADDR(r5, ftrace_trace_function) -#if 0 - mtctr r3 - mr r1, r5 - bctrl -#endif lwz r5,0(r5) -#if 1 + mtctr r5 bctrl -#else - bl ftrace_stub -#endif + nop lwz r6, 8(r1) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 5a4993f..69ed412 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -51,10 +51,16 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static unsigned int op; + /* + * It would be nice to just use create_function_call, but that will + * update the code itself. Here we need to just return the + * instruction that is going to be modified, without modifying the + * code. + */ addr = GET_ADDR(addr); /* Set to "bl addr" */ - op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffe); + op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffc); /* * No locking needed, this must be called via kstop_machine diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index cf6b5a7..4300db5 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_PPC32 extern void transfer_to_handler(void); @@ -68,6 +69,10 @@ EXPORT_SYMBOL(single_step_exception); EXPORT_SYMBOL(sys_sigreturn); #endif +#ifdef CONFIG_FTRACE +EXPORT_SYMBOL(_mcount); +#endif + EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strcat); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 22f8e2b..19e8fcb 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -47,11 +47,6 @@ #include #endif -#ifdef CONFIG_FTRACE -extern void _mcount(void); -EXPORT_SYMBOL(_mcount); -#endif - extern void bootx_init(unsigned long r4, unsigned long phys); int boot_cpuid; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 277bf18..098fd96 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -85,11 +85,6 @@ struct ppc64_caches ppc64_caches = { }; EXPORT_SYMBOL_GPL(ppc64_caches); -#ifdef CONFIG_FTRACE -extern void _mcount(void); -EXPORT_SYMBOL(_mcount); -#endif - /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. diff --git a/include/asm-powerpc/ftrace.h b/include/asm-powerpc/ftrace.h new file mode 100644 index 0000000..b1bfa70 --- /dev/null +++ b/include/asm-powerpc/ftrace.h @@ -0,0 +1,6 @@ +#ifndef _ASM_POWERPC_FTRACE +#define _ASM_POWERPC_FTRACE + +extern void _mcount(void); + +#endif -- cgit v0.10.2 From ffdaa3582b6b39d625d585d07e329ffdc925e971 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 24 May 2008 23:45:02 +0530 Subject: ftrace: safe traversal of ftrace_hash hlist Hi Steven, I noticed that concurrent instances of ftrace_record_ip() have a race between ftrace_hash list traversal during ftrace_ip_in_hash() (before acquiring ftrace_shutdown_lock) and ftrace_add_hash(). If it's so then this should fix it. Signed-off-by: Abhishek Sagar Cc: rostedt@goodmis.org Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2552454..9b7c54f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -201,7 +201,7 @@ ftrace_ip_in_hash(unsigned long ip, unsigned long key) struct hlist_node *t; int found = 0; - hlist_for_each_entry(p, t, &ftrace_hash[key], node) { + hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) { if (p->ip == ip) { found = 1; break; @@ -214,7 +214,7 @@ ftrace_ip_in_hash(unsigned long ip, unsigned long key) static inline void ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) { - hlist_add_head(&node->node, &ftrace_hash[key]); + hlist_add_head_rcu(&node->node, &ftrace_hash[key]); } static void ftrace_free_rec(struct dyn_ftrace *rec) -- cgit v0.10.2 From 492a7ea5bcf263ee02a9eb6a3ab0222a1946fade Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sun, 25 May 2008 00:10:04 +0530 Subject: ftrace: fix updating of ftrace_update_cnt Hi Ingo/Steven, Ftrace currently maintains an update count which includes false updates, i.e, updates which failed. If anything, such failures should be tracked by some separate variable, but this patch provides a minimal fix. Signed-off-by: Abhishek Sagar Cc: rostedt@goodmis.org Signed-off-by: Thomas Gleixner diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9b7c54f..1843edc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -453,7 +453,7 @@ static void ftrace_shutdown_replenish(void) ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); } -static void +static int ftrace_code_disable(struct dyn_ftrace *rec) { unsigned long ip; @@ -469,7 +469,9 @@ ftrace_code_disable(struct dyn_ftrace *rec) if (failed) { rec->flags |= FTRACE_FL_FAILED; ftrace_free_rec(rec); + return 0; } + return 1; } static int __ftrace_modify_code(void *data) @@ -617,8 +619,8 @@ static int __ftrace_update_code(void *ignore) /* all CPUS are stopped, we are safe to modify code */ hlist_for_each_entry(p, t, &head, node) { - ftrace_code_disable(p); - ftrace_update_cnt++; + if (ftrace_code_disable(p)) + ftrace_update_cnt++; } } -- cgit v0.10.2 From d031476408ae0f5196e3c579f519dfdefb099b67 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 23 May 2008 14:41:19 +0100 Subject: hrtimer: remove warning in hres_timers_resume hres_timers_resume() warns if there appears to be more than one cpu online. This warning makes sense when the suspend/resume mechanism offlines all cpus but one during the suspend/resume process. However, Xen suspend does not need to offline the other cpus; it merely keeps them tied up in stop_machine() while the virtual machine is suspended. The warning hres_timers_resume issues is therefore spurious. Signed-off-by: Jeremy Fitzhardinge Cc: xen-devel Cc: "Rafael J. Wysocki" Signed-off-by: Thomas Gleixner diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 421be5f..493c4b8 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -632,8 +632,6 @@ void clock_was_set(void) */ void hres_timers_resume(void) { - WARN_ON_ONCE(num_online_cpus() > 1); - /* Retrigger the CPU local events: */ retrigger_next_event(NULL); } -- cgit v0.10.2 From 900cfa46191a7d87cf1891924cb90499287fd235 Mon Sep 17 00:00:00 2001 From: "Carlos R. Mafra" Date: Thu, 22 May 2008 19:25:11 -0300 Subject: hrtimer: Remove unused variables in ktime_divns() The variables dns and inc are not used, remove them. Signed-off-by: Carlos R. Mafra Cc: tglx@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 493c4b8..635739d 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -300,11 +300,10 @@ EXPORT_SYMBOL_GPL(ktime_sub_ns); */ u64 ktime_divns(const ktime_t kt, s64 div) { - u64 dclc, inc, dns; + u64 dclc; int sft = 0; - dclc = dns = ktime_to_ns(kt); - inc = div; + dclc = ktime_to_ns(kt); /* Make sure the divisor is less than 2^32: */ while (div >> 32) { sft++; -- cgit v0.10.2 From 4226ab93d8ae3fd895abe45879fe34d489a98718 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:30:58 +0100 Subject: x86: use pteval_t for _PAGE_FOO Rather than making _PAGE_* constants signed, and then relying on sign-extension to make sure that masks derived from them are wide enough, just explicitly type them pteval_t. This guarantees that they and any derived values are the right size for the current pte format. The reliance on sign extension is fragile, and invokes some very subtle corners of the C type system. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 97c271b..fab3ecb 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -20,30 +20,25 @@ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ -/* - * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a - * sign-extended value on 32-bit with all 1's in the upper word, - * which preserves the upper pte values on 64-bit ptes: - */ -#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT) -#define _PAGE_RW (_AC(1, L)<<_PAGE_BIT_RW) -#define _PAGE_USER (_AC(1, L)<<_PAGE_BIT_USER) -#define _PAGE_PWT (_AC(1, L)<<_PAGE_BIT_PWT) -#define _PAGE_PCD (_AC(1, L)<<_PAGE_BIT_PCD) -#define _PAGE_ACCESSED (_AC(1, L)<<_PAGE_BIT_ACCESSED) -#define _PAGE_DIRTY (_AC(1, L)<<_PAGE_BIT_DIRTY) -#define _PAGE_PSE (_AC(1, L)<<_PAGE_BIT_PSE) /* 2MB page */ -#define _PAGE_GLOBAL (_AC(1, L)<<_PAGE_BIT_GLOBAL) /* Global TLB entry */ -#define _PAGE_UNUSED1 (_AC(1, L)<<_PAGE_BIT_UNUSED1) -#define _PAGE_UNUSED2 (_AC(1, L)<<_PAGE_BIT_UNUSED2) -#define _PAGE_UNUSED3 (_AC(1, L)<<_PAGE_BIT_UNUSED3) -#define _PAGE_PAT (_AC(1, L)<<_PAGE_BIT_PAT) -#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE) +#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) +#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) +#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) +#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) +#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) +#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) +#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) +#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) +#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) +#define _PAGE_UNUSED2 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED2) +#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) +#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) -#define _PAGE_NX (_AC(1, ULL) << _PAGE_BIT_NX) +#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #else -#define _PAGE_NX 0 +#define _PAGE_NX (_AT(pteval_t, 0)) #endif /* If _PAGE_PRESENT is clear, we use these: */ @@ -210,22 +205,22 @@ static inline int pmd_large(pmd_t pte) static inline pte_t pte_mkclean(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); + return __pte(pte_val(pte) & ~_PAGE_DIRTY); } static inline pte_t pte_mkold(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); + return __pte(pte_val(pte) & ~_PAGE_ACCESSED); } static inline pte_t pte_wrprotect(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_RW); + return __pte(pte_val(pte) & ~_PAGE_RW); } static inline pte_t pte_mkexec(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX); + return __pte(pte_val(pte) & ~_PAGE_NX); } static inline pte_t pte_mkdirty(pte_t pte) @@ -250,7 +245,7 @@ static inline pte_t pte_mkhuge(pte_t pte) static inline pte_t pte_clrhuge(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_PSE); + return __pte(pte_val(pte) & ~_PAGE_PSE); } static inline pte_t pte_mkglobal(pte_t pte) @@ -260,7 +255,7 @@ static inline pte_t pte_mkglobal(pte_t pte) static inline pte_t pte_clrglobal(pte_t pte) { - return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL); + return __pte(pte_val(pte) & ~_PAGE_GLOBAL); } static inline pte_t pte_mkspecial(pte_t pte) -- cgit v0.10.2 From 4e09e21ccb0dfe7ee8d5641192e0072e83bd916b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:03 +0100 Subject: x86: use symbolic constant in stts() Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index a2f04cd..7e4c133 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -289,7 +289,7 @@ static inline void native_wbinvd(void) #endif/* CONFIG_PARAVIRT */ -#define stts() write_cr0(8 | read_cr0()) +#define stts() write_cr0(read_cr0() | X86_CR0_TS) #endif /* __KERNEL__ */ -- cgit v0.10.2 From 0acf10d8fbd52926217d3933d196b33fe2468f18 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:30:59 +0100 Subject: xen: add raw console write functions for debug Add a couple of functions which can write directly to the Xen console for debugging. This output ends up on the host's dom0 console (assuming it allows the domain to write there). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index dd68f85..e97d9d1 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -157,3 +157,29 @@ struct console xenboot_console = { .write = xenboot_write_console, .flags = CON_PRINTBUFFER | CON_BOOT, }; + +void xen_raw_console_write(const char *str) +{ + int len = strlen(str); + + while(len > 0) { + int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str); + if (rc <= 0) + break; + + str += rc; + len -= rc; + } +} + +void xen_raw_printk(const char *fmt, ...) +{ + static char buf[512]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + xen_raw_console_write(buf); +} diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index 21c0ecf..efc3237 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -3,4 +3,7 @@ extern struct console xenboot_console; +void xen_raw_console_write(const char *str); +void xen_raw_printk(const char *fmt, ...); + #endif /* XEN_HVC_CONSOLE_H */ -- cgit v0.10.2 From 0922abdc3982ae54cbe1b24ac5aa91a260eca1bb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:00 +0100 Subject: xen: make early console also write to debug console When using "earlyprintk=xen", also write the console output to the raw debug console. This will appear on dom0's console if the hypervisor has been compiled to allow it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index e97d9d1..2413af3 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -134,12 +134,27 @@ module_init(xen_init); module_exit(xen_fini); console_initcall(xen_cons_init); +static void raw_console_write(const char *str, int len) +{ + while(len > 0) { + int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str); + if (rc <= 0) + break; + + str += rc; + len -= rc; + } +} + +#ifdef CONFIG_EARLY_PRINTK static void xenboot_write_console(struct console *console, const char *string, unsigned len) { unsigned int linelen, off = 0; const char *pos; + raw_console_write(string, len); + while (off < len && NULL != (pos = strchr(string+off, '\n'))) { linelen = pos-string+off; if (off + linelen > len) @@ -155,21 +170,13 @@ static void xenboot_write_console(struct console *console, const char *string, struct console xenboot_console = { .name = "xenboot", .write = xenboot_write_console, - .flags = CON_PRINTBUFFER | CON_BOOT, + .flags = CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME, }; +#endif /* CONFIG_EARLY_PRINTK */ void xen_raw_console_write(const char *str) { - int len = strlen(str); - - while(len > 0) { - int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str); - if (rc <= 0) - break; - - str += rc; - len -= rc; - } + raw_console_write(str, strlen(str)); } void xen_raw_printk(const char *fmt, ...) -- cgit v0.10.2 From 7b1333aa4cb546ddeb9c05098a53d9a777623a05 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:01 +0100 Subject: xen: use hypercall rather than clts Xen will trap and emulate clts, but its better to use a hypercall. Also, xenner doesn't handle clts. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a05b772..446f4cd 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -607,6 +607,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, xen_mc_issue(PARAVIRT_LAZY_MMU); } +static void xen_clts(void) +{ + struct multicall_space mcs; + + mcs = xen_mc_entry(0); + + MULTI_fpu_taskswitch(mcs.mc, 0); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +static void xen_write_cr0(unsigned long cr0) +{ + struct multicall_space mcs; + + /* Only pay attention to cr0.TS; everything else is + ignored. */ + mcs = xen_mc_entry(0); + + MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + static void xen_write_cr2(unsigned long cr2) { x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; @@ -978,10 +1002,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .set_debugreg = xen_set_debugreg, .get_debugreg = xen_get_debugreg, - .clts = native_clts, + .clts = xen_clts, .read_cr0 = native_read_cr0, - .write_cr0 = native_write_cr0, + .write_cr0 = xen_write_cr0, .read_cr4 = native_read_cr4, .read_cr4_safe = native_read_cr4_safe, diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h index c2ccd99..897ff79 100644 --- a/include/asm-x86/xen/hypercall.h +++ b/include/asm-x86/xen/hypercall.h @@ -315,6 +315,13 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) } static inline void +MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) +{ + mcl->op = __HYPERVISOR_fpu_taskswitch; + mcl->args[0] = set; +} + +static inline void MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, pte_t new_val, unsigned long flags) { -- cgit v0.10.2 From 349c709f42453707f74bece0d9d35ee5b3842893 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:02 +0100 Subject: xen: use new sched_op Use the new sched_op hypercall, mainly because xenner doesn't support the old one. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 446f4cd..35ddaf5 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -254,7 +254,7 @@ static void xen_irq_enable(void) static void xen_safe_halt(void) { /* Blocking includes an implicit local_irq_enable(). */ - if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) + if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) BUG(); } @@ -1138,11 +1138,13 @@ static const struct smp_ops xen_smp_ops __initdata = { static void xen_reboot(int reason) { + struct sched_shutdown r = { .reason = reason }; + #ifdef CONFIG_SMP smp_send_stop(); #endif - if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) + if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) BUG(); } diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h index 897ff79..2a4f9b4 100644 --- a/include/asm-x86/xen/hypercall.h +++ b/include/asm-x86/xen/hypercall.h @@ -176,9 +176,9 @@ HYPERVISOR_fpu_taskswitch(int set) } static inline int -HYPERVISOR_sched_op(int cmd, unsigned long arg) +HYPERVISOR_sched_op(int cmd, void *arg) { - return _hypercall2(int, sched_op, cmd, arg); + return _hypercall2(int, sched_op_new, cmd, arg); } static inline long -- cgit v0.10.2 From 2956a3511c8c5dccb1d4739ead17c7c3c23a24b7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:04 +0100 Subject: xen: allow some cr4 updates The guest can legitimately change things like cr4.OSFXSR and OSXMMEXCPT, so let it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 35ddaf5..f687648 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -648,8 +648,10 @@ static unsigned long xen_read_cr2_direct(void) static void xen_write_cr4(unsigned long cr4) { - /* Just ignore cr4 changes; Xen doesn't allow us to do - anything anyway. */ + cr4 &= ~X86_CR4_PGE; + cr4 &= ~X86_CR4_PSE; + + native_write_cr4(cr4); } static unsigned long xen_read_cr3(void) -- cgit v0.10.2 From 239d1fc04ed0b58d638096b12a7f6d50269d30c9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:05 +0100 Subject: xen: don't worry about preempt during xen_irq_enable() When enabling interrupts, we don't need to worry about preemption, because we either enter with interrupts disabled - so no preemption - or the caller is confused and is re-enabling interrupts on some indeterminate processor. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f687648..4a372b7 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -235,13 +235,13 @@ static void xen_irq_enable(void) { struct vcpu_info *vcpu; - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); + /* We don't need to worry about being preempted here, since + either a) interrupts are disabled, so no preemption, or b) + the caller is confused and is trying to re-enable interrupts + on an indeterminate processor. */ + vcpu = x86_read_percpu(xen_vcpu); vcpu->evtchn_upcall_mask = 0; - preempt_enable_no_resched(); /* Doesn't matter if we get preempted here, because any pending event will get dealt with anyway. */ -- cgit v0.10.2 From a15af1c9ea2750a9ff01e51615c45950bad8221b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:06 +0100 Subject: x86/paravirt: add pte_flags to just get pte flags Add pte_flags() to extract the flags from a pte. This is a special case of pte_val() which is only guaranteed to return the pte's flags correctly; the page number may be corrupted or missing. The intent is to allow paravirt implementations to return pte flags without having to do any translation of the page number (most notably, Xen). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 74f0c5e..c98d546 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -403,6 +403,7 @@ struct pv_mmu_ops pv_mmu_ops = { #endif /* PAGETABLE_LEVELS >= 3 */ .pte_val = native_pte_val, + .pte_flags = native_pte_val, .pgd_val = native_pgd_val, .make_pte = native_make_pte, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 4a372b7..1b4b5fa 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1101,6 +1101,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .set_pmd = xen_set_pmd, .pte_val = xen_pte_val, + .pte_flags = native_pte_val, .pgd_val = xen_pgd_val, .make_pte = xen_make_pte, diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 005bd04..5faefea 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -136,7 +136,6 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user); * first step in the migration to the kernel types. pte_pfn is already defined * in the kernel. */ #define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK) -#define pte_flags(x) (pte_val(x) & ~PAGE_MASK) #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) /* interrupts_and_traps.c: */ diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index dc936dd..a1e2b94 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -160,6 +160,7 @@ static inline pteval_t native_pte_val(pte_t pte) #endif #define pte_val(x) native_pte_val(x) +#define pte_flags(x) native_pte_val(x) #define __pte(x) native_make_pte(x) #endif /* CONFIG_PARAVIRT */ diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 0f13b94..5ea37a4 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -239,6 +239,7 @@ struct pv_mmu_ops { unsigned long addr, pte_t *ptep); pteval_t (*pte_val)(pte_t); + pteval_t (*pte_flags)(pte_t); pte_t (*make_pte)(pteval_t pte); pgdval_t (*pgd_val)(pgd_t); @@ -996,6 +997,20 @@ static inline pteval_t pte_val(pte_t pte) return ret; } +static inline pteval_t pte_flags(pte_t pte) +{ + pteval_t ret; + + if (sizeof(pteval_t) > sizeof(long)) + ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, + pte.pte, (u64)pte.pte >> 32); + else + ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, + pte.pte); + + return ret; +} + static inline pgd_t __pgd(pgdval_t val) { pgdval_t ret; diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 97c271b..47a852c 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -164,37 +164,37 @@ extern struct list_head pgd_list; */ static inline int pte_dirty(pte_t pte) { - return pte_val(pte) & _PAGE_DIRTY; + return pte_flags(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { - return pte_val(pte) & _PAGE_ACCESSED; + return pte_flags(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { - return pte_val(pte) & _PAGE_RW; + return pte_flags(pte) & _PAGE_RW; } static inline int pte_file(pte_t pte) { - return pte_val(pte) & _PAGE_FILE; + return pte_flags(pte) & _PAGE_FILE; } static inline int pte_huge(pte_t pte) { - return pte_val(pte) & _PAGE_PSE; + return pte_flags(pte) & _PAGE_PSE; } static inline int pte_global(pte_t pte) { - return pte_val(pte) & _PAGE_GLOBAL; + return pte_flags(pte) & _PAGE_GLOBAL; } static inline int pte_exec(pte_t pte) { - return !(pte_val(pte) & _PAGE_NX); + return !(pte_flags(pte) & _PAGE_NX); } static inline int pte_special(pte_t pte) @@ -305,7 +305,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) return __pgprot(preservebits | addbits); } -#define pte_pgprot(x) __pgprot(pte_val(x) & ~PTE_MASK) +#define pte_pgprot(x) __pgprot(pte_flags(x) & ~PTE_MASK) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) -- cgit v0.10.2 From 9e124fe16ff24746d6de5a2ad685266d7bce0e08 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:07 +0100 Subject: xen: Enable console tty by default in domU if it's not a dummy Without console= arguments on the kernel command line, the first console to register becomes enabled and the preferred console (the one behind /dev/console). This is normally tty (assuming CONFIG_VT_CONSOLE is enabled, which it commonly is). This is okay as long tty is a useful console. But unless we have the PV framebuffer, and it is enabled for this domain, tty0 in domU is merely a dummy. In that case, we want the preferred console to be the Xen console hvc0, and we want it without having to fiddle with the kernel command line. Commit b8c2d3dfbc117dff26058fbac316b8acfc2cb5f7 did that for us. Since we now have the PV framebuffer, we want to enable and prefer tty again, but only when PVFB is enabled. But even then we still want to enable the Xen console as well. Problem: when tty registers, we can't yet know whether the PVFB is enabled. By the time we can know (xenstore is up), the console setup game is over. Solution: enable console tty by default, but keep hvc as the preferred console. Change the preferred console to tty when PVFB probes successfully, unless we've been given console kernel parameters. Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1b4b5fa..6cfb708 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1256,8 +1256,10 @@ asmlinkage void __init xen_start_kernel(void) ? __pa(xen_start_info->mod_start) : 0; boot_params.hdr.ramdisk_size = xen_start_info->mod_len; - if (!is_initial_xendomain()) + if (!is_initial_xendomain()) { + add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + } /* Start the world */ start_kernel(); diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 619a6f8..4e10876 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -18,6 +18,7 @@ * frame buffer. */ +#include #include #include #include @@ -48,6 +49,7 @@ struct xenfb_info { static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8; +static void xenfb_make_preferred_console(void); static int xenfb_remove(struct xenbus_device *); static void xenfb_init_shared_page(struct xenfb_info *); static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); @@ -348,6 +350,7 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, if (ret < 0) goto error; + xenfb_make_preferred_console(); return 0; error_nomem: @@ -358,6 +361,28 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, return ret; } +static __devinit void +xenfb_make_preferred_console(void) +{ + struct console *c; + + if (console_set_on_cmdline) + return; + + acquire_console_sem(); + for (c = console_drivers; c; c = c->next) { + if (!strcmp(c->name, "tty") && c->index == 0) + break; + } + release_console_sem(); + if (c) { + unregister_console(c); + c->flags |= CON_CONSDEV; + c->flags &= ~CON_PRINTBUFFER; /* don't print again */ + register_console(c); + } +} + static int xenfb_resume(struct xenbus_device *dev) { struct xenfb_info *info = dev->dev.driver_data; diff --git a/include/linux/console.h b/include/linux/console.h index a4f27fb..248e6e3 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -108,6 +108,8 @@ struct console { struct console *next; }; +extern int console_set_on_cmdline; + extern int add_preferred_console(char *name, int idx, char *options); extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options); extern void register_console(struct console *); diff --git a/kernel/printk.c b/kernel/printk.c index 8fb01c3..028ed75d 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -121,6 +121,8 @@ struct console_cmdline static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; static int selected_console = -1; static int preferred_console = -1; +int console_set_on_cmdline; +EXPORT_SYMBOL(console_set_on_cmdline); /* Flag: console code may call schedule() */ static int console_may_schedule; @@ -890,6 +892,7 @@ static int __init console_setup(char *str) *s = 0; __add_preferred_console(buf, idx, options, brl_options); + console_set_on_cmdline = 1; return 1; } __setup("console=", console_setup); -- cgit v0.10.2 From 6ba0e7b36c7cc1745b3cbeda244d14edae3ad058 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:08 +0100 Subject: xen pvfb: Pointer z-axis (mouse wheel) support Add z-axis motion to pointer events. Backward compatible, because there's space for the z-axis in union xenkbd_in_event, and old backends zero it. Derived from http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/57dfe0098000 http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/1edfea26a2a9 http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/c3ff0b26f664 Signed-off-by: Pat Campbell Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index 0f47f46..9da4452 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -66,6 +66,9 @@ static irqreturn_t input_handler(int rq, void *dev_id) case XENKBD_TYPE_MOTION: input_report_rel(dev, REL_X, event->motion.rel_x); input_report_rel(dev, REL_Y, event->motion.rel_y); + if (event->motion.rel_z) + input_report_rel(dev, REL_WHEEL, + -event->motion.rel_z); break; case XENKBD_TYPE_KEY: dev = NULL; @@ -84,6 +87,9 @@ static irqreturn_t input_handler(int rq, void *dev_id) case XENKBD_TYPE_POS: input_report_abs(dev, ABS_X, event->pos.abs_x); input_report_abs(dev, ABS_Y, event->pos.abs_y); + if (event->pos.rel_z) + input_report_rel(dev, REL_WHEEL, + -event->pos.rel_z); break; } if (dev) @@ -152,7 +158,7 @@ static int __devinit xenkbd_probe(struct xenbus_device *dev, ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); for (i = BTN_LEFT; i <= BTN_TASK; i++) set_bit(i, ptr->keybit); - ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y); + ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL); input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h index fb97f42..8066c78 100644 --- a/include/xen/interface/io/kbdif.h +++ b/include/xen/interface/io/kbdif.h @@ -49,6 +49,7 @@ struct xenkbd_motion { uint8_t type; /* XENKBD_TYPE_MOTION */ int32_t rel_x; /* relative X motion */ int32_t rel_y; /* relative Y motion */ + int32_t rel_z; /* relative Z motion (wheel) */ }; struct xenkbd_key { @@ -61,6 +62,7 @@ struct xenkbd_position { uint8_t type; /* XENKBD_TYPE_POS */ int32_t abs_x; /* absolute X position (in FB pixels) */ int32_t abs_y; /* absolute Y position (in FB pixels) */ + int32_t rel_z; /* relative Z motion (wheel) */ }; #define XENKBD_IN_EVENT_SIZE 40 -- cgit v0.10.2 From 1e892c959da42278e60b21f5ecfd6fba0efff313 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:09 +0100 Subject: xen pvfb: Module aliases to support module autoloading These are mostly for completeness and consistency with the other frontends, as PVFB is typically compiled in rather than a module. Derived from http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/5e294e29a43e While there, add module descriptions. Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index 9da4452..eaf69cf 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -343,4 +343,6 @@ static void __exit xenkbd_cleanup(void) module_init(xenkbd_init); module_exit(xenkbd_cleanup); +MODULE_DESCRIPTION("Xen virtual keyboard/pointer device frontend"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("xen:vkbd"); diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 4e10876..5553e51 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -572,4 +572,6 @@ static void __exit xenfb_cleanup(void) module_init(xenfb_init); module_exit(xenfb_cleanup); +MODULE_DESCRIPTION("Xen virtual framebuffer device frontend"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("xen:vfb"); -- cgit v0.10.2 From f4ad1ebd7a0fae2782ef9f76c0b94b536742c3e8 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:10 +0100 Subject: xen pvfb: Zero unused bytes in events sent to backend This isn't a security flaw (the backend can see all our memory anyway). But it's the right thing to do all the same. Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 5553e51..291eef6 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -61,6 +61,7 @@ static void xenfb_do_update(struct xenfb_info *info, union xenfb_out_event event; u32 prod; + memset(&event, 0, sizeof(event)); event.type = XENFB_TYPE_UPDATE; event.update.x = x; event.update.y = y; -- cgit v0.10.2 From e4dcff1f6e7582f76c2c9990b1d9111bbc8e26ef Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:11 +0100 Subject: xen pvfb: Dynamic mode support (screen resizing) The pvfb backend indicates dynamic mode support by creating node feature_resize with a non-zero value in its xenstore directory. xen-fbfront sends a resize notification event on mode change. Fully backwards compatible both ways. Framebuffer size and initial resolution can be controlled through kernel parameter xen_fbfront.video. The backend enforces a separate size limit, which it advertises in node videoram in its xenstore directory. xen-kbdfront gets the maximum screen resolution from nodes width and height in the backend's xenstore directory instead of hardcoding it. Additional goodie: support for larger framebuffers (512M on a 64-bit system with 4K pages). Changing the number of bits per pixels dynamically is not supported, yet. Ported from http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/92f7b3144f41 http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/bfc040135633 Signed-off-by: Pat Campbell Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index eaf69cf..9ce3b3b 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -300,6 +300,16 @@ InitWait: */ if (dev->state != XenbusStateConnected) goto InitWait; /* no InitWait seen yet, fudge it */ + + /* Set input abs params to match backend screen res */ + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "width", "%d", &val) > 0) + input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0); + + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "height", "%d", &val) > 0) + input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0); + break; case XenbusStateClosing: diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 291eef6..47ed39b 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -43,23 +43,47 @@ struct xenfb_info { struct xenfb_page *page; unsigned long *mfns; int update_wanted; /* XENFB_TYPE_UPDATE wanted */ + int feature_resize; /* XENFB_TYPE_RESIZE ok */ + struct xenfb_resize resize; /* protected by resize_lock */ + int resize_dpy; /* ditto */ + spinlock_t resize_lock; struct xenbus_device *xbdev; }; -static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8; +#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8) + +enum { KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT }; +static int video[KPARAM_CNT] = { 2, XENFB_WIDTH, XENFB_HEIGHT }; +module_param_array(video, int, NULL, 0); +MODULE_PARM_DESC(video, + "Video memory size in MB, width, height in pixels (default 2,800,600)"); static void xenfb_make_preferred_console(void); static int xenfb_remove(struct xenbus_device *); -static void xenfb_init_shared_page(struct xenfb_info *); +static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *); static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); static void xenfb_disconnect_backend(struct xenfb_info *); +static void xenfb_send_event(struct xenfb_info *info, + union xenfb_out_event *event) +{ + u32 prod; + + prod = info->page->out_prod; + /* caller ensures !xenfb_queue_full() */ + mb(); /* ensure ring space available */ + XENFB_OUT_RING_REF(info->page, prod) = *event; + wmb(); /* ensure ring contents visible */ + info->page->out_prod = prod + 1; + + notify_remote_via_irq(info->irq); +} + static void xenfb_do_update(struct xenfb_info *info, int x, int y, int w, int h) { union xenfb_out_event event; - u32 prod; memset(&event, 0, sizeof(event)); event.type = XENFB_TYPE_UPDATE; @@ -68,14 +92,19 @@ static void xenfb_do_update(struct xenfb_info *info, event.update.width = w; event.update.height = h; - prod = info->page->out_prod; /* caller ensures !xenfb_queue_full() */ - mb(); /* ensure ring space available */ - XENFB_OUT_RING_REF(info->page, prod) = event; - wmb(); /* ensure ring contents visible */ - info->page->out_prod = prod + 1; + xenfb_send_event(info, &event); +} - notify_remote_via_irq(info->irq); +static void xenfb_do_resize(struct xenfb_info *info) +{ + union xenfb_out_event event; + + memset(&event, 0, sizeof(event)); + event.resize = info->resize; + + /* caller ensures !xenfb_queue_full() */ + xenfb_send_event(info, &event); } static int xenfb_queue_full(struct xenfb_info *info) @@ -87,12 +116,28 @@ static int xenfb_queue_full(struct xenfb_info *info) return prod - cons == XENFB_OUT_RING_LEN; } +static void xenfb_handle_resize_dpy(struct xenfb_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&info->resize_lock, flags); + if (info->resize_dpy) { + if (!xenfb_queue_full(info)) { + info->resize_dpy = 0; + xenfb_do_resize(info); + } + } + spin_unlock_irqrestore(&info->resize_lock, flags); +} + static void xenfb_refresh(struct xenfb_info *info, int x1, int y1, int w, int h) { unsigned long flags; - int y2 = y1 + h - 1; int x2 = x1 + w - 1; + int y2 = y1 + h - 1; + + xenfb_handle_resize_dpy(info); if (!info->update_wanted) return; @@ -225,6 +270,57 @@ static ssize_t xenfb_write(struct fb_info *p, const char __user *buf, return res; } +static int +xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct xenfb_info *xenfb_info; + int required_mem_len; + + xenfb_info = info->par; + + if (!xenfb_info->feature_resize) { + if (var->xres == video[KPARAM_WIDTH] && + var->yres == video[KPARAM_HEIGHT] && + var->bits_per_pixel == xenfb_info->page->depth) { + return 0; + } + return -EINVAL; + } + + /* Can't resize past initial width and height */ + if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT]) + return -EINVAL; + + required_mem_len = var->xres * var->yres * xenfb_info->page->depth / 8; + if (var->bits_per_pixel == xenfb_info->page->depth && + var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) && + required_mem_len <= info->fix.smem_len) { + var->xres_virtual = var->xres; + var->yres_virtual = var->yres; + return 0; + } + return -EINVAL; +} + +static int xenfb_set_par(struct fb_info *info) +{ + struct xenfb_info *xenfb_info; + unsigned long flags; + + xenfb_info = info->par; + + spin_lock_irqsave(&xenfb_info->resize_lock, flags); + xenfb_info->resize.type = XENFB_TYPE_RESIZE; + xenfb_info->resize.width = info->var.xres; + xenfb_info->resize.height = info->var.yres; + xenfb_info->resize.stride = info->fix.line_length; + xenfb_info->resize.depth = info->var.bits_per_pixel; + xenfb_info->resize.offset = 0; + xenfb_info->resize_dpy = 1; + spin_unlock_irqrestore(&xenfb_info->resize_lock, flags); + return 0; +} + static struct fb_ops xenfb_fb_ops = { .owner = THIS_MODULE, .fb_read = fb_sys_read, @@ -233,6 +329,8 @@ static struct fb_ops xenfb_fb_ops = { .fb_fillrect = xenfb_fillrect, .fb_copyarea = xenfb_copyarea, .fb_imageblit = xenfb_imageblit, + .fb_check_var = xenfb_check_var, + .fb_set_par = xenfb_set_par, }; static irqreturn_t xenfb_event_handler(int rq, void *dev_id) @@ -261,6 +359,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, { struct xenfb_info *info; struct fb_info *fb_info; + int fb_size; + int val; int ret; info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -268,18 +368,35 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); return -ENOMEM; } + + /* Limit kernel param videoram amount to what is in xenstore */ + if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) { + if (val < video[KPARAM_MEM]) + video[KPARAM_MEM] = val; + } + + /* If requested res does not fit in available memory, use default */ + fb_size = video[KPARAM_MEM] * 1024 * 1024; + if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH / 8 + > fb_size) { + video[KPARAM_WIDTH] = XENFB_WIDTH; + video[KPARAM_HEIGHT] = XENFB_HEIGHT; + fb_size = XENFB_DEFAULT_FB_LEN; + } + dev->dev.driver_data = info; info->xbdev = dev; info->irq = -1; info->x1 = info->y1 = INT_MAX; spin_lock_init(&info->dirty_lock); + spin_lock_init(&info->resize_lock); - info->fb = vmalloc(xenfb_mem_len); + info->fb = vmalloc(fb_size); if (info->fb == NULL) goto error_nomem; - memset(info->fb, 0, xenfb_mem_len); + memset(info->fb, 0, fb_size); - info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT; + info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT; info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages); if (!info->mfns) @@ -290,8 +407,6 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, if (!info->page) goto error_nomem; - xenfb_init_shared_page(info); - /* abusing framebuffer_alloc() to allocate pseudo_palette */ fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL); if (fb_info == NULL) @@ -304,9 +419,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->screen_base = info->fb; fb_info->fbops = &xenfb_fb_ops; - fb_info->var.xres_virtual = fb_info->var.xres = info->page->width; - fb_info->var.yres_virtual = fb_info->var.yres = info->page->height; - fb_info->var.bits_per_pixel = info->page->depth; + fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH]; + fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT]; + fb_info->var.bits_per_pixel = XENFB_DEPTH; fb_info->var.red = (struct fb_bitfield){16, 8, 0}; fb_info->var.green = (struct fb_bitfield){8, 8, 0}; @@ -318,9 +433,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->var.vmode = FB_VMODE_NONINTERLACED; fb_info->fix.visual = FB_VISUAL_TRUECOLOR; - fb_info->fix.line_length = info->page->line_length; + fb_info->fix.line_length = fb_info->var.xres * XENFB_DEPTH / 8; fb_info->fix.smem_start = 0; - fb_info->fix.smem_len = xenfb_mem_len; + fb_info->fix.smem_len = fb_size; strcpy(fb_info->fix.id, "xen"); fb_info->fix.type = FB_TYPE_PACKED_PIXELS; fb_info->fix.accel = FB_ACCEL_NONE; @@ -337,6 +452,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->fbdefio = &xenfb_defio; fb_deferred_io_init(fb_info); + xenfb_init_shared_page(info, fb_info); + ret = register_framebuffer(fb_info); if (ret) { fb_deferred_io_cleanup(fb_info); @@ -389,7 +506,7 @@ static int xenfb_resume(struct xenbus_device *dev) struct xenfb_info *info = dev->dev.driver_data; xenfb_disconnect_backend(info); - xenfb_init_shared_page(info); + xenfb_init_shared_page(info, info->fb_info); return xenfb_connect_backend(dev, info); } @@ -417,20 +534,23 @@ static unsigned long vmalloc_to_mfn(void *address) return pfn_to_mfn(vmalloc_to_pfn(address)); } -static void xenfb_init_shared_page(struct xenfb_info *info) +static void xenfb_init_shared_page(struct xenfb_info *info, + struct fb_info *fb_info) { int i; + int epd = PAGE_SIZE / sizeof(info->mfns[0]); for (i = 0; i < info->nr_pages; i++) info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE); - info->page->pd[0] = vmalloc_to_mfn(info->mfns); - info->page->pd[1] = 0; - info->page->width = XENFB_WIDTH; - info->page->height = XENFB_HEIGHT; - info->page->depth = XENFB_DEPTH; - info->page->line_length = (info->page->depth / 8) * info->page->width; - info->page->mem_length = xenfb_mem_len; + for (i = 0; i * epd < info->nr_pages; i++) + info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]); + + info->page->width = fb_info->var.xres; + info->page->height = fb_info->var.yres; + info->page->depth = fb_info->var.bits_per_pixel; + info->page->line_length = fb_info->fix.line_length; + info->page->mem_length = fb_info->fix.smem_len; info->page->in_cons = info->page->in_prod = 0; info->page->out_cons = info->page->out_prod = 0; } @@ -530,6 +650,11 @@ InitWait: val = 0; if (val) info->update_wanted = 1; + + if (xenbus_scanf(XBT_NIL, dev->otherend, + "feature-resize", "%d", &val) < 0) + val = 0; + info->feature_resize = val; break; case XenbusStateClosing: diff --git a/include/xen/interface/io/fbif.h b/include/xen/interface/io/fbif.h index 5a934dd..974a51e 100644 --- a/include/xen/interface/io/fbif.h +++ b/include/xen/interface/io/fbif.h @@ -49,11 +49,27 @@ struct xenfb_update { int32_t height; /* rect height */ }; +/* + * Framebuffer resize notification event + * Capable backend sets feature-resize in xenstore. + */ +#define XENFB_TYPE_RESIZE 3 + +struct xenfb_resize { + uint8_t type; /* XENFB_TYPE_RESIZE */ + int32_t width; /* width in pixels */ + int32_t height; /* height in pixels */ + int32_t stride; /* stride in bytes */ + int32_t depth; /* depth in bits */ + int32_t offset; /* start offset within framebuffer */ +}; + #define XENFB_OUT_EVENT_SIZE 40 union xenfb_out_event { uint8_t type; struct xenfb_update update; + struct xenfb_resize resize; char pad[XENFB_OUT_EVENT_SIZE]; }; @@ -105,15 +121,18 @@ struct xenfb_page { * Each directory page holds PAGE_SIZE / sizeof(*pd) * framebuffer pages, and can thus map up to PAGE_SIZE * * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and - * sizeof(unsigned long) == 4, that's 4 Megs. Two directory - * pages should be enough for a while. + * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 + * Megs 64 bit. 256 directories give enough room for a 512 + * Meg framebuffer with a max resolution of 12,800x10,240. + * Should be enough for a while with room leftover for + * expansion. */ - unsigned long pd[2]; + unsigned long pd[256]; }; /* - * Wart: xenkbd needs to know resolution. Put it here until a better - * solution is found, but don't leak it to the backend. + * Wart: xenkbd needs to know default resolution. Put it here until a + * better solution is found, but don't leak it to the backend. */ #ifdef __KERNEL__ #define XENFB_WIDTH 800 -- cgit v0.10.2 From 83abc70a4c6e306f4c1672e25884322f797e4fcb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:12 +0100 Subject: xen: make earlyprintk=xen work again For some perverse reason, if you call add_preferred_console() it prevents setup_early_printk() from successfully enabling the boot console - unless you make it a preferred console too... Also, make xenboot console output distinct from normal console output, since it gets repeated when the console handover happens, and the duplicated output is confusing without disambiguation. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner Cc: Markus Armbruster Cc: Gerd Hoffmann diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 6cfb708..5c0635a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1257,6 +1257,7 @@ asmlinkage void __init xen_start_kernel(void) boot_params.hdr.ramdisk_size = xen_start_info->mod_len; if (!is_initial_xendomain()) { + add_preferred_console("xenboot", 0, NULL); add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); } diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index 2413af3..d5fe0a8 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -155,6 +155,7 @@ static void xenboot_write_console(struct console *console, const char *string, raw_console_write(string, len); + write_console(0, "(early) ", 8); while (off < len && NULL != (pos = strchr(string+off, '\n'))) { linelen = pos-string+off; if (off + linelen > len) -- cgit v0.10.2 From ec9b2065d4d3b797604c09a569083dd9ff951b1b Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 26 May 2008 23:31:13 +0100 Subject: xen: Move manage.c to drivers/xen for ia64/xen support move arch/x86/xen/manage.c under drivers/xen/to share codes with x86 and ia64. ia64/xen also uses manage.c Signed-off-by: Isaku Yamahata Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3d8df98..40b119b 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o \ - time.o manage.o xen-asm.o grant-table.o + time.o xen-asm.o grant-table.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c deleted file mode 100644 index aa7af9e..0000000 --- a/arch/x86/xen/manage.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Handle extern requests for shutdown, reboot and sysrq - */ -#include -#include -#include -#include - -#include - -#define SHUTDOWN_INVALID -1 -#define SHUTDOWN_POWEROFF 0 -#define SHUTDOWN_SUSPEND 2 -/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only - * report a crash, not be instructed to crash! - * HALT is the same as POWEROFF, as far as we're concerned. The tools use - * the distinction when we return the reason code to them. - */ -#define SHUTDOWN_HALT 4 - -/* Ignore multiple shutdown requests. */ -static int shutting_down = SHUTDOWN_INVALID; - -static void shutdown_handler(struct xenbus_watch *watch, - const char **vec, unsigned int len) -{ - char *str; - struct xenbus_transaction xbt; - int err; - - if (shutting_down != SHUTDOWN_INVALID) - return; - - again: - err = xenbus_transaction_start(&xbt); - if (err) - return; - - str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); - /* Ignore read errors and empty reads. */ - if (XENBUS_IS_ERR_READ(str)) { - xenbus_transaction_end(xbt, 1); - return; - } - - xenbus_write(xbt, "control", "shutdown", ""); - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) { - kfree(str); - goto again; - } - - if (strcmp(str, "poweroff") == 0 || - strcmp(str, "halt") == 0) - orderly_poweroff(false); - else if (strcmp(str, "reboot") == 0) - ctrl_alt_del(); - else { - printk(KERN_INFO "Ignoring shutdown request: %s\n", str); - shutting_down = SHUTDOWN_INVALID; - } - - kfree(str); -} - -static void sysrq_handler(struct xenbus_watch *watch, const char **vec, - unsigned int len) -{ - char sysrq_key = '\0'; - struct xenbus_transaction xbt; - int err; - - again: - err = xenbus_transaction_start(&xbt); - if (err) - return; - if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { - printk(KERN_ERR "Unable to read sysrq code in " - "control/sysrq\n"); - xenbus_transaction_end(xbt, 1); - return; - } - - if (sysrq_key != '\0') - xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) - goto again; - - if (sysrq_key != '\0') - handle_sysrq(sysrq_key, NULL); -} - -static struct xenbus_watch shutdown_watch = { - .node = "control/shutdown", - .callback = shutdown_handler -}; - -static struct xenbus_watch sysrq_watch = { - .node = "control/sysrq", - .callback = sysrq_handler -}; - -static int setup_shutdown_watcher(void) -{ - int err; - - err = register_xenbus_watch(&shutdown_watch); - if (err) { - printk(KERN_ERR "Failed to set shutdown watcher\n"); - return err; - } - - err = register_xenbus_watch(&sysrq_watch); - if (err) { - printk(KERN_ERR "Failed to set sysrq watcher\n"); - return err; - } - - return 0; -} - -static int shutdown_event(struct notifier_block *notifier, - unsigned long event, - void *data) -{ - setup_shutdown_watcher(); - return NOTIFY_DONE; -} - -static int __init setup_shutdown_event(void) -{ - static struct notifier_block xenstore_notifier = { - .notifier_call = shutdown_event - }; - register_xenstore_notifier(&xenstore_notifier); - - return 0; -} - -subsys_initcall(setup_shutdown_event); diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 37af04f..363286c 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,4 +1,4 @@ -obj-y += grant-table.o features.o events.o +obj-y += grant-table.o features.o events.o manage.o obj-y += xenbus/ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c new file mode 100644 index 0000000..aa7af9e --- /dev/null +++ b/drivers/xen/manage.c @@ -0,0 +1,143 @@ +/* + * Handle extern requests for shutdown, reboot and sysrq + */ +#include +#include +#include +#include + +#include + +#define SHUTDOWN_INVALID -1 +#define SHUTDOWN_POWEROFF 0 +#define SHUTDOWN_SUSPEND 2 +/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only + * report a crash, not be instructed to crash! + * HALT is the same as POWEROFF, as far as we're concerned. The tools use + * the distinction when we return the reason code to them. + */ +#define SHUTDOWN_HALT 4 + +/* Ignore multiple shutdown requests. */ +static int shutting_down = SHUTDOWN_INVALID; + +static void shutdown_handler(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + char *str; + struct xenbus_transaction xbt; + int err; + + if (shutting_down != SHUTDOWN_INVALID) + return; + + again: + err = xenbus_transaction_start(&xbt); + if (err) + return; + + str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); + /* Ignore read errors and empty reads. */ + if (XENBUS_IS_ERR_READ(str)) { + xenbus_transaction_end(xbt, 1); + return; + } + + xenbus_write(xbt, "control", "shutdown", ""); + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) { + kfree(str); + goto again; + } + + if (strcmp(str, "poweroff") == 0 || + strcmp(str, "halt") == 0) + orderly_poweroff(false); + else if (strcmp(str, "reboot") == 0) + ctrl_alt_del(); + else { + printk(KERN_INFO "Ignoring shutdown request: %s\n", str); + shutting_down = SHUTDOWN_INVALID; + } + + kfree(str); +} + +static void sysrq_handler(struct xenbus_watch *watch, const char **vec, + unsigned int len) +{ + char sysrq_key = '\0'; + struct xenbus_transaction xbt; + int err; + + again: + err = xenbus_transaction_start(&xbt); + if (err) + return; + if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { + printk(KERN_ERR "Unable to read sysrq code in " + "control/sysrq\n"); + xenbus_transaction_end(xbt, 1); + return; + } + + if (sysrq_key != '\0') + xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + + if (sysrq_key != '\0') + handle_sysrq(sysrq_key, NULL); +} + +static struct xenbus_watch shutdown_watch = { + .node = "control/shutdown", + .callback = shutdown_handler +}; + +static struct xenbus_watch sysrq_watch = { + .node = "control/sysrq", + .callback = sysrq_handler +}; + +static int setup_shutdown_watcher(void) +{ + int err; + + err = register_xenbus_watch(&shutdown_watch); + if (err) { + printk(KERN_ERR "Failed to set shutdown watcher\n"); + return err; + } + + err = register_xenbus_watch(&sysrq_watch); + if (err) { + printk(KERN_ERR "Failed to set sysrq watcher\n"); + return err; + } + + return 0; +} + +static int shutdown_event(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + setup_shutdown_watcher(); + return NOTIFY_DONE; +} + +static int __init setup_shutdown_event(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = shutdown_event + }; + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} + +subsys_initcall(setup_shutdown_event); -- cgit v0.10.2 From a90971ebddc81330f59203dee9803512aa4e2ef6 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 26 May 2008 23:31:14 +0100 Subject: xen: compilation fix to balloon driver for ia64 support fix compilation error of ballon driver on ia64. extent_start member is pointer argument. On x86 pointer argument for xen hypercall is passed as virtual address. On the other hand, ia64 and ppc, pointer argument is passed in pseudo physical address. (guest physicall address.) So they must be passed as handle and convert right before issuing hypercall. CC drivers/xen/balloon.o linux-2.6-x86/drivers/xen/balloon.c: In function 'increase_reservation': linux-2.6-x86/drivers/xen/balloon.c:228: error: incompatible types in assignment linux-2.6-x86/drivers/xen/balloon.c: In function 'decrease_reservation': linux-2.6-x86/drivers/xen/balloon.c:324: error: incompatible types in assignment linux-2.6-x86/drivers/xen/balloon.c: In function 'dealloc_pte_fn': linux-2.6-x86/drivers/xen/balloon.c:486: error: incompatible types in assignment linux-2.6-x86/drivers/xen/balloon.c: In function 'alloc_empty_pages_and_pagevec': linux-2.6-x86/drivers/xen/balloon.c:522: error: incompatible types in assignment make[2]: *** [drivers/xen/balloon.o] Error 1 Signed-off-by: Isaku Yamahata Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index ab25ba6..097ba02 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -225,7 +225,7 @@ static int increase_reservation(unsigned long nr_pages) page = balloon_next_page(page); } - reservation.extent_start = (unsigned long)frame_list; + set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op( XENMEM_populate_physmap, &reservation); @@ -321,7 +321,7 @@ static int decrease_reservation(unsigned long nr_pages) balloon_append(pfn_to_page(pfn)); } - reservation.extent_start = (unsigned long)frame_list; + set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); BUG_ON(ret != nr_pages); @@ -483,7 +483,7 @@ static int dealloc_pte_fn( .extent_order = 0, .domid = DOMID_SELF }; - reservation.extent_start = (unsigned long)&mfn; + set_xen_guest_handle(reservation.extent_start, &mfn); set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); @@ -519,7 +519,7 @@ static struct page **alloc_empty_pages_and_pagevec(int nr_pages) .extent_order = 0, .domid = DOMID_SELF }; - reservation.extent_start = (unsigned long)&gmfn; + set_xen_guest_handle(reservation.extent_start, &gmfn); ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); if (ret == 1) -- cgit v0.10.2 From bfdab126cfa6fe3c2ddb8b6007a38202b510b6c1 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 26 May 2008 23:31:15 +0100 Subject: xen: add missing definitions in include/xen/interface/memory.h which ia64/xen needs Add xen handles realted definitions for xen memory which ia64/xen needs. Pointer argumsnts for ia64/xen hypercall are passed in pseudo physical address (guest physical address) so that it is required to convert guest kernel virtual address into pseudo physical address. The xen guest handle represents such arguments. Signed-off-by: Isaku Yamahata Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index da76846..af36ead 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -29,7 +29,7 @@ struct xen_memory_reservation { * OUT: GMFN bases of extents that were allocated * (NB. This command also updates the mach_to_phys translation table) */ - ulong extent_start; + GUEST_HANDLE(ulong) extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). */ unsigned long nr_extents; @@ -50,6 +50,7 @@ struct xen_memory_reservation { domid_t domid; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); /* * Returns the maximum machine frame number of mapped RAM in this system. @@ -85,7 +86,7 @@ struct xen_machphys_mfn_list { * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ - ulong extent_start; + GUEST_HANDLE(ulong) extent_start; /* * Number of extents written to the above array. This will be smaller @@ -93,6 +94,7 @@ struct xen_machphys_mfn_list { */ unsigned int nr_extents; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); /* * Sets the GPFN at which a particular page appears in the specified guest's @@ -115,6 +117,7 @@ struct xen_add_to_physmap { /* GPFN where the source mapping page should appear. */ unsigned long gpfn; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); /* * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error @@ -129,13 +132,14 @@ struct xen_translate_gpfn_list { unsigned long nr_gpfns; /* List of GPFNs to translate. */ - ulong gpfn_list; + GUEST_HANDLE(ulong) gpfn_list; /* * Output list to contain MFN translations. May be the same as the input * list (in which case each input GPFN is overwritten with the output MFN). */ - ulong mfn_list; + GUEST_HANDLE(ulong) mfn_list; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); #endif /* __XEN_PUBLIC_MEMORY_H__ */ -- cgit v0.10.2 From 38bb5ab4179572f4d24d3ca7188172a31ca51a69 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:16 +0100 Subject: xen: count resched interrupts properly Make sure resched interrupts appear in /proc/interrupts in the proper place. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 94e6900..74ab896 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -65,6 +65,12 @@ static struct call_data_struct *call_data; */ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { +#ifdef CONFIG_X86_32 + __get_cpu_var(irq_stat).irq_resched_count++; +#else + add_pda(irq_resched_count, 1); +#endif + return IRQ_HANDLED; } -- cgit v0.10.2 From 955d6f1778da5a9795f2dfb07f760006f194609a Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 26 May 2008 23:31:17 +0100 Subject: xen: drivers/xen/balloon.c: make a function static Make the needlessly global balloon_set_new_target() static. Signed-off-by: Adrian Bunk Acked-by: Chris Wright Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 097ba02..591bc29 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -368,7 +368,7 @@ static void balloon_process(struct work_struct *work) } /* Resets the Xen limit, sets new target, and kicks off processing. */ -void balloon_set_new_target(unsigned long target) +static void balloon_set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ balloon_stats.hard_limit = ~0UL; -- cgit v0.10.2 From d451bb7aa852627bdf7be7937dc3d9d9f261b235 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:18 +0100 Subject: xen: make phys_to_machine structure dynamic We now support the use of memory hotplug, so the physical to machine page mapping structure must be dynamic. This is implemented as a two-level radix tree structure, which allows us to efficiently incrementally allocate memory for the p2m table as new pages are added. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5c0635a..73d3c84 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1221,7 +1221,7 @@ asmlinkage void __init xen_start_kernel(void) /* Get mfn list */ if (!xen_feature(XENFEAT_auto_translated_physmap)) - phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; + xen_build_dynamic_phys_to_machine(); pgd = (pgd_t *)xen_start_info->pt_base; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 07c2653..c3b27de 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -56,6 +56,91 @@ #include "multicalls.h" #include "mmu.h" +/* + * This should probably be a config option. On 32-bit, it costs 1 + * page/gig of memory; on 64-bit its 2 pages/gig. If we want it to be + * completely unbounded we can add another level to the p2m structure. + */ +#define MAX_GUEST_PAGES (16ull * 1024*1024*1024 / PAGE_SIZE) +#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) + +static unsigned long *p2m_top[MAX_GUEST_PAGES / P2M_ENTRIES_PER_PAGE]; + +static inline unsigned p2m_top_index(unsigned long pfn) +{ + BUG_ON(pfn >= MAX_GUEST_PAGES); + return pfn / P2M_ENTRIES_PER_PAGE; +} + +static inline unsigned p2m_index(unsigned long pfn) +{ + return pfn % P2M_ENTRIES_PER_PAGE; +} + +void __init xen_build_dynamic_phys_to_machine(void) +{ + unsigned pfn; + unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; + + BUG_ON(xen_start_info->nr_pages >= MAX_GUEST_PAGES); + + for(pfn = 0; + pfn < xen_start_info->nr_pages; + pfn += P2M_ENTRIES_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + + p2m_top[topidx] = &mfn_list[pfn]; + } +} + +unsigned long get_phys_to_machine(unsigned long pfn) +{ + unsigned topidx, idx; + + topidx = p2m_top_index(pfn); + if (p2m_top[topidx] == NULL) + return INVALID_P2M_ENTRY; + + idx = p2m_index(pfn); + return p2m_top[topidx][idx]; +} + +static void alloc_p2m(unsigned long **pp) +{ + unsigned long *p; + unsigned i; + + p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + BUG_ON(p == NULL); + + for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) + p[i] = INVALID_P2M_ENTRY; + + if (cmpxchg(pp, NULL, p) != NULL) + free_page((unsigned long)p); +} + +void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + unsigned topidx, idx; + + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + + topidx = p2m_top_index(pfn); + if (p2m_top[topidx] == NULL) { + /* no need to allocate a page to store an invalid entry */ + if (mfn == INVALID_P2M_ENTRY) + return; + alloc_p2m(&p2m_top[topidx]); + } + + idx = p2m_index(pfn); + p2m_top[topidx][idx] = mfn; +} + xmaddr_t arbitrary_virt_to_machine(unsigned long address) { unsigned int level; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 82517e4..37f8f0b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -27,8 +27,6 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; -unsigned long *phys_to_machine_mapping; -EXPORT_SYMBOL(phys_to_machine_mapping); /** * machine_specific_memory_setup - Hook for machine specific memory setup. diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f1063ae..7bdc8c5 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -22,6 +22,8 @@ void __init xen_arch_setup(void); void __init xen_init_IRQ(void); void xen_enable_sysenter(void); +void __init xen_build_dynamic_phys_to_machine(void); + void xen_setup_timer(int cpu); void xen_setup_cpu_clockevents(void); unsigned long xen_cpu_khz(void); diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h index e11f240..293344f 100644 --- a/include/asm-x86/xen/page.h +++ b/include/asm-x86/xen/page.h @@ -26,15 +26,15 @@ typedef struct xpaddr { #define FOREIGN_FRAME_BIT (1UL<<31) #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) -extern unsigned long *phys_to_machine_mapping; +extern unsigned long get_phys_to_machine(unsigned long pfn); +extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn); static inline unsigned long pfn_to_mfn(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & - ~FOREIGN_FRAME_BIT; + return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) @@ -42,7 +42,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return 1; - return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); + return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; } static inline unsigned long mfn_to_pfn(unsigned long mfn) @@ -106,20 +106,12 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) unsigned long pfn = mfn_to_pfn(mfn); if ((pfn < max_mapnr) && !xen_feature(XENFEAT_auto_translated_physmap) - && (phys_to_machine_mapping[pfn] != mfn)) + && (get_phys_to_machine(pfn) != mfn)) return max_mapnr; /* force !pfn_valid() */ + /* XXX fixme; not true with sparsemem */ return pfn; } -static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - if (xen_feature(XENFEAT_auto_translated_physmap)) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return; - } - phys_to_machine_mapping[pfn] = mfn; -} - /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) #define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) -- cgit v0.10.2 From 8006ec3e911f93d702e1d4a4e387e244ab434924 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:19 +0100 Subject: xen: add configurable max domain size Add a config option to set the max size of a Xen domain. This is used to scale the size of the physical-to-machine array; it ends up using around 1 page/GByte, so there's no reason to be very restrictive. For a 32-bit guest, the default value of 8GB is probably sufficient; there's not much point in giving a 32-bit machine much more memory than that. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 525b108..d0f1c7c 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -11,3 +11,13 @@ config XEN This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the Xen hypervisor. + +config XEN_MAX_DOMAIN_MEMORY + int "Maximum allowed size of a domain in gigabytes" + default 8 + depends on XEN + help + The pseudo-physical to machine address array is sized + according to the maximum possible memory size of a Xen + domain. This array uses 1 page per gigabyte, so there's no + need to be too stingy here. \ No newline at end of file diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index c3b27de..644232a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -56,19 +56,13 @@ #include "multicalls.h" #include "mmu.h" -/* - * This should probably be a config option. On 32-bit, it costs 1 - * page/gig of memory; on 64-bit its 2 pages/gig. If we want it to be - * completely unbounded we can add another level to the p2m structure. - */ -#define MAX_GUEST_PAGES (16ull * 1024*1024*1024 / PAGE_SIZE) #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) -static unsigned long *p2m_top[MAX_GUEST_PAGES / P2M_ENTRIES_PER_PAGE]; +static unsigned long *p2m_top[MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE]; static inline unsigned p2m_top_index(unsigned long pfn) { - BUG_ON(pfn >= MAX_GUEST_PAGES); + BUG_ON(pfn >= MAX_DOMAIN_PAGES); return pfn / P2M_ENTRIES_PER_PAGE; } @@ -81,12 +75,9 @@ void __init xen_build_dynamic_phys_to_machine(void) { unsigned pfn; unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; + unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); - BUG_ON(xen_start_info->nr_pages >= MAX_GUEST_PAGES); - - for(pfn = 0; - pfn < xen_start_info->nr_pages; - pfn += P2M_ENTRIES_PER_PAGE) { + for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { unsigned topidx = p2m_top_index(pfn); p2m_top[topidx] = &mfn_list[pfn]; @@ -97,6 +88,9 @@ unsigned long get_phys_to_machine(unsigned long pfn) { unsigned topidx, idx; + if (unlikely(pfn >= MAX_DOMAIN_PAGES)) + return INVALID_P2M_ENTRY; + topidx = p2m_top_index(pfn); if (p2m_top[topidx] == NULL) return INVALID_P2M_ENTRY; @@ -129,6 +123,11 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn) return; } + if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { + BUG_ON(mfn != INVALID_P2M_ENTRY); + return; + } + topidx = p2m_top_index(pfn); if (p2m_top[topidx] == NULL) { /* no need to allocate a page to store an invalid entry */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 37f8f0b..4884478 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -36,6 +37,8 @@ char * __init xen_memory_setup(void) { unsigned long max_pfn = xen_start_info->nr_pages; + max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); + e820.nr_map = 0; add_memory_region(0, LOWMEMSIZE(), E820_RAM); add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h index 293344f..377c045 100644 --- a/include/asm-x86/xen/page.h +++ b/include/asm-x86/xen/page.h @@ -26,6 +26,11 @@ typedef struct xpaddr { #define FOREIGN_FRAME_BIT (1UL<<31) #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) +/* Maximum amount of memory we can handle in a domain in pages */ +#define MAX_DOMAIN_PAGES \ + ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) + + extern unsigned long get_phys_to_machine(unsigned long pfn); extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn); -- cgit v0.10.2 From cf0923ea295ba08ae656ef04164a43cb6553ba99 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:20 +0100 Subject: xen: efficiently support a holey p2m table When using sparsemem and memory hotplug, the kernel's pseudo-physical address space can be discontigious. Previously this was dealt with by having the upper parts of the radix tree stubbed off. Unfortunately, this is incompatible with save/restore, which requires a complete p2m table. The solution is to have a special distinguished all-invalid p2m leaf page, which we can point all the hole areas at. This allows the tools to see a complete p2m table, but it only costs a page for all memory holes. It also simplifies the code since it removes a few special cases. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 644232a..da7b45b 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -57,8 +57,17 @@ #include "mmu.h" #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) +#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) -static unsigned long *p2m_top[MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE]; +/* Placeholder for holes in the address space */ +static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] + __attribute__((section(".data.page_aligned"))) = + { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; + + /* Array of pointers to pages containing p2m entries */ +static unsigned long *p2m_top[TOP_ENTRIES] + __attribute__((section(".data.page_aligned"))) = + { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; static inline unsigned p2m_top_index(unsigned long pfn) { @@ -92,9 +101,6 @@ unsigned long get_phys_to_machine(unsigned long pfn) return INVALID_P2M_ENTRY; topidx = p2m_top_index(pfn); - if (p2m_top[topidx] == NULL) - return INVALID_P2M_ENTRY; - idx = p2m_index(pfn); return p2m_top[topidx][idx]; } @@ -110,7 +116,7 @@ static void alloc_p2m(unsigned long **pp) for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) p[i] = INVALID_P2M_ENTRY; - if (cmpxchg(pp, NULL, p) != NULL) + if (cmpxchg(pp, p2m_missing, p) != p2m_missing) free_page((unsigned long)p); } @@ -129,7 +135,7 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn) } topidx = p2m_top_index(pfn); - if (p2m_top[topidx] == NULL) { + if (p2m_top[topidx] == p2m_missing) { /* no need to allocate a page to store an invalid entry */ if (mfn == INVALID_P2M_ENTRY) return; -- cgit v0.10.2 From a0d695c821544947342a2d372ec4108bc813b979 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:21 +0100 Subject: xen: make dummy_shared_info non-static Rename dummy_shared_info to xen_dummy_shared_info and make it non-static, in anticipation of users outside of enlighten.c Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 73d3c84..d9faaa2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -75,13 +75,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); -static /* __initdata */ struct shared_info dummy_shared_info; +struct shared_info xen_dummy_shared_info; /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. */ -struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; +struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; /* * Flag to determine whether vcpu info placement is available on all @@ -104,7 +104,7 @@ static void __init xen_vcpu_setup(int cpu) int err; struct vcpu_info *vcpup; - BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info); + BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; if (!have_vcpu_info_placement) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 7bdc8c5..826d6e4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -15,6 +15,7 @@ DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); extern struct start_info *xen_start_info; +extern struct shared_info xen_dummy_shared_info; extern struct shared_info *HYPERVISOR_shared_info; char * __init xen_memory_setup(void); -- cgit v0.10.2 From d5edbc1f75420935b1ec7e65df10c8f81cea82de Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:22 +0100 Subject: xen: add p2m mfn_list_list When saving a domain, the Xen tools need to remap all our mfns to portable pfns. In order to remap our p2m table, it needs to know where all its pages are, so maintain the references to the p2m table for it to use. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d9faaa2..ce67dc8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -880,6 +880,8 @@ static __init void setup_shared_info(void) /* In UP this is as good a place as any to set up shared info */ xen_setup_vcpu_info_placement(); #endif + + xen_setup_mfn_list_list(); } static __init void xen_pagetable_setup_done(pgd_t *base) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index da7b45b..4740cda 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -69,6 +69,13 @@ static unsigned long *p2m_top[TOP_ENTRIES] __attribute__((section(".data.page_aligned"))) = { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; +/* Arrays of p2m arrays expressed in mfns used for save/restore */ +static unsigned long p2m_top_mfn[TOP_ENTRIES] + __attribute__((section(".bss.page_aligned"))); + +static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE] + __attribute__((section(".bss.page_aligned"))); + static inline unsigned p2m_top_index(unsigned long pfn) { BUG_ON(pfn >= MAX_DOMAIN_PAGES); @@ -80,11 +87,35 @@ static inline unsigned p2m_index(unsigned long pfn) return pfn % P2M_ENTRIES_PER_PAGE; } +/* Build the parallel p2m_top_mfn structures */ +void xen_setup_mfn_list_list(void) +{ + unsigned pfn, idx; + + for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + + p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); + } + + for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { + unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; + p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); + } + + BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); + + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = + virt_to_mfn(p2m_top_mfn_list); + HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages; +} + +/* Set up p2m_top to point to the domain-builder provided p2m pages */ void __init xen_build_dynamic_phys_to_machine(void) { - unsigned pfn; unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); + unsigned pfn; for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { unsigned topidx = p2m_top_index(pfn); @@ -105,7 +136,7 @@ unsigned long get_phys_to_machine(unsigned long pfn) return p2m_top[topidx][idx]; } -static void alloc_p2m(unsigned long **pp) +static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) { unsigned long *p; unsigned i; @@ -118,6 +149,8 @@ static void alloc_p2m(unsigned long **pp) if (cmpxchg(pp, p2m_missing, p) != p2m_missing) free_page((unsigned long)p); + else + *mfnp = virt_to_mfn(p); } void set_phys_to_machine(unsigned long pfn, unsigned long mfn) @@ -139,7 +172,7 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn) /* no need to allocate a page to store an invalid entry */ if (mfn == INVALID_P2M_ENTRY) return; - alloc_p2m(&p2m_top[topidx]); + alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); } idx = p2m_index(pfn); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 826d6e4..a1bc89a 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -18,6 +18,8 @@ extern struct start_info *xen_start_info; extern struct shared_info xen_dummy_shared_info; extern struct shared_info *HYPERVISOR_shared_info; +void xen_setup_mfn_list_list(void); + char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void __init xen_init_IRQ(void); -- cgit v0.10.2 From eb1e305f4ef201e549ffd475b7dcbcd4ec36d7dc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:23 +0100 Subject: xen: add rebind_evtchn_irq Add rebind_evtchn_irq(), which will rebind an device driver's existing irq to a new event channel on restore. Since the new event channel will be masked and bound to vcpu0, we update the state accordingly and unmask the irq once everything is set up. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 4f0f22b..f64e979 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -557,6 +557,33 @@ out: put_cpu(); } +/* Rebind a new event channel to an existing irq. */ +void rebind_evtchn_irq(int evtchn, int irq) +{ + /* Make sure the irq is masked, since the new event channel + will also be masked. */ + disable_irq(irq); + + spin_lock(&irq_mapping_update_lock); + + /* After resume the irq<->evtchn mappings are all cleared out */ + BUG_ON(evtchn_to_irq[evtchn] != -1); + /* Expect irq to have been bound before, + so the bindcount should be non-0 */ + BUG_ON(irq_bindcount[irq] == 0); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + + spin_unlock(&irq_mapping_update_lock); + + /* new event channels are always bound to cpu 0 */ + irq_set_affinity(irq, cpumask_of_cpu(0)); + + /* Unmask the event channel. */ + enable_irq(irq); +} + /* Rebind an evtchn so that it gets delivered to a specific cpu */ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { diff --git a/include/xen/events.h b/include/xen/events.h index acd8e06..a82ec0c 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -32,6 +32,7 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); int resend_irq_on_evtchn(unsigned int irq); +void rebind_evtchn_irq(int evtchn, int irq); static inline void notify_remote_via_evtchn(int port) { -- cgit v0.10.2 From 0f2287ad7c61f10b2a22a06e2a66cdbbbfc44ad0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:24 +0100 Subject: xen: fix unbind_from_irq() Rearrange the tests in unbind_from_irq() so that we can still unbind an irq even if the underlying event channel is bad. This allows a device driver to shuffle its irqs on save/restore before the underlying event channels have been fixed up. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/drivers/xen/events.c b/drivers/xen/events.c index f64e979..70375a6 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -355,7 +355,7 @@ static void unbind_from_irq(unsigned int irq) spin_lock(&irq_mapping_update_lock); - if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { + if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { close.port = evtchn; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) BUG(); @@ -375,7 +375,7 @@ static void unbind_from_irq(unsigned int irq) evtchn_to_irq[evtchn] = -1; irq_info[irq] = IRQ_UNBOUND; - dynamic_irq_init(irq); + dynamic_irq_cleanup(irq); } spin_unlock(&irq_mapping_update_lock); -- cgit v0.10.2 From 6b9b732d0e396a3f1a95977162a8624aafce38a1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:25 +0100 Subject: xen-console: add save/restore Add code to: 1. Deal with the console page being canonicalized. During save, the console's mfn in the start_info structure is canonicalized to a pfn. In order to deal with that, we always use a copy of the pfn and indirect off that all the time. However, we fall back to using the mfn if the pfn hasn't been initialized yet. 2. Restore the console event channel, and rebind it to the existing irq. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index d5fe0a8..db2ae42 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -39,9 +39,14 @@ static int xencons_irq; /* ------------------------------------------------------------------ */ +static unsigned long console_pfn = ~0ul; + static inline struct xencons_interface *xencons_interface(void) { - return mfn_to_virt(xen_start_info->console.domU.mfn); + if (console_pfn == ~0ul) + return mfn_to_virt(xen_start_info->console.domU.mfn); + else + return __va(console_pfn << PAGE_SHIFT); } static inline void notify_daemon(void) @@ -101,20 +106,32 @@ static int __init xen_init(void) { struct hvc_struct *hp; - if (!is_running_on_xen()) - return 0; + if (!is_running_on_xen() || + is_initial_xendomain() || + !xen_start_info->console.domU.evtchn) + return -ENODEV; xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); if (xencons_irq < 0) - xencons_irq = 0 /* NO_IRQ */; + xencons_irq = 0; /* NO_IRQ */ + hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256); if (IS_ERR(hp)) return PTR_ERR(hp); hvc = hp; + + console_pfn = mfn_to_pfn(xen_start_info->console.domU.mfn); + return 0; } +void xen_console_resume(void) +{ + if (xencons_irq) + rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq); +} + static void __exit xen_fini(void) { if (hvc) diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index efc3237..fd5483a 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -3,6 +3,8 @@ extern struct console xenboot_console; +void xen_console_resume(void); + void xen_raw_console_write(const char *str); void xen_raw_printk(const char *fmt, ...); -- cgit v0.10.2 From 7d88d32a4670af583c896e5ecd3929b78538ca62 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:26 +0100 Subject: xenbus: rebind irq on restore When restoring, rebind the existing xenbus irq to the new xenbus event channel. (It turns out in practice that this is always the same, and is never updated on restore. That's a bug, but Xeno-linux has been like this for a long time, so it can't really be fixed.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 6efbe3f..090c61e 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -203,7 +203,6 @@ int xb_read(void *data, unsigned len) int xb_init_comms(void) { struct xenstore_domain_interface *intf = xen_store_interface; - int err; if (intf->req_prod != intf->req_cons) printk(KERN_ERR "XENBUS request ring is not quiescent " @@ -216,18 +215,20 @@ int xb_init_comms(void) intf->rsp_cons = intf->rsp_prod; } - if (xenbus_irq) - unbind_from_irqhandler(xenbus_irq, &xb_waitq); + if (xenbus_irq) { + /* Already have an irq; assume we're resuming */ + rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); + } else { + int err; + err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, + 0, "xenbus", &xb_waitq); + if (err <= 0) { + printk(KERN_ERR "XENBUS request irq failed %i\n", err); + return err; + } - err = bind_evtchn_to_irqhandler( - xen_store_evtchn, wake_waiting, - 0, "xenbus", &xb_waitq); - if (err <= 0) { - printk(KERN_ERR "XENBUS request irq failed %i\n", err); - return err; + xenbus_irq = err; } - xenbus_irq = err; - return 0; } -- cgit v0.10.2 From 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:27 +0100 Subject: xen: implement save/restore This patch implements Xen save/restore and migration. Saving is triggered via xenbus, which is polled in drivers/xen/manage.c. When a suspend request comes in, the kernel prepares itself for saving by: 1 - Freeze all processes. This is primarily to prevent any partially-completed pagetable updates from confusing the suspend process. If CONFIG_PREEMPT isn't defined, then this isn't necessary. 2 - Suspend xenbus and other devices 3 - Stop_machine, to make sure all the other vcpus are quiescent. The Xen tools require the domain to run its save off vcpu0. 4 - Within the stop_machine state, it pins any unpinned pgds (under construction or destruction), performs canonicalizes various other pieces of state (mostly converting mfns to pfns), and finally 5 - Suspend the domain Restore reverses the steps used to save the domain, ending when all the frozen processes are thawed. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 40b119b..2ba2d16 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o \ - time.o xen-asm.o grant-table.o + time.o xen-asm.o grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ce67dc8..b94f63a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -857,7 +857,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) PFN_DOWN(__pa(xen_start_info->pt_base))); } -static __init void setup_shared_info(void) +void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); @@ -894,7 +894,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) pv_mmu_ops.release_pmd = xen_release_pmd; pv_mmu_ops.set_pte = xen_set_pte; - setup_shared_info(); + xen_setup_shared_info(); /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ @@ -902,7 +902,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) } /* This is called once we have the cpu_possible_map */ -void __init xen_setup_vcpu_info_placement(void) +void xen_setup_vcpu_info_placement(void) { int cpu; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4740cda..e959559 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -560,6 +560,29 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On save, we need to pin all pagetables to make sure they get their + * mfns turned into pfns. Search the list for any unpinned pgds and pin + * them (unpinned pgds are not currently in use, probably because the + * process is under construction or destruction). + */ +void xen_mm_pin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (!PagePinned(page)) { + xen_pgd_pin((pgd_t *)page_address(page)); + SetPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + /* The init_mm pagetable is really pinned as soon as its created, but that's before we have page structures to store the bits. So do all the book-keeping now. */ @@ -617,6 +640,29 @@ static void xen_pgd_unpin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On resume, undo any pinning done at save, so that the rest of the + * kernel doesn't see any unexpected pinned pagetables. + */ +void xen_mm_unpin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (PageSavePinned(page)) { + BUG_ON(!PagePinned(page)); + printk("unpinning pinned %p\n", page_address(page)); + xen_pgd_unpin((pgd_t *)page_address(page)); + ClearPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) { spin_lock(&next->page_table_lock); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 74ab896..d2e3c20 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -35,7 +35,7 @@ #include "xen-ops.h" #include "mmu.h" -static cpumask_t xen_cpu_initialized_map; +cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq) = -1; static DEFINE_PER_CPU(int, callfunc_irq) = -1; static DEFINE_PER_CPU(int, debug_irq) = -1; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c new file mode 100644 index 0000000..7620a16 --- /dev/null +++ b/arch/x86/xen/suspend.c @@ -0,0 +1,42 @@ +#include + +#include +#include +#include + +#include +#include + +#include "xen-ops.h" +#include "mmu.h" + +void xen_pre_suspend(void) +{ + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + mfn_to_pfn(xen_start_info->console.domU.mfn); + + BUG_ON(!irqs_disabled()); + + HYPERVISOR_shared_info = &xen_dummy_shared_info; + if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP), + __pte_ma(0), 0)) + BUG(); +} + +void xen_post_suspend(int suspend_cancelled) +{ + if (suspend_cancelled) { + xen_start_info->store_mfn = + pfn_to_mfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + pfn_to_mfn(xen_start_info->console.domU.mfn); + } else { +#ifdef CONFIG_SMP + xen_cpu_initialized_map = cpu_online_map; +#endif + } + + xen_setup_shared_info(); +} + diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5..0bef256 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -572,6 +572,14 @@ void xen_setup_cpu_clockevents(void) clockevents_register_device(&__get_cpu_var(xen_clock_events)); } +void xen_time_suspend(void) +{ +} + +void xen_time_resume(void) +{ +} + __init void xen_time_init(void) { int cpu = smp_processor_id(); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index a1bc89a..a0503ac 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -9,6 +9,7 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +struct trap_info; void xen_copy_trap_info(struct trap_info *traps); DECLARE_PER_CPU(unsigned long, xen_cr3); @@ -19,6 +20,7 @@ extern struct shared_info xen_dummy_shared_info; extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); +void xen_setup_shared_info(void); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); @@ -59,6 +61,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, int wait); +extern cpumask_t xen_cpu_initialized_map; + /* Declare an asm function, along with symbols needed to make it inlineable */ diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 70375a6..73d78dc 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -674,6 +674,89 @@ static int retrigger_dynirq(unsigned int irq) return ret; } +static void restore_cpu_virqs(unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int virq, irq, evtchn; + + for (virq = 0; virq < NR_VIRQS; virq++) { + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) + continue; + + BUG_ON(irq_info[irq].type != IRQT_VIRQ); + BUG_ON(irq_info[irq].index != virq); + + /* Get a new binding from Xen. */ + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + unmask_evtchn(evtchn); + } +} + +static void restore_cpu_ipis(unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int ipi, irq, evtchn; + + for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) + continue; + + BUG_ON(irq_info[irq].type != IRQT_IPI); + BUG_ON(irq_info[irq].index != ipi); + + /* Get a new binding from Xen. */ + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + evtchn = bind_ipi.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + unmask_evtchn(evtchn); + + } +} + +void xen_irq_resume(void) +{ + unsigned int cpu, irq, evtchn; + + init_evtchn_cpu_bindings(); + + /* New event-channel space is not 'live' yet. */ + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + mask_evtchn(evtchn); + + /* No IRQ <-> event-channel mappings. */ + for (irq = 0; irq < NR_IRQS; irq++) + irq_info[irq].evtchn = 0; /* zap event-channel binding */ + + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + evtchn_to_irq[evtchn] = -1; + + for_each_possible_cpu(cpu) { + restore_cpu_virqs(cpu); + restore_cpu_ipis(cpu); + } +} + static struct irq_chip xen_dynamic_chip __read_mostly = { .name = "xen-dyn", .mask = disable_dynirq, diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 52b6b41..e9e1116 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -471,14 +471,14 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) return 0; } -static int gnttab_resume(void) +int gnttab_resume(void) { if (max_nr_grant_frames() < nr_grant_frames) return -ENOSYS; return gnttab_map(0, nr_grant_frames - 1); } -static int gnttab_suspend(void) +int gnttab_suspend(void) { arch_gnttab_unmap_shared(shared, nr_grant_frames); return 0; diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index aa7af9e..ba85fa2 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -5,21 +5,113 @@ #include #include #include +#include +#include #include - -#define SHUTDOWN_INVALID -1 -#define SHUTDOWN_POWEROFF 0 -#define SHUTDOWN_SUSPEND 2 -/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only - * report a crash, not be instructed to crash! - * HALT is the same as POWEROFF, as far as we're concerned. The tools use - * the distinction when we return the reason code to them. - */ -#define SHUTDOWN_HALT 4 +#include +#include +#include +#include + +#include +#include + +enum shutdown_state { + SHUTDOWN_INVALID = -1, + SHUTDOWN_POWEROFF = 0, + SHUTDOWN_SUSPEND = 2, + /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only + report a crash, not be instructed to crash! + HALT is the same as POWEROFF, as far as we're concerned. The tools use + the distinction when we return the reason code to them. */ + SHUTDOWN_HALT = 4, +}; /* Ignore multiple shutdown requests. */ -static int shutting_down = SHUTDOWN_INVALID; +static enum shutdown_state shutting_down = SHUTDOWN_INVALID; + +static int xen_suspend(void *data) +{ + int *cancelled = data; + + BUG_ON(!irqs_disabled()); + + load_cr3(swapper_pg_dir); + + xen_mm_pin_all(); + gnttab_suspend(); + xen_time_suspend(); + xen_pre_suspend(); + + /* + * This hypercall returns 1 if suspend was cancelled + * or the domain was merely checkpointed, and 0 if it + * is resuming in a new domain. + */ + *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); + + xen_post_suspend(*cancelled); + xen_time_resume(); + gnttab_resume(); + xen_mm_unpin_all(); + + if (!*cancelled) { + xen_irq_resume(); + xen_console_resume(); + } + + return 0; +} + +static void do_suspend(void) +{ + int err; + int cancelled = 1; + + shutting_down = SHUTDOWN_SUSPEND; + +#ifdef CONFIG_PREEMPT + /* If the kernel is preemptible, we need to freeze all the processes + to prevent them from being in the middle of a pagetable update + during suspend. */ + err = freeze_processes(); + if (err) { + printk(KERN_ERR "xen suspend: freeze failed %d\n", err); + return; + } +#endif + + err = device_suspend(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "xen suspend: device_suspend %d\n", err); + goto out; + } + + printk("suspending xenbus...\n"); + /* XXX use normal device tree? */ + xenbus_suspend(); + + err = stop_machine_run(xen_suspend, &cancelled, 0); + if (err) { + printk(KERN_ERR "failed to start xen_suspend: %d\n", err); + goto out; + } + + if (!cancelled) + xenbus_resume(); + else + xenbus_suspend_cancel(); + + device_resume(); + + +out: +#ifdef CONFIG_PREEMPT + thaw_processes(); +#endif + shutting_down = SHUTDOWN_INVALID; +} static void shutdown_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) @@ -52,11 +144,17 @@ static void shutdown_handler(struct xenbus_watch *watch, } if (strcmp(str, "poweroff") == 0 || - strcmp(str, "halt") == 0) + strcmp(str, "halt") == 0) { + shutting_down = SHUTDOWN_POWEROFF; orderly_poweroff(false); - else if (strcmp(str, "reboot") == 0) + } else if (strcmp(str, "reboot") == 0) { + shutting_down = SHUTDOWN_POWEROFF; /* ? */ ctrl_alt_del(); - else { +#ifdef CONFIG_PM_SLEEP + } else if (strcmp(str, "suspend") == 0) { + do_suspend(); +#endif + } else { printk(KERN_INFO "Ignoring shutdown request: %s\n", str); shutting_down = SHUTDOWN_INVALID; } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 590cff3..02955a1 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -157,6 +157,7 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */ PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */ +PAGEFLAG(SavePinned, dirty); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) diff --git a/include/xen/events.h b/include/xen/events.h index a82ec0c..67c4436 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -41,4 +41,7 @@ static inline void notify_remote_via_evtchn(int port) } extern void notify_remote_via_irq(int irq); + +extern void xen_irq_resume(void); + #endif /* _XEN_EVENTS_H */ diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 4662048..a40f1cd 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -51,6 +51,9 @@ struct gnttab_free_callback { u16 count; }; +int gnttab_suspend(void); +int gnttab_resume(void); + int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 10ddfe0..5d7a6db 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -5,4 +5,13 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); +void xen_pre_suspend(void); +void xen_post_suspend(int suspend_cancelled); + +void xen_mm_pin_all(void); +void xen_mm_unpin_all(void); + +void xen_time_suspend(void); +void xen_time_resume(void); + #endif /* INCLUDE_XEN_OPS_H */ -- cgit v0.10.2 From 359cdd3f866b6219a6729e313faf2221397f3278 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:28 +0100 Subject: xen: maintain clock offset over save/restore Hook into the device model to make sure that timekeeping's resume handler is called. This deals with our clocksource's non-monotonicity over the save/restore. Explicitly call clock_has_changed() to make sure that all the timers get retriggered properly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0bef256..c39e1a5 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -572,14 +572,6 @@ void xen_setup_cpu_clockevents(void) clockevents_register_device(&__get_cpu_var(xen_clock_events)); } -void xen_time_suspend(void) -{ -} - -void xen_time_resume(void) -{ -} - __init void xen_time_init(void) { int cpu = smp_processor_id(); diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index ba85fa2..675c3dd 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -34,14 +34,21 @@ static enum shutdown_state shutting_down = SHUTDOWN_INVALID; static int xen_suspend(void *data) { int *cancelled = data; + int err; BUG_ON(!irqs_disabled()); load_cr3(swapper_pg_dir); + err = device_power_down(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n", + err); + return err; + } + xen_mm_pin_all(); gnttab_suspend(); - xen_time_suspend(); xen_pre_suspend(); /* @@ -52,10 +59,11 @@ static int xen_suspend(void *data) *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); xen_post_suspend(*cancelled); - xen_time_resume(); gnttab_resume(); xen_mm_unpin_all(); + device_power_up(); + if (!*cancelled) { xen_irq_resume(); xen_console_resume(); @@ -105,7 +113,8 @@ static void do_suspend(void) device_resume(); - + /* Make sure timer events get retriggered on all CPUs */ + clock_was_set(); out: #ifdef CONFIG_PREEMPT thaw_processes(); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 5d7a6db..a706d6a 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -11,7 +11,4 @@ void xen_post_suspend(int suspend_cancelled); void xen_mm_pin_all(void); void xen_mm_unpin_all(void); -void xen_time_suspend(void); -void xen_time_resume(void); - #endif /* INCLUDE_XEN_OPS_H */ -- cgit v0.10.2 From 3945e2c9abf8e00c2edc4aa29215ddfad1cd8cf7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 25 May 2008 10:00:09 -0700 Subject: x86: extend e820 ealy_res support 32bit - fix #2 remove extra -1 in reseve_early calling panic if can not find space for new RAMDISK Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index c04e8c9..08b47a2 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -479,7 +479,7 @@ static void __init reserve_initrd(void) initrd_start = 0; if (ramdisk_size >= end_of_lowmem/2) { - free_early(ramdisk_image, ramdisk_image + ramdisk_size - 1); + free_early(ramdisk_image, ramdisk_end); printk(KERN_ERR "initrd too large to handle, " "disabling initrd\n"); return; @@ -501,9 +501,13 @@ static void __init reserve_initrd(void) end_of_lowmem, ramdisk_size, PAGE_SIZE); + if (ramdisk_here == -1ULL) + panic("Cannot find place for new RAMDISK of size %lld\n", + ramdisk_size); + /* Note: this includes all the lowmem currently occupied by the initrd, we rely on that fact to keep the data intact. */ - reserve_early(ramdisk_here, ramdisk_here + ramdisk_size - 1, + reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, "NEW RAMDISK"); initrd_start = ramdisk_here + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; @@ -579,15 +583,15 @@ void __init setup_bootmem_allocator(void) PAGE_SIZE); if (bootmap == -1L) panic("Cannot find bootmem map of size %ld\n", bootmap_size); - reserve_early(bootmap, bootmap + bootmap_size - 1, "BOOTMAP"); + reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); #ifdef CONFIG_BLK_DEV_INITRD reserve_initrd(); #endif bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn); printk(KERN_INFO " low ram: %08lx - %08lx\n", min_low_pfn< Date: Mon, 19 May 2008 16:21:33 +0200 Subject: [ALSA] usb-audio - Support for Roland SonicCell sound module Added entry into usbquirks.h to recognize Roland SonicCell sound module by mostly duplicating the entry for the Roland SH-201. USB MIDI works just fine, though the USB audio is a little unreliable (but still works well enough). Signed-off-by: Chris Mennie Signed-off-by: Takashi Iwai diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h index 82a8d14..b7ab3ee 100644 --- a/sound/usb/usbquirks.h +++ b/sound/usb/usbquirks.h @@ -1379,6 +1379,39 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +{ + /* Roland SonicCell */ + USB_DEVICE(0x0582, 0x00c2), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .vendor_name = "Roland", + .product_name = "SonicCell", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_MIDI_FIXED_ENDPOINT, + .data = & (const struct snd_usb_midi_endpoint_info) { + .out_cables = 0x0001, + .in_cables = 0x0001 + } + }, + { + .ifnum = -1 + } + } + } +}, + + /* Guillemot devices */ { /* -- cgit v0.10.2 From a72e72469a166c825196c3f20dabd352877fec2b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 20 May 2008 01:06:55 +0300 Subject: [ALSA] remove CVS keywords This patch removes CVS keywords that weren't updated for a long time from comments. Signed-off-by: Adrian Bunk Signed-off-by: Takashi Iwai diff --git a/include/sound/uda1341.h b/include/sound/uda1341.h index 2e564bf..110d5dc 100644 --- a/include/sound/uda1341.h +++ b/include/sound/uda1341.h @@ -15,8 +15,6 @@ * features support */ -/* $Id: uda1341.h,v 1.8 2005/11/17 14:17:21 tiwai Exp $ */ - #define UDA1341_ALSA_NAME "snd-uda1341" /* diff --git a/sound/arm/sa11xx-uda1341.c b/sound/arm/sa11xx-uda1341.c index 0eff33c..faeddf3 100644 --- a/sound/arm/sa11xx-uda1341.c +++ b/sound/arm/sa11xx-uda1341.c @@ -21,8 +21,6 @@ * merged HAL layer (patches from Brian) */ -/* $Id: sa11xx-uda1341.c,v 1.27 2005/12/07 09:13:42 cladisch Exp $ */ - /*************************************************************************************************** * * To understand what Alsa Drivers should be doing look at "Writing an Alsa Driver" by Takashi Iwai diff --git a/sound/i2c/l3/uda1341.c b/sound/i2c/l3/uda1341.c index bfa5d2c..1f4942e 100644 --- a/sound/i2c/l3/uda1341.c +++ b/sound/i2c/l3/uda1341.c @@ -17,8 +17,6 @@ * 2002-05-12 Tomas Kasparek another code cleanup */ -/* $Id: uda1341.c,v 1.18 2005/11/17 14:17:21 tiwai Exp $ */ - #include #include #include diff --git a/sound/pci/au88x0/au88x0_game.c b/sound/pci/au88x0/au88x0_game.c index bc212f4..e291aa5 100644 --- a/sound/pci/au88x0/au88x0_game.c +++ b/sound/pci/au88x0/au88x0_game.c @@ -1,6 +1,4 @@ /* - * $Id: au88x0_game.c,v 1.9 2003/09/22 03:51:28 mjander Exp $ - * * Manuel Jander. * * Based on the work of: -- cgit v0.10.2 From 06b5fb97cec1a3ca61d10164118b00fe98a6a866 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 20 May 2008 00:59:35 +0300 Subject: sound: sound/oss/: remove CVS keywords This patch removes CVS keywords that weren't updated for a long time from comments. Signed-off-by: Adrian Bunk Signed-off-by: Takashi Iwai diff --git a/sound/oss/msnd.c b/sound/oss/msnd.c index ba38d62..e4282d9 100644 --- a/sound/oss/msnd.c +++ b/sound/oss/msnd.c @@ -20,8 +20,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * $Id: msnd.c,v 1.17 1999/03/21 16:50:09 andrewtv Exp $ - * ********************************************************************/ #include diff --git a/sound/oss/msnd.h b/sound/oss/msnd.h index d0ca582..61b3955 100644 --- a/sound/oss/msnd.h +++ b/sound/oss/msnd.h @@ -24,8 +24,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * $Id: msnd.h,v 1.36 1999/03/21 17:05:42 andrewtv Exp $ - * ********************************************************************/ #ifndef __MSND_H #define __MSND_H diff --git a/sound/oss/msnd_classic.h b/sound/oss/msnd_classic.h index 7ffea52..1a17dde 100644 --- a/sound/oss/msnd_classic.h +++ b/sound/oss/msnd_classic.h @@ -24,8 +24,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * $Id: msnd_classic.h,v 1.10 1999/03/21 17:36:09 andrewtv Exp $ - * ********************************************************************/ #ifndef __MSND_CLASSIC_H #define __MSND_CLASSIC_H diff --git a/sound/oss/msnd_pinnacle.c b/sound/oss/msnd_pinnacle.c index f1f49eb..bf27e00 100644 --- a/sound/oss/msnd_pinnacle.c +++ b/sound/oss/msnd_pinnacle.c @@ -29,13 +29,8 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * $Id: msnd_pinnacle.c,v 1.8 2000/12/30 00:33:21 sycamore Exp $ - * * 12-3-2000 Modified IO port validation Steve Sycamore * - * - * $$$: msnd_pinnacle.c,v 1.75 1999/03/21 16:50:09 andrewtv $$$ $ - * ********************************************************************/ #include diff --git a/sound/oss/msnd_pinnacle.h b/sound/oss/msnd_pinnacle.h index cce9114..c18d66c 100644 --- a/sound/oss/msnd_pinnacle.h +++ b/sound/oss/msnd_pinnacle.h @@ -24,8 +24,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * $Id: msnd_pinnacle.h,v 1.11 1999/03/21 17:36:09 andrewtv Exp $ - * ********************************************************************/ #ifndef __MSND_PINNACLE_H #define __MSND_PINNACLE_H -- cgit v0.10.2 From 89fe5117928b2c1272c9376362131ded561c91ad Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 May 2008 16:10:37 +0200 Subject: sound: Convert to menuconfig Convert menu in sound Kconfig files to menuconfig and if. Signed-off-by: Takashi Iwai diff --git a/sound/Kconfig b/sound/Kconfig index 4247406..a37bee0 100644 --- a/sound/Kconfig +++ b/sound/Kconfig @@ -1,11 +1,9 @@ # sound/Config.in # -menu "Sound" - depends on HAS_IOMEM - -config SOUND +menuconfig SOUND tristate "Sound card support" + depends on HAS_IOMEM help If you have a sound card in your computer, i.e. if it can say more than an occasional beep, say Y. Be sure to have all the information @@ -28,22 +26,22 @@ config SOUND and read ; the module will be called soundcore. +if SOUND + source "sound/oss/dmasound/Kconfig" if !M68K -menu "Advanced Linux Sound Architecture" - depends on SOUND!=n - -config SND +menuconfig SND tristate "Advanced Linux Sound Architecture" - depends on SOUND help Say 'Y' or 'M' to enable ALSA (Advanced Linux Sound Architecture), the new base sound system. For more information, see +if SND + source "sound/core/Kconfig" source "sound/drivers/Kconfig" @@ -58,9 +56,7 @@ source "sound/aoa/Kconfig" source "sound/arm/Kconfig" -if SPI source "sound/spi/Kconfig" -endif source "sound/mips/Kconfig" @@ -80,22 +76,20 @@ source "sound/parisc/Kconfig" source "sound/soc/Kconfig" -endmenu +endif # SND -menu "Open Sound System" - depends on SOUND!=n - -config SOUND_PRIME +menuconfig SOUND_PRIME tristate "Open Sound System (DEPRECATED)" - depends on SOUND help Say 'Y' or 'M' to enable Open Sound System drivers. +if SOUND_PRIME + source "sound/oss/Kconfig" -endmenu +endif # SOUND_PRIME -endif +endif # !M68K config AC97_BUS tristate @@ -105,4 +99,4 @@ config AC97_BUS sound although they're sharing the AC97 bus. Concerned drivers should "select" this. -endmenu +endif # SOUND diff --git a/sound/aoa/Kconfig b/sound/aoa/Kconfig index 5d5813c..c081e18 100644 --- a/sound/aoa/Kconfig +++ b/sound/aoa/Kconfig @@ -1,18 +1,17 @@ -menu "Apple Onboard Audio driver" - depends on SND!=n && PPC_PMAC - -config SND_AOA +menuconfig SND_AOA tristate "Apple Onboard Audio driver" - depends on SND + depends on PPC_PMAC select SND_PCM ---help--- This option enables the new driver for the various Apple Onboard Audio components. +if SND_AOA + source "sound/aoa/fabrics/Kconfig" source "sound/aoa/codecs/Kconfig" source "sound/aoa/soundbus/Kconfig" -endmenu +endif # SND_AOA diff --git a/sound/aoa/codecs/Kconfig b/sound/aoa/codecs/Kconfig index d5fbd60..808eb11 100644 --- a/sound/aoa/codecs/Kconfig +++ b/sound/aoa/codecs/Kconfig @@ -1,6 +1,5 @@ config SND_AOA_ONYX tristate "support Onyx chip" - depends on SND_AOA select I2C select I2C_POWERMAC ---help--- @@ -10,7 +9,6 @@ config SND_AOA_ONYX #config SND_AOA_TOPAZ # tristate "support Topaz chips" -# depends on SND_AOA # ---help--- # This option enables support for the Topaz (CS84xx) # codec chips found in the latest Apple machines, @@ -19,7 +17,6 @@ config SND_AOA_ONYX config SND_AOA_TAS tristate "support TAS chips" - depends on SND_AOA select I2C select I2C_POWERMAC ---help--- @@ -29,7 +26,6 @@ config SND_AOA_TAS config SND_AOA_TOONIE tristate "support Toonie chip" - depends on SND_AOA ---help--- This option enables support for the toonie codec found in the Mac Mini. If you have a Mac Mini and diff --git a/sound/aoa/fabrics/Kconfig b/sound/aoa/fabrics/Kconfig index 50d7021..3ca475a 100644 --- a/sound/aoa/fabrics/Kconfig +++ b/sound/aoa/fabrics/Kconfig @@ -1,6 +1,5 @@ config SND_AOA_FABRIC_LAYOUT tristate "layout-id fabric" - depends on SND_AOA select SND_AOA_SOUNDBUS select SND_AOA_SOUNDBUS_I2S ---help--- diff --git a/sound/aoa/soundbus/Kconfig b/sound/aoa/soundbus/Kconfig index 7368b7d..839d113 100644 --- a/sound/aoa/soundbus/Kconfig +++ b/sound/aoa/soundbus/Kconfig @@ -1,6 +1,5 @@ config SND_AOA_SOUNDBUS tristate "Apple Soundbus support" - depends on SOUND select SND_PCM ---help--- This option enables the generic driver for the soundbus diff --git a/sound/arm/Kconfig b/sound/arm/Kconfig index 2e4a5e0..351e19e 100644 --- a/sound/arm/Kconfig +++ b/sound/arm/Kconfig @@ -1,11 +1,19 @@ # ALSA ARM drivers -menu "ALSA ARM devices" - depends on SND!=n && ARM +menuconfig SND_ARM + bool "ARM sound devices" + depends on ARM + default y + help + Support for sound devices specific to ARM architectures. + Drivers that are implemented on ASoC can be found in + "ALSA for SoC audio support" section. + +if SND_ARM config SND_SA11XX_UDA1341 tristate "SA11xx UDA1341TS driver (iPaq H3600)" - depends on ARCH_SA1100 && SND && L3 + depends on ARCH_SA1100 && L3 select SND_PCM help Say Y here if you have a Compaq iPaq H3x00 handheld computer @@ -16,7 +24,7 @@ config SND_SA11XX_UDA1341 config SND_ARMAACI tristate "ARM PrimeCell PL041 AC Link support" - depends on SND && ARM_AMBA + depends on ARM_AMBA select SND_PCM select SND_AC97_CODEC @@ -26,11 +34,12 @@ config SND_PXA2XX_PCM config SND_PXA2XX_AC97 tristate "AC97 driver for the Intel PXA2xx chip" - depends on ARCH_PXA && SND + depends on ARCH_PXA select SND_PXA2XX_PCM select SND_AC97_CODEC help Say Y or M if you want to support any AC97 codec attached to the PXA2xx AC97 interface. -endmenu +endif # SND_ARM + diff --git a/sound/core/Kconfig b/sound/core/Kconfig index a8d71c6..db21113 100644 --- a/sound/core/Kconfig +++ b/sound/core/Kconfig @@ -1,24 +1,19 @@ # ALSA soundcard-configuration config SND_TIMER tristate - depends on SND config SND_PCM tristate select SND_TIMER - depends on SND config SND_HWDEP tristate - depends on SND config SND_RAWMIDI tristate - depends on SND config SND_SEQUENCER tristate "Sequencer support" - depends on SND select SND_TIMER help Say Y or M to enable MIDI sequencer and router support. This @@ -44,11 +39,9 @@ config SND_SEQ_DUMMY config SND_OSSEMUL bool - depends on SND config SND_MIXER_OSS tristate "OSS Mixer API" - depends on SND select SND_OSSEMUL help To enable OSS mixer API emulation (/dev/mixer*), say Y here @@ -61,7 +54,6 @@ config SND_MIXER_OSS config SND_PCM_OSS tristate "OSS PCM (digital audio) API" - depends on SND select SND_OSSEMUL select SND_PCM help @@ -84,7 +76,7 @@ config SND_PCM_OSS_PLUGINS config SND_SEQUENCER_OSS bool "OSS Sequencer API" - depends on SND && SND_SEQUENCER + depends on SND_SEQUENCER select SND_OSSEMUL help Say Y here to enable OSS sequencer emulation (both @@ -98,7 +90,7 @@ config SND_SEQUENCER_OSS config SND_RTCTIMER tristate "RTC Timer support" - depends on SND && RTC + depends on RTC select SND_TIMER help Say Y here to enable RTC timer support for ALSA. ALSA uses @@ -123,7 +115,6 @@ config SND_SEQ_RTCTIMER_DEFAULT config SND_DYNAMIC_MINORS bool "Dynamic device file minor numbers" - depends on SND help If you say Y here, the minor numbers of ALSA device files in /dev/snd/ are allocated dynamically. This allows you to have @@ -134,7 +125,6 @@ config SND_DYNAMIC_MINORS config SND_SUPPORT_OLD_API bool "Support old ALSA API" - depends on SND default y help Say Y here to support the obsolete ALSA PCM API (ver.0.9.0 rc3 @@ -142,7 +132,7 @@ config SND_SUPPORT_OLD_API config SND_VERBOSE_PROCFS bool "Verbose procfs contents" - depends on SND && PROC_FS + depends on PROC_FS default y help Say Y here to include code for verbose procfs contents (provides @@ -151,7 +141,6 @@ config SND_VERBOSE_PROCFS config SND_VERBOSE_PRINTK bool "Verbose printk" - depends on SND help Say Y here to enable verbose log messages. These messages will help to identify source file and position containing @@ -161,7 +150,6 @@ config SND_VERBOSE_PRINTK config SND_DEBUG bool "Debug" - depends on SND help Say Y here to enable ALSA debug code. @@ -184,4 +172,3 @@ config SND_PCM_XRUN_DEBUG config SND_VMASTER bool - depends on SND diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig index 159137b..79b4d3f 100644 --- a/sound/drivers/Kconfig +++ b/sound/drivers/Kconfig @@ -1,15 +1,41 @@ -# ALSA generic drivers +config SND_MPU401_UART + tristate + select SND_RAWMIDI -menu "Generic devices" - depends on SND!=n +config SND_OPL3_LIB + tristate + select SND_TIMER + select SND_HWDEP +config SND_OPL4_LIB + tristate + select SND_TIMER + select SND_HWDEP + +config SND_VX_LIB + tristate + select SND_HWDEP + select SND_PCM + +config SND_AC97_CODEC + tristate + select SND_PCM + select AC97_BUS + select SND_VMASTER + +menuconfig SND_DRIVERS + bool "Generic sound devices" + default y + help + Support for generic sound devices. + +if SND_DRIVERS config SND_PCSP tristate "PC-Speaker support (READ HELP!)" depends on PCSPKR_PLATFORM && X86_PC && HIGH_RES_TIMERS depends on INPUT depends on EXPERIMENTAL - depends on SND select SND_PCM help If you don't have a sound card in your computer, you can include a @@ -35,34 +61,8 @@ config SND_PCSP Say M if you don't. Say Y only if you really know what you do. -config SND_MPU401_UART - tristate - select SND_RAWMIDI - -config SND_OPL3_LIB - tristate - select SND_TIMER - select SND_HWDEP - -config SND_OPL4_LIB - tristate - select SND_TIMER - select SND_HWDEP - -config SND_VX_LIB - tristate - select SND_HWDEP - select SND_PCM - -config SND_AC97_CODEC - tristate - select SND_PCM - select AC97_BUS - select SND_VMASTER - config SND_DUMMY tristate "Dummy (/dev/null) soundcard" - depends on SND select SND_PCM help Say Y here to include the dummy driver. This driver does @@ -91,7 +91,6 @@ config SND_VIRMIDI config SND_MTPAV tristate "MOTU MidiTimePiece AV multiport MIDI" - depends on SND select SND_RAWMIDI help To use a MOTU MidiTimePiece AV multiport MIDI adapter @@ -103,7 +102,7 @@ config SND_MTPAV config SND_MTS64 tristate "ESI Miditerminal 4140 driver" - depends on SND && PARPORT + depends on PARPORT select SND_RAWMIDI help The ESI Miditerminal 4140 is a 4 In 4 Out MIDI Interface with @@ -116,7 +115,6 @@ config SND_MTS64 config SND_SERIAL_U16550 tristate "UART16550 serial MIDI driver" - depends on SND select SND_RAWMIDI help To include support for MIDI serial port interfaces, say Y here @@ -132,7 +130,6 @@ config SND_SERIAL_U16550 config SND_MPU401 tristate "Generic MPU-401 UART driver" - depends on SND select SND_MPU401_UART help Say Y here to include support for MIDI ports compatible with @@ -143,7 +140,7 @@ config SND_MPU401 config SND_PORTMAN2X4 tristate "Portman 2x4 driver" - depends on SND && PARPORT + depends on PARPORT select SND_RAWMIDI help Say Y here to include support for Midiman Portman 2x4 parallel @@ -154,7 +151,7 @@ config SND_PORTMAN2X4 config SND_ML403_AC97CR tristate "Xilinx ML403 AC97 Controller Reference" - depends on SND && XILINX_VIRTEX + depends on XILINX_VIRTEX select SND_AC97_CODEC help Say Y here to include support for the @@ -164,4 +161,4 @@ config SND_ML403_AC97CR To compile this driver as a module, choose M here: the module will be called snd-ml403_ac97cr. -endmenu +endif # SND_DRIVERS diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig index 2639a6a..4575ba8 100644 --- a/sound/isa/Kconfig +++ b/sound/isa/Kconfig @@ -21,12 +21,17 @@ config SND_SB16_DSP select SND_PCM select SND_SB_COMMON -menu "ISA devices" - depends on SND!=n && ISA && ISA_DMA_API +menuconfig SND_ISA + bool "ISA sound devices" + depends on ISA && ISA_DMA_API + default y + help + Support for sound devices connected via the ISA bus. + +if SND_ISA config SND_ADLIB tristate "AdLib FM card" - depends on SND select SND_OPL3_LIB help Say Y here to include support for AdLib FM cards. @@ -36,7 +41,7 @@ config SND_ADLIB config SND_AD1816A tristate "Analog Devices SoundPort AD1816A" - depends on SND && PNP && ISA + depends on PNP select ISAPNP select SND_OPL3_LIB select SND_MPU401_UART @@ -50,7 +55,6 @@ config SND_AD1816A config SND_AD1848 tristate "Generic AD1848/CS4248 driver" - depends on SND select SND_AD1848_LIB help Say Y here to include support for AD1848 (Analog Devices) or @@ -64,7 +68,7 @@ config SND_AD1848 config SND_ALS100 tristate "Avance Logic ALS100/ALS120" - depends on SND && PNP && ISA + depends on PNP select ISAPNP select SND_OPL3_LIB select SND_MPU401_UART @@ -78,7 +82,7 @@ config SND_ALS100 config SND_AZT2320 tristate "Aztech Systems AZT2320" - depends on SND && PNP && ISA + depends on PNP select ISAPNP select SND_OPL3_LIB select SND_MPU401_UART @@ -92,7 +96,6 @@ config SND_AZT2320 config SND_CMI8330 tristate "C-Media CMI8330" - depends on SND select SND_AD1848_LIB select SND_SB16_DSP help @@ -104,7 +107,6 @@ config SND_CMI8330 config SND_CS4231 tristate "Generic Cirrus Logic CS4231 driver" - depends on SND select SND_MPU401_UART select SND_CS4231_LIB help @@ -116,7 +118,6 @@ config SND_CS4231 config SND_CS4232 tristate "Generic Cirrus Logic CS4232 driver" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_CS4231_LIB @@ -129,7 +130,6 @@ config SND_CS4232 config SND_CS4236 tristate "Generic Cirrus Logic CS4236+ driver" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_CS4231_LIB @@ -142,7 +142,7 @@ config SND_CS4236 config SND_DT019X tristate "Diamond Technologies DT-019X, Avance Logic ALS-007" - depends on SND && PNP && ISA + depends on PNP select ISAPNP select SND_OPL3_LIB select SND_MPU401_UART @@ -156,7 +156,7 @@ config SND_DT019X config SND_ES968 tristate "Generic ESS ES968 driver" - depends on SND && PNP && ISA + depends on PNP select ISAPNP select SND_MPU401_UART select SND_SB8_DSP @@ -168,7 +168,6 @@ config SND_ES968 config SND_ES1688 tristate "Generic ESS ES688/ES1688 driver" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_PCM @@ -181,7 +180,6 @@ config SND_ES1688 config SND_ES18XX tristate "Generic ESS ES18xx driver" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_PCM @@ -193,7 +191,7 @@ config SND_ES18XX config SND_SC6000 tristate "Gallant SC-6000, Audio Excel DSP 16" - depends on SND && HAS_IOPORT + depends on HAS_IOPORT select SND_AD1848_LIB select SND_OPL3_LIB select SND_MPU401_UART @@ -209,7 +207,6 @@ config SND_GUS_SYNTH config SND_GUSCLASSIC tristate "Gravis UltraSound Classic" - depends on SND select SND_RAWMIDI select SND_PCM select SND_GUS_SYNTH @@ -222,7 +219,6 @@ config SND_GUSCLASSIC config SND_GUSEXTREME tristate "Gravis UltraSound Extreme" - depends on SND select SND_HWDEP select SND_MPU401_UART select SND_PCM @@ -236,7 +232,6 @@ config SND_GUSEXTREME config SND_GUSMAX tristate "Gravis UltraSound MAX" - depends on SND select SND_RAWMIDI select SND_CS4231_LIB select SND_GUS_SYNTH @@ -249,7 +244,7 @@ config SND_GUSMAX config SND_INTERWAVE tristate "AMD InterWave, Gravis UltraSound PnP" - depends on SND && PNP && ISA + depends on PNP select SND_RAWMIDI select SND_CS4231_LIB select SND_GUS_SYNTH @@ -263,7 +258,7 @@ config SND_INTERWAVE config SND_INTERWAVE_STB tristate "AMD InterWave + TEA6330T (UltraSound 32-Pro)" - depends on SND && PNP && ISA + depends on PNP select SND_RAWMIDI select SND_CS4231_LIB select SND_GUS_SYNTH @@ -277,7 +272,6 @@ config SND_INTERWAVE_STB config SND_OPL3SA2 tristate "Yamaha OPL3-SA2/SA3" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_CS4231_LIB @@ -290,7 +284,6 @@ config SND_OPL3SA2 config SND_OPTI92X_AD1848 tristate "OPTi 82C92x - AD1848" - depends on SND select SND_OPL3_LIB select SND_OPL4_LIB select SND_MPU401_UART @@ -304,7 +297,6 @@ config SND_OPTI92X_AD1848 config SND_OPTI92X_CS4231 tristate "OPTi 82C92x - CS4231" - depends on SND select SND_OPL3_LIB select SND_OPL4_LIB select SND_MPU401_UART @@ -318,7 +310,6 @@ config SND_OPTI92X_CS4231 config SND_OPTI93X tristate "OPTi 82C93x" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_PCM @@ -331,7 +322,6 @@ config SND_OPTI93X config SND_MIRO tristate "Miro miroSOUND PCM1pro/PCM12/PCM20radio driver" - depends on SND select SND_OPL4_LIB select SND_CS4231_LIB select SND_MPU401_UART @@ -345,7 +335,6 @@ config SND_MIRO config SND_SB8 tristate "Sound Blaster 1.0/2.0/Pro (8-bit)" - depends on SND select SND_OPL3_LIB select SND_RAWMIDI select SND_SB8_DSP @@ -358,7 +347,6 @@ config SND_SB8 config SND_SB16 tristate "Sound Blaster 16 (PnP)" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_SB16_DSP @@ -371,7 +359,6 @@ config SND_SB16 config SND_SBAWE tristate "Sound Blaster AWE (32,64) (PnP)" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_SB16_DSP @@ -402,7 +389,6 @@ config SND_SB16_CSP_FIRMWARE_IN_KERNEL config SND_SGALAXY tristate "Aztech Sound Galaxy" - depends on SND select SND_AD1848_LIB help Say Y here to include support for Aztech Sound Galaxy @@ -413,7 +399,6 @@ config SND_SGALAXY config SND_SSCAPE tristate "Ensoniq SoundScape PnP driver" - depends on SND select SND_HWDEP select SND_MPU401_UART select SND_CS4231_LIB @@ -426,7 +411,6 @@ config SND_SSCAPE config SND_WAVEFRONT tristate "Turtle Beach Maui,Tropez,Tropez+ (Wavefront)" - depends on SND select FW_LOADER select SND_OPL3_LIB select SND_MPU401_UART @@ -448,4 +432,5 @@ config SND_WAVEFRONT_FIRMWARE_IN_KERNEL you need to install the firmware files from the alsa-firmware package. -endmenu +endif # SND_ISA + diff --git a/sound/mips/Kconfig b/sound/mips/Kconfig index 531f8ba..bb26f6c 100644 --- a/sound/mips/Kconfig +++ b/sound/mips/Kconfig @@ -1,15 +1,21 @@ # ALSA MIPS drivers -menu "ALSA MIPS devices" - depends on SND!=n && MIPS +menuconfig SND_MIPS + bool "MIPS sound devices" + depends on MIPS + default y + help + Support for sound devices of MIPS architectures. + +if SND_MIPS config SND_AU1X00 tristate "Au1x00 AC97 Port Driver" - depends on (SOC_AU1000 || SOC_AU1100 || SOC_AU1500) && SND + depends on SOC_AU1000 || SOC_AU1100 || SOC_AU1500 select SND_PCM select SND_AC97_CODEC help ALSA Sound driver for the Au1x00's AC97 port. -endmenu +endif # SND_MIPS diff --git a/sound/parisc/Kconfig b/sound/parisc/Kconfig index a5a7f9d..9b61d95 100644 --- a/sound/parisc/Kconfig +++ b/sound/parisc/Kconfig @@ -1,15 +1,20 @@ # ALSA PA-RISC drivers -menu "GSC devices" - depends on SND!=n && GSC +menuconfig SND_GSC + bool "GSC sound devices" + depends on GSC + default y + help + Support for GSC sound devices on PA-RISC architectures. + +if SND_GSC config SND_HARMONY tristate "Harmony/Vivace sound chip" - depends on SND select SND_PCM help Say 'Y' or 'M' to include support for the Harmony/Vivace sound chip found in most GSC-based PA-RISC workstations. It's frequently provided as part of the Lasi multi-function IC. -endmenu +endif # SND_GSC diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index d95fbb2..b148c0b 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -1,11 +1,16 @@ # ALSA PCI drivers -menu "PCI devices" - depends on SND!=n && PCI +menuconfig SND_PCI + bool "PCI sound devices" + depends on PCI + default y + help + Support for sound devices connected via the PCI bus. + +if SND_PCI config SND_AD1889 tristate "Analog Devices AD1889" - depends on SND select SND_AC97_CODEC help Say Y here to include support for the integrated AC97 sound @@ -17,7 +22,6 @@ config SND_AD1889 config SND_ALS300 tristate "Avance Logic ALS300/ALS300+" - depends on SND select SND_PCM select SND_AC97_CODEC select SND_OPL3_LIB @@ -29,7 +33,7 @@ config SND_ALS300 config SND_ALS4000 tristate "Avance Logic ALS4000" - depends on SND && ISA_DMA_API + depends on ISA_DMA_API select SND_OPL3_LIB select SND_MPU401_UART select SND_PCM @@ -43,7 +47,6 @@ config SND_ALS4000 config SND_ALI5451 tristate "ALi M5451 PCI Audio Controller" - depends on SND select SND_MPU401_UART select SND_AC97_CODEC help @@ -57,7 +60,6 @@ config SND_ALI5451 config SND_ATIIXP tristate "ATI IXP AC97 Controller" - depends on SND select SND_AC97_CODEC help Say Y here to include support for the integrated AC97 sound @@ -69,7 +71,6 @@ config SND_ATIIXP config SND_ATIIXP_MODEM tristate "ATI IXP Modem" - depends on SND select SND_AC97_CODEC help Say Y here to include support for the integrated MC97 modem on @@ -80,7 +81,6 @@ config SND_ATIIXP_MODEM config SND_AU8810 tristate "Aureal Advantage" - depends on SND select SND_MPU401_UART select SND_AC97_CODEC help @@ -95,7 +95,6 @@ config SND_AU8810 config SND_AU8820 tristate "Aureal Vortex" - depends on SND select SND_MPU401_UART select SND_AC97_CODEC help @@ -109,7 +108,6 @@ config SND_AU8820 config SND_AU8830 tristate "Aureal Vortex 2" - depends on SND select SND_MPU401_UART select SND_AC97_CODEC help @@ -124,7 +122,6 @@ config SND_AU8830 config SND_AW2 tristate "Emagic Audiowerk 2" - depends on SND help Say Y here to include support for Emagic Audiowerk 2 soundcards. @@ -139,7 +136,7 @@ config SND_AW2 config SND_AZT3328 tristate "Aztech AZF3328 / PCI168 (EXPERIMENTAL)" - depends on SND && EXPERIMENTAL + depends on EXPERIMENTAL select SND_OPL3_LIB select SND_MPU401_UART select SND_PCM @@ -152,7 +149,6 @@ config SND_AZT3328 config SND_BT87X tristate "Bt87x Audio Capture" - depends on SND select SND_PCM help If you want to record audio from TV cards based on @@ -174,7 +170,6 @@ config SND_BT87X_OVERCLOCK config SND_CA0106 tristate "SB Audigy LS / Live 24bit" - depends on SND select SND_AC97_CODEC select SND_RAWMIDI select SND_VMASTER @@ -187,7 +182,6 @@ config SND_CA0106 config SND_CMIPCI tristate "C-Media 8338, 8738, 8768, 8770" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_PCM @@ -201,13 +195,11 @@ config SND_CMIPCI config SND_OXYGEN_LIB tristate - depends on SND select SND_PCM select SND_MPU401_UART config SND_OXYGEN tristate "C-Media 8788 (Oxygen)" - depends on SND select SND_OXYGEN_LIB help Say Y here to include support for sound cards based on the @@ -225,7 +217,6 @@ config SND_OXYGEN config SND_CS4281 tristate "Cirrus Logic (Sound Fusion) CS4281" - depends on SND select SND_OPL3_LIB select SND_RAWMIDI select SND_AC97_CODEC @@ -237,7 +228,6 @@ config SND_CS4281 config SND_CS46XX tristate "Cirrus Logic (Sound Fusion) CS4280/CS461x/CS462x/CS463x" - depends on SND select SND_RAWMIDI select SND_AC97_CODEC help @@ -258,7 +248,7 @@ config SND_CS46XX_NEW_DSP config SND_CS5530 tristate "CS5530 Audio" - depends on SND && ISA_DMA_API + depends on ISA_DMA_API select SND_SB16_DSP help Say Y here to include support for audio on Cyrix/NatSemi CS5530 chips. @@ -268,7 +258,7 @@ config SND_CS5530 config SND_CS5535AUDIO tristate "CS5535/CS5536 Audio" - depends on SND && X86 && !X86_64 + depends on X86 && !X86_64 select SND_PCM select SND_AC97_CODEC help @@ -286,7 +276,6 @@ config SND_CS5535AUDIO config SND_DARLA20 tristate "(Echoaudio) Darla20" - depends on SND select FW_LOADER select SND_PCM help @@ -297,7 +286,6 @@ config SND_DARLA20 config SND_GINA20 tristate "(Echoaudio) Gina20" - depends on SND select FW_LOADER select SND_PCM help @@ -308,7 +296,6 @@ config SND_GINA20 config SND_LAYLA20 tristate "(Echoaudio) Layla20" - depends on SND select FW_LOADER select SND_RAWMIDI select SND_PCM @@ -320,7 +307,6 @@ config SND_LAYLA20 config SND_DARLA24 tristate "(Echoaudio) Darla24" - depends on SND select FW_LOADER select SND_PCM help @@ -331,7 +317,6 @@ config SND_DARLA24 config SND_GINA24 tristate "(Echoaudio) Gina24" - depends on SND select FW_LOADER select SND_PCM help @@ -342,7 +327,6 @@ config SND_GINA24 config SND_LAYLA24 tristate "(Echoaudio) Layla24" - depends on SND select FW_LOADER select SND_RAWMIDI select SND_PCM @@ -354,7 +338,6 @@ config SND_LAYLA24 config SND_MONA tristate "(Echoaudio) Mona" - depends on SND select FW_LOADER select SND_RAWMIDI select SND_PCM @@ -366,7 +349,6 @@ config SND_MONA config SND_MIA tristate "(Echoaudio) Mia" - depends on SND select FW_LOADER select SND_RAWMIDI select SND_PCM @@ -378,7 +360,6 @@ config SND_MIA config SND_ECHO3G tristate "(Echoaudio) 3G cards" - depends on SND select FW_LOADER select SND_RAWMIDI select SND_PCM @@ -390,7 +371,6 @@ config SND_ECHO3G config SND_INDIGO tristate "(Echoaudio) Indigo" - depends on SND select FW_LOADER select SND_PCM help @@ -401,7 +381,6 @@ config SND_INDIGO config SND_INDIGOIO tristate "(Echoaudio) Indigo IO" - depends on SND select FW_LOADER select SND_PCM help @@ -412,7 +391,6 @@ config SND_INDIGOIO config SND_INDIGODJ tristate "(Echoaudio) Indigo DJ" - depends on SND select FW_LOADER select SND_PCM help @@ -423,7 +401,6 @@ config SND_INDIGODJ config SND_EMU10K1 tristate "Emu10k1 (SB Live!, Audigy, E-mu APS)" - depends on SND select FW_LOADER select SND_HWDEP select SND_RAWMIDI @@ -441,7 +418,6 @@ config SND_EMU10K1 config SND_EMU10K1X tristate "Emu10k1X (Dell OEM Version)" - depends on SND select SND_AC97_CODEC select SND_RAWMIDI help @@ -453,7 +429,6 @@ config SND_EMU10K1X config SND_ENS1370 tristate "(Creative) Ensoniq AudioPCI 1370" - depends on SND select SND_RAWMIDI select SND_PCM help @@ -464,7 +439,6 @@ config SND_ENS1370 config SND_ENS1371 tristate "(Creative) Ensoniq AudioPCI 1371/1373" - depends on SND select SND_RAWMIDI select SND_AC97_CODEC help @@ -476,7 +450,6 @@ config SND_ENS1371 config SND_ES1938 tristate "ESS ES1938/1946/1969 (Solo-1)" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_AC97_CODEC @@ -489,7 +462,6 @@ config SND_ES1938 config SND_ES1968 tristate "ESS ES1968/1978 (Maestro-1/2/2E)" - depends on SND select SND_MPU401_UART select SND_AC97_CODEC help @@ -501,7 +473,6 @@ config SND_ES1968 config SND_FM801 tristate "ForteMedia FM801" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_AC97_CODEC @@ -528,7 +499,6 @@ config SND_FM801_TEA575X config SND_HDA_INTEL tristate "Intel HD Audio" - depends on SND select SND_PCM select SND_VMASTER help @@ -637,7 +607,6 @@ config SND_HDA_POWER_SAVE_DEFAULT config SND_HDSP tristate "RME Hammerfall DSP Audio" - depends on SND select SND_HWDEP select SND_RAWMIDI select SND_PCM @@ -650,7 +619,6 @@ config SND_HDSP config SND_HDSPM tristate "RME Hammerfall DSP MADI" - depends on SND select SND_HWDEP select SND_RAWMIDI select SND_PCM @@ -663,7 +631,6 @@ config SND_HDSPM config SND_HIFIER tristate "TempoTec HiFier Fantasia" - depends on SND select SND_OXYGEN_LIB help Say Y here to include support for the MediaTek/TempoTec HiFier @@ -674,7 +641,6 @@ config SND_HIFIER config SND_ICE1712 tristate "ICEnsemble ICE1712 (Envy24)" - depends on SND select SND_MPU401_UART select SND_AC97_CODEC help @@ -691,7 +657,6 @@ config SND_ICE1712 config SND_ICE1724 tristate "ICE/VT1724/1720 (Envy24HT/PT)" - depends on SND select SND_RAWMIDI select SND_AC97_CODEC select SND_VMASTER @@ -709,7 +674,6 @@ config SND_ICE1724 config SND_INTEL8X0 tristate "Intel/SiS/nVidia/AMD/ALi AC97 Controller" - depends on SND select SND_AC97_CODEC help Say Y here to include support for the integrated AC97 sound @@ -722,7 +686,6 @@ config SND_INTEL8X0 config SND_INTEL8X0M tristate "Intel/SiS/nVidia/AMD MC97 Modem" - depends on SND select SND_AC97_CODEC help Say Y here to include support for the integrated MC97 modem on @@ -733,7 +696,6 @@ config SND_INTEL8X0M config SND_KORG1212 tristate "Korg 1212 IO" - depends on SND select FW_LOADER if !SND_KORG1212_FIRMWARE_IN_KERNEL select SND_PCM help @@ -753,7 +715,6 @@ config SND_KORG1212_FIRMWARE_IN_KERNEL config SND_MAESTRO3 tristate "ESS Allegro/Maestro3" - depends on SND select FW_LOADER if !SND_MAESTRO3_FIRMWARE_IN_KERNEL select SND_AC97_CODEC help @@ -774,7 +735,6 @@ config SND_MAESTRO3_FIRMWARE_IN_KERNEL config SND_MIXART tristate "Digigram miXart" - depends on SND select SND_HWDEP select SND_PCM help @@ -786,7 +746,6 @@ config SND_MIXART config SND_NM256 tristate "NeoMagic NM256AV/ZX" - depends on SND select SND_AC97_CODEC help Say Y here to include support for NeoMagic NM256AV/ZX chips. @@ -796,7 +755,6 @@ config SND_NM256 config SND_PCXHR tristate "Digigram PCXHR" - depends on SND select SND_PCM select SND_HWDEP help @@ -807,7 +765,6 @@ config SND_PCXHR config SND_RIPTIDE tristate "Conexant Riptide" - depends on SND select FW_LOADER select SND_OPL3_LIB select SND_MPU401_UART @@ -820,7 +777,6 @@ config SND_RIPTIDE config SND_RME32 tristate "RME Digi32, 32/8, 32 PRO" - depends on SND select SND_PCM help Say Y to include support for RME Digi32, Digi32 PRO and @@ -832,7 +788,6 @@ config SND_RME32 config SND_RME96 tristate "RME Digi96, 96/8, 96/8 PRO" - depends on SND select SND_PCM help Say Y here to include support for RME Digi96, Digi96/8 and @@ -843,7 +798,6 @@ config SND_RME96 config SND_RME9652 tristate "RME Digi9652 (Hammerfall)" - depends on SND select SND_PCM help Say Y here to include support for RME Hammerfall (RME @@ -854,7 +808,7 @@ config SND_RME9652 config SND_SIS7019 tristate "SiS 7019 Audio Accelerator" - depends on SND && X86 && !X86_64 + depends on X86 && !X86_64 select SND_AC97_CODEC help Say Y here to include support for the SiS 7019 Audio Accelerator. @@ -864,7 +818,6 @@ config SND_SIS7019 config SND_SONICVIBES tristate "S3 SonicVibes" - depends on SND select SND_OPL3_LIB select SND_MPU401_UART select SND_AC97_CODEC @@ -877,7 +830,6 @@ config SND_SONICVIBES config SND_TRIDENT tristate "Trident 4D-Wave DX/NX; SiS 7018" - depends on SND select SND_MPU401_UART select SND_AC97_CODEC help @@ -889,7 +841,6 @@ config SND_TRIDENT config SND_VIA82XX tristate "VIA 82C686A/B, 8233/8235 AC97 Controller" - depends on SND select SND_MPU401_UART select SND_AC97_CODEC help @@ -901,7 +852,6 @@ config SND_VIA82XX config SND_VIA82XX_MODEM tristate "VIA 82C686A/B, 8233 based Modems" - depends on SND select SND_AC97_CODEC help Say Y here to include support for the integrated MC97 modem on @@ -912,7 +862,6 @@ config SND_VIA82XX_MODEM config SND_VIRTUOSO tristate "Asus Virtuoso 100/200 (Xonar)" - depends on SND select SND_OXYGEN_LIB help Say Y here to include support for sound cards based on the @@ -923,7 +872,6 @@ config SND_VIRTUOSO config SND_VX222 tristate "Digigram VX222" - depends on SND select SND_VX_LIB help Say Y here to include support for Digigram VX222 soundcards. @@ -933,7 +881,6 @@ config SND_VX222 config SND_YMFPCI tristate "Yamaha YMF724/740/744/754" - depends on SND select FW_LOADER if !SND_YMFPCI_FIRMWARE_IN_KERNEL select SND_OPL3_LIB select SND_MPU401_UART @@ -975,4 +922,4 @@ config SND_AC97_POWER_SAVE_DEFAULT The default time-out value in seconds for AC97 automatic power-save mode. 0 means to disable the power-save mode. -endmenu +endif # SND_PCI diff --git a/sound/pcmcia/Kconfig b/sound/pcmcia/Kconfig index c9fa1a2..7fbb190 100644 --- a/sound/pcmcia/Kconfig +++ b/sound/pcmcia/Kconfig @@ -1,11 +1,16 @@ # ALSA PCMCIA drivers -menu "PCMCIA devices" - depends on SND!=n && PCMCIA +menuconfig SND_PCMCIA + bool "PCMCIA sound devices" + depends on PCMCIA + default y + help + Support for sound devices connected via the PCMCIA bus. + +if SND_PCMCIA && PCMCIA config SND_VXPOCKET tristate "Digigram VXpocket" - depends on SND && PCMCIA select SND_VX_LIB help Say Y here to include support for Digigram VXpocket and @@ -16,7 +21,6 @@ config SND_VXPOCKET config SND_PDAUDIOCF tristate "Sound Core PDAudioCF" - depends on SND && PCMCIA select SND_PCM help Say Y here to include support for Sound Core PDAudioCF @@ -25,4 +29,5 @@ config SND_PDAUDIOCF To compile this driver as a module, choose M here: the module will be called snd-pdaudiocf. -endmenu +endif # SND_PCMCIA + diff --git a/sound/ppc/Kconfig b/sound/ppc/Kconfig index cacb0b1..777de2b 100644 --- a/sound/ppc/Kconfig +++ b/sound/ppc/Kconfig @@ -1,17 +1,17 @@ # ALSA PowerMac drivers -menu "ALSA PowerMac devices" - depends on SND!=n && PPC - -comment "ALSA PowerMac requires I2C" - depends on SND && I2C=n +menuconfig SND_PPC + bool "PowerPC sound devices" + depends on PPC64 || PPC32 + default y + help + Support for sound devices specific to PowerPC architectures. -comment "ALSA PowerMac requires INPUT" - depends on SND && INPUT=n +if SND_PPC config SND_POWERMAC tristate "PowerMac (AWACS, DACA, Burgundy, Tumbler, Keywest)" - depends on SND && I2C && INPUT && PPC_PMAC + depends on I2C && INPUT && PPC_PMAC select SND_PCM help Say Y here to include support for the integrated sound device. @@ -32,14 +32,9 @@ config SND_POWERMAC_AUTO_DRC Note that you can turn on/off DRC manually even without this option. -endmenu - -menu "ALSA PowerPC devices" - depends on SND!=n && ( PPC64 || PPC32 ) - config SND_PS3 tristate "PS3 Audio support" - depends on SND && PS3_PS3AV + depends on PS3_PS3AV select SND_PCM default m help @@ -52,4 +47,5 @@ config SND_PS3_DEFAULT_START_DELAY int "Startup delay time in ms" depends on SND_PS3 default "2000" -endmenu + +endif # SND_PPC diff --git a/sound/sh/Kconfig b/sound/sh/Kconfig index b7e08ef..cfc1439 100644 --- a/sound/sh/Kconfig +++ b/sound/sh/Kconfig @@ -1,14 +1,22 @@ # ALSA SH drivers -menu "SUPERH devices" - depends on SND!=n && SUPERH +menuconfig SND_SUPERH + bool "SUPERH sound devices" + depends on SUPERH + default y + help + Support for sound devices specific to SUPERH architectures. + Drivers that are implemented on ASoC can be found in + "ALSA for SoC audio support" section. + +if SND_SUPERH config SND_AICA tristate "Dreamcast Yamaha AICA sound" - depends on SH_DREAMCAST && SND + depends on SH_DREAMCAST select SND_PCM help ALSA Sound driver for the SEGA Dreamcast console. -endmenu +endif # SND_SUPERH diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig index 18f28ac..fd7bc4f 100644 --- a/sound/soc/Kconfig +++ b/sound/soc/Kconfig @@ -2,15 +2,8 @@ # SoC audio configuration # -menu "System on Chip audio support" - depends on SND!=n - -config SND_SOC_AC97_BUS - bool - -config SND_SOC +menuconfig SND_SOC tristate "ALSA for SoC audio support" - depends on SND select SND_PCM ---help--- @@ -23,6 +16,11 @@ config SND_SOC This ASoC audio support can also be built as a module. If so, the module will be called snd-soc-core. +if SND_SOC + +config SND_SOC_AC97_BUS + bool + # All the supported Soc's source "sound/soc/at91/Kconfig" source "sound/soc/pxa/Kconfig" @@ -35,4 +33,5 @@ source "sound/soc/omap/Kconfig" # Supported codecs source "sound/soc/codecs/Kconfig" -endmenu +endif # SND_SOC + diff --git a/sound/soc/at91/Kconfig b/sound/soc/at91/Kconfig index 5cb93fd..9051865 100644 --- a/sound/soc/at91/Kconfig +++ b/sound/soc/at91/Kconfig @@ -1,6 +1,6 @@ config SND_AT91_SOC tristate "SoC Audio for the Atmel AT91 System-on-Chip" - depends on ARCH_AT91 && SND_SOC + depends on ARCH_AT91 help Say Y or M if you want to add support for codecs attached to the AT91 SSC interface. You will also need diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 3903ab7..d4a5fe4 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1,31 +1,25 @@ config SND_SOC_AC97_CODEC tristate - depends on SND_SOC + select SND_AC97_CODEC config SND_SOC_WM8731 tristate - depends on SND_SOC config SND_SOC_WM8750 tristate - depends on SND_SOC config SND_SOC_WM8753 tristate - depends on SND_SOC config SND_SOC_WM9712 tristate - depends on SND_SOC config SND_SOC_WM9713 tristate - depends on SND_SOC # Cirrus Logic CS4270 Codec config SND_SOC_CS4270 tristate - depends on SND_SOC # Cirrus Logic CS4270 Codec Hardware Mute Support # Select if you have external muting circuitry attached to your CS4270. @@ -43,4 +37,4 @@ config SND_SOC_CS4270_VD33_ERRATA config SND_SOC_TLV320AIC3X tristate - depends on SND_SOC && I2C + depends on I2C diff --git a/sound/soc/davinci/Kconfig b/sound/soc/davinci/Kconfig index 20680c5..8f7e338 100644 --- a/sound/soc/davinci/Kconfig +++ b/sound/soc/davinci/Kconfig @@ -1,6 +1,6 @@ config SND_DAVINCI_SOC tristate "SoC Audio for the TI DAVINCI chip" - depends on ARCH_DAVINCI && SND_SOC + depends on ARCH_DAVINCI help Say Y or M if you want to add support for codecs attached to the DAVINCI AC97 or I2S interface. You will also need diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 257101f..19802e2 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -2,7 +2,7 @@ menu "ALSA SoC audio for Freescale SOCs" config SND_SOC_MPC8610 bool "ALSA SoC support for the MPC8610 SOC" - depends on SND_SOC && MPC8610_HPCD + depends on MPC8610_HPCD default y if MPC8610 help Say Y if you want to add support for codecs attached to the SSI diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig index 484f883..18a40dc 100644 --- a/sound/soc/pxa/Kconfig +++ b/sound/soc/pxa/Kconfig @@ -1,6 +1,6 @@ config SND_PXA2XX_SOC tristate "SoC Audio for the Intel PXA2xx chip" - depends on ARCH_PXA && SND_SOC + depends on ARCH_PXA help Say Y or M if you want to add support for codecs attached to the PXA2xx AC97, I2S or SSP interface. You will also need diff --git a/sound/soc/s3c24xx/Kconfig b/sound/soc/s3c24xx/Kconfig index 1f6dbfc..b9f2353 100644 --- a/sound/soc/s3c24xx/Kconfig +++ b/sound/soc/s3c24xx/Kconfig @@ -1,7 +1,6 @@ config SND_S3C24XX_SOC tristate "SoC Audio for the Samsung S3C24XX chips" - depends on ARCH_S3C2410 && SND_SOC - select SND_PCM + depends on ARCH_S3C2410 help Say Y or M if you want to add support for codecs attached to the S3C24XX AC97, I2S or SSP interface. You will also need @@ -16,7 +15,6 @@ config SND_S3C2412_SOC_I2S config SND_S3C2443_SOC_AC97 tristate select AC97_BUS - select SND_AC97_CODEC select SND_SOC_AC97_BUS config SND_S3C24XX_SOC_NEO1973_WM8753 diff --git a/sound/soc/sh/Kconfig b/sound/soc/sh/Kconfig index 4c1e013..54bd604 100644 --- a/sound/soc/sh/Kconfig +++ b/sound/soc/sh/Kconfig @@ -3,7 +3,7 @@ menu "SoC Audio support for SuperH" config SND_SOC_PCM_SH7760 tristate "SoC Audio support for Renesas SH7760" - depends on CPU_SUBTYPE_SH7760 && SND_SOC && SH_DMABRG + depends on CPU_SUBTYPE_SH7760 && SH_DMABRG help Enable this option for SH7760 AC97/I2S audio support. @@ -13,10 +13,9 @@ config SND_SOC_PCM_SH7760 ## config SND_SOC_SH4_HAC + tristate select AC97_BUS select SND_SOC_AC97_BUS - select SND_AC97_CODEC - tristate config SND_SOC_SH4_SSI tristate diff --git a/sound/sparc/Kconfig b/sound/sparc/Kconfig index 079e22a..d75deba 100644 --- a/sound/sparc/Kconfig +++ b/sound/sparc/Kconfig @@ -1,11 +1,17 @@ # ALSA Sparc drivers -menu "ALSA Sparc devices" - depends on SND!=n && SPARC +menuconfig SND_SPARC + bool "Sparc sound devices" + depends on SPARC + default y + help + Support for sound devices specific to Sun SPARC architectures. + +if SND_SPARC config SND_SUN_AMD7930 tristate "Sun AMD7930" - depends on SBUS && SND + depends on SBUS select SND_PCM help Say Y here to include support for AMD7930 sound device on Sun. @@ -15,7 +21,6 @@ config SND_SUN_AMD7930 config SND_SUN_CS4231 tristate "Sun CS4231" - depends on SND select SND_PCM help Say Y here to include support for CS4231 sound device on Sun. @@ -25,7 +30,7 @@ config SND_SUN_CS4231 config SND_SUN_DBRI tristate "Sun DBRI" - depends on SND && SBUS + depends on SBUS select SND_PCM help Say Y here to include support for DBRI sound device on Sun. @@ -33,4 +38,4 @@ config SND_SUN_DBRI To compile this driver as a module, choose M here: the module will be called snd-sun-dbri. -endmenu +endif # SND_SPARC diff --git a/sound/spi/Kconfig b/sound/spi/Kconfig index 0d08c29..e6485be 100644 --- a/sound/spi/Kconfig +++ b/sound/spi/Kconfig @@ -1,7 +1,13 @@ #SPI drivers -menu "SPI devices" - depends on SND != n +menuconfig SND_SPI + bool "SPI sound devices" + depends on SPI + default y + help + Support for sound devices connected via the SPI bus. + +if SND_SPI config SND_AT73C213 tristate "Atmel AT73C213 DAC driver" @@ -28,4 +34,5 @@ config SND_AT73C213_TARGET_BITRATE Set to 48000 Hz by default. -endmenu +endif # SND_SPI + diff --git a/sound/usb/Kconfig b/sound/usb/Kconfig index 9351b8a..ffcdc8f 100644 --- a/sound/usb/Kconfig +++ b/sound/usb/Kconfig @@ -1,11 +1,16 @@ # ALSA USB drivers -menu "USB devices" - depends on SND!=n && USB!=n +menuconfig SND_USB + bool "USB sound devices" + depends on USB + default y + help + Support for sound devices connected via the USB bus. + +if SND_USB && USB config SND_USB_AUDIO tristate "USB Audio/MIDI driver" - depends on SND && USB select SND_HWDEP select SND_RAWMIDI select SND_PCM @@ -18,7 +23,7 @@ config SND_USB_AUDIO config SND_USB_USX2Y tristate "Tascam US-122, US-224 and US-428 USB driver" - depends on SND && USB && (X86 || PPC || ALPHA) + depends on X86 || PPC || ALPHA select SND_HWDEP select SND_RAWMIDI select SND_PCM @@ -31,7 +36,6 @@ config SND_USB_USX2Y config SND_USB_CAIAQ tristate "Native Instruments USB audio devices" - depends on SND && USB select SND_HWDEP select SND_RAWMIDI select SND_PCM @@ -63,5 +67,5 @@ config SND_USB_CAIAQ_INPUT * Native Instruments Kore Controller 2 * Native Instruments Audio Kontrol 1 -endmenu +endif # SND_USB -- cgit v0.10.2 From 6938d6b2a90d5e2ffaef037852652a1333502519 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 May 2008 16:11:26 +0200 Subject: [ALSA] Fix AC97 kconfig items The kconfig items related with AC97-powersave must be outside the CONFIG_SND_PCI range. And it'd be better together with CONFIG_SND_AC97. Signed-off-by: Takashi Iwai diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig index 79b4d3f..255fd18 100644 --- a/sound/drivers/Kconfig +++ b/sound/drivers/Kconfig @@ -161,4 +161,25 @@ config SND_ML403_AC97CR To compile this driver as a module, choose M here: the module will be called snd-ml403_ac97cr. +config SND_AC97_POWER_SAVE + bool "AC97 Power-Saving Mode" + depends on SND_AC97_CODEC && EXPERIMENTAL + default n + help + Say Y here to enable the aggressive power-saving support of + AC97 codecs. In this mode, the power-mode is dynamically + controlled at each open/close. + + The mode is activated by passing power_save=1 option to + snd-ac97-codec driver. You can toggle it dynamically over + sysfs, too. + +config SND_AC97_POWER_SAVE_DEFAULT + int "Default time-out for AC97 power-save mode" + depends on SND_AC97_POWER_SAVE + default 0 + help + The default time-out value in seconds for AC97 automatic + power-save mode. 0 means to disable the power-save mode. + endif # SND_DRIVERS diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index b148c0b..8fe5dac 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -901,25 +901,4 @@ config SND_YMFPCI_FIRMWARE_IN_KERNEL for the YMFPCI driver. If you choose N here, you need to install the firmware files from the alsa-firmware package. -config SND_AC97_POWER_SAVE - bool "AC97 Power-Saving Mode" - depends on SND_AC97_CODEC && EXPERIMENTAL - default n - help - Say Y here to enable the aggressive power-saving support of - AC97 codecs. In this mode, the power-mode is dynamically - controlled at each open/close. - - The mode is activated by passing power_save=1 option to - snd-ac97-codec driver. You can toggle it dynamically over - sysfs, too. - -config SND_AC97_POWER_SAVE_DEFAULT - int "Default time-out for AC97 power-save mode" - depends on SND_AC97_POWER_SAVE - default 0 - help - The default time-out value in seconds for AC97 automatic - power-save mode. 0 means to disable the power-save mode. - endif # SND_PCI -- cgit v0.10.2 From 62cf872a8eec1f11aacbec0ac3fe3698bfa9b403 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 20 May 2008 12:15:15 +0200 Subject: [ALSA] Replace CONFIG_SND_DEBUG_DETECT with CONFIG_SND_DEBUG_VERBOSE Replace CONFIG_SND_DEBUG_DETECT with CONFIG_SND_DEBUG_VERBOSE to represent its meaning more better. This config isn't provided only for the detection but for more verbose debug prints in general. Signed-off-by: Takashi Iwai diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 0bbee38..e595694 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -1091,7 +1091,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. This occurs when the access to non-existing or non-working codec slot (likely a modem one) causes a stall of the communication via HD-audio bus. You can see which codec slots are probed by enabling - CONFIG_SND_DEBUG_DETECT, or simply from the file name of the codec + CONFIG_SND_DEBUG_VERBOSE, or simply from the file name of the codec proc files. Then limit the slots to probe by probe_mask option. For example, probe_mask=1 means to probe only the first slot, and probe_mask=4 means only the third slot. diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl index b03df4d4..e13c4e6 100644 --- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl @@ -6127,8 +6127,8 @@ struct _snd_pcm_runtime { snd_printdd() is compiled in only when - CONFIG_SND_DEBUG_DETECT is set. Please note - that DEBUG_DETECT is not set as default + CONFIG_SND_DEBUG_VERBOSE is set. Please note + that CONFIG_SND_DEBUG_VERBOSE is not set as default even if you configure the alsa-driver with option. You need to give explicitly option instead. diff --git a/include/sound/core.h b/include/sound/core.h index 695ee53..558b962 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -412,13 +412,13 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...) #endif /* CONFIG_SND_DEBUG */ -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE /** * snd_printdd - debug printk * @format: format string * * Works like snd_printk() for debugging purposes. - * Ignored when CONFIG_SND_DEBUG_DETECT is not set. + * Ignored when CONFIG_SND_DEBUG_VERBOSE is not set. */ #define snd_printdd(format, args...) snd_printk(format, ##args) #else @@ -442,7 +442,7 @@ struct snd_pci_quirk { unsigned short subvendor; /* PCI subvendor ID */ unsigned short subdevice; /* PCI subdevice ID */ int value; /* value */ -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE const char *name; /* name of the device (optional) */ #endif }; @@ -450,7 +450,7 @@ struct snd_pci_quirk { #define _SND_PCI_QUIRK_ID(vend,dev) \ .subvendor = (vend), .subdevice = (dev) #define SND_PCI_QUIRK_ID(vend,dev) {_SND_PCI_QUIRK_ID(vend, dev)} -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE #define SND_PCI_QUIRK(vend,dev,xname,val) \ {_SND_PCI_QUIRK_ID(vend, dev), .value = (val), .name = (xname)} #else diff --git a/sound/core/Kconfig b/sound/core/Kconfig index db21113..335d45e 100644 --- a/sound/core/Kconfig +++ b/sound/core/Kconfig @@ -153,12 +153,14 @@ config SND_DEBUG help Say Y here to enable ALSA debug code. -config SND_DEBUG_DETECT - bool "Debug detection" +config SND_DEBUG_VERBOSE + bool "More verbose debug" depends on SND_DEBUG help - Say Y here to enable extra-verbose log messages printed when - detecting devices. + Say Y here to enable extra-verbose debugging messages. + + Let me repeat: it enables EXTRA-VERBOSE DEBUGGING messages. + So, say Y only if you are ready to be annoyed. config SND_PCM_XRUN_DEBUG bool "Enable PCM ring buffer overrun/underrun debugging" diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index a6be6e3..d2e1093 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2335,7 +2335,7 @@ int snd_hda_check_board_config(struct hda_codec *codec, if (!tbl) return -1; if (tbl->value >= 0 && tbl->value < num_configs) { -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE char tmp[10]; const char *model = NULL; if (models) diff --git a/sound/pci/hda/hda_hwdep.c b/sound/pci/hda/hda_hwdep.c index 2177d9a..6e18a42 100644 --- a/sound/pci/hda/hda_hwdep.c +++ b/sound/pci/hda/hda_hwdep.c @@ -88,7 +88,7 @@ static int hda_hwdep_ioctl_compat(struct snd_hwdep *hw, struct file *file, static int hda_hwdep_open(struct snd_hwdep *hw, struct file *file) { -#ifndef CONFIG_SND_DEBUG_DETECT +#ifndef CONFIG_SND_DEBUG_VERBOSE if (!capable(CAP_SYS_RAWIO)) return -EACCES; #endif diff --git a/sound/pci/pcxhr/pcxhr.c b/sound/pci/pcxhr/pcxhr.c index 7fdcdc8..2c7e253 100644 --- a/sound/pci/pcxhr/pcxhr.c +++ b/sound/pci/pcxhr/pcxhr.c @@ -516,7 +516,7 @@ static void pcxhr_trigger_tasklet(unsigned long arg) int capture_mask = 0; int playback_mask = 0; -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE struct timeval my_tv1, my_tv2; do_gettimeofday(&my_tv1); #endif @@ -623,7 +623,7 @@ static void pcxhr_trigger_tasklet(unsigned long arg) mutex_unlock(&mgr->setup_mutex); -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE do_gettimeofday(&my_tv2); snd_printdd("***TRIGGER TASKLET*** TIME = %ld (err = %x)\n", (long)(my_tv2.tv_usec - my_tv1.tv_usec), err); diff --git a/sound/pci/pcxhr/pcxhr_core.c b/sound/pci/pcxhr/pcxhr_core.c index 78aa81f..abe5c59 100644 --- a/sound/pci/pcxhr/pcxhr_core.c +++ b/sound/pci/pcxhr/pcxhr_core.c @@ -473,7 +473,7 @@ static struct pcxhr_cmd_info pcxhr_dsp_cmds[] = { [CMD_AUDIO_LEVEL_ADJUST] = { 0xc22000, 0, RMH_SSIZE_FIXED }, }; -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE static char* cmd_names[] = { [CMD_VERSION] = "CMD_VERSION", [CMD_SUPPORTED] = "CMD_SUPPORTED", @@ -549,7 +549,7 @@ static int pcxhr_read_rmh_status(struct pcxhr_mgr *mgr, struct pcxhr_rmh *rmh) } } } -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE if (rmh->cmd_idx < CMD_LAST_INDEX) snd_printdd(" stat[%d]=%x\n", i, data); #endif @@ -597,7 +597,7 @@ static int pcxhr_send_msg_nolock(struct pcxhr_mgr *mgr, struct pcxhr_rmh *rmh) data |= 0x008000; /* MASK_MORE_THAN_1_WORD_COMMAND */ else data &= 0xff7fff; /* MASK_1_WORD_COMMAND */ -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE if (rmh->cmd_idx < CMD_LAST_INDEX) snd_printdd("MSG cmd[0]=%x (%s)\n", data, cmd_names[rmh->cmd_idx]); #endif @@ -624,7 +624,7 @@ static int pcxhr_send_msg_nolock(struct pcxhr_mgr *mgr, struct pcxhr_rmh *rmh) for (i=1; i < rmh->cmd_len; i++) { /* send other words */ data = rmh->cmd[i]; -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE if (rmh->cmd_idx < CMD_LAST_INDEX) snd_printdd(" cmd[%d]=%x\n", i, data); #endif @@ -847,7 +847,7 @@ int pcxhr_set_pipe_state(struct pcxhr_mgr *mgr, int playback_mask, int capture_m int state, i, err; int audio_mask; -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE struct timeval my_tv1, my_tv2; do_gettimeofday(&my_tv1); #endif @@ -894,7 +894,7 @@ int pcxhr_set_pipe_state(struct pcxhr_mgr *mgr, int playback_mask, int capture_m if (err) return err; } -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE do_gettimeofday(&my_tv2); snd_printdd("***SET PIPE STATE*** TIME = %ld (err = %x)\n", (long)(my_tv2.tv_usec - my_tv1.tv_usec), err); @@ -951,7 +951,7 @@ static int pcxhr_handle_async_err(struct pcxhr_mgr *mgr, u32 err, enum pcxhr_async_err_src err_src, int pipe, int is_capture) { -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE static char* err_src_name[] = { [PCXHR_ERR_PIPE] = "Pipe", [PCXHR_ERR_STREAM] = "Stream", @@ -1169,7 +1169,7 @@ irqreturn_t pcxhr_interrupt(int irq, void *dev_id) mgr->dsp_time_last, dsp_time_new); mgr->dsp_time_err++; } -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE if (dsp_time_diff == 0) snd_printdd("ERROR DSP TIME NO DIFF time(%d)\n", dsp_time_new); else if (dsp_time_diff >= (2*PCXHR_GRANULARITY)) @@ -1208,7 +1208,7 @@ irqreturn_t pcxhr_interrupt(int irq, void *dev_id) mgr->src_it_dsp = reg; tasklet_hi_schedule(&mgr->msg_taskq); } -#ifdef CONFIG_SND_DEBUG_DETECT +#ifdef CONFIG_SND_DEBUG_VERBOSE if (reg & PCXHR_FATAL_DSP_ERR) snd_printdd("FATAL DSP ERROR : %x\n", reg); #endif -- cgit v0.10.2 From 142054a389ebf7972b4eee822ad7c55ff852b649 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 26 May 2008 12:59:16 +0300 Subject: [ALSA] Add EM-X270 ASoC driver This patch adds ASoC support for EM-X270 machine. Signed-off-by: Mike Rapoport Signed-off-by: Takashi Iwai diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig index 18a40dc..12f6ac9 100644 --- a/sound/soc/pxa/Kconfig +++ b/sound/soc/pxa/Kconfig @@ -62,3 +62,12 @@ config SND_PXA2XX_SOC_E800 help Say Y if you want to add support for SoC audio on the Toshiba e800 PDA + +config SND_PXA2XX_SOC_EM_X270 + tristate "SoC Audio support for CompuLab EM-x270" + depends on SND_PXA2XX_SOC && MACH_EM_X270 + select SND_PXA2XX_SOC_AC97 + select SND_SOC_WM9712 + help + Say Y if you want to add support for SoC audio on + CompuLab EM-x270. diff --git a/sound/soc/pxa/Makefile b/sound/soc/pxa/Makefile index 04e5646..5bc8edf 100644 --- a/sound/soc/pxa/Makefile +++ b/sound/soc/pxa/Makefile @@ -13,10 +13,11 @@ snd-soc-poodle-objs := poodle.o snd-soc-tosa-objs := tosa.o snd-soc-e800-objs := e800_wm9712.o snd-soc-spitz-objs := spitz.o +snd-soc-em-x270-objs := em-x270.o obj-$(CONFIG_SND_PXA2XX_SOC_CORGI) += snd-soc-corgi.o obj-$(CONFIG_SND_PXA2XX_SOC_POODLE) += snd-soc-poodle.o obj-$(CONFIG_SND_PXA2XX_SOC_TOSA) += snd-soc-tosa.o obj-$(CONFIG_SND_PXA2XX_SOC_E800) += snd-soc-e800.o obj-$(CONFIG_SND_PXA2XX_SOC_SPITZ) += snd-soc-spitz.o - +obj-$(CONFIG_SND_PXA2XX_SOC_EM_X270) += snd-soc-em-x270.o diff --git a/sound/soc/pxa/em-x270.c b/sound/soc/pxa/em-x270.c new file mode 100644 index 0000000..02dcac3 --- /dev/null +++ b/sound/soc/pxa/em-x270.c @@ -0,0 +1,102 @@ +/* + * em-x270.c -- SoC audio for EM-X270 + * + * Copyright 2007 CompuLab, Ltd. + * + * Author: Mike Rapoport + * + * Copied from tosa.c: + * Copyright 2005 Wolfson Microelectronics PLC. + * Copyright 2005 Openedhand Ltd. + * + * Authors: Liam Girdwood + * Richard Purdie + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "../codecs/wm9712.h" +#include "pxa2xx-pcm.h" +#include "pxa2xx-ac97.h" + +static struct snd_soc_dai_link em_x270_dai[] = { + { + .name = "AC97", + .stream_name = "AC97 HiFi", + .cpu_dai = &pxa_ac97_dai[PXA2XX_DAI_AC97_HIFI], + .codec_dai = &wm9712_dai[WM9712_DAI_AC97_HIFI], + }, + { + .name = "AC97 Aux", + .stream_name = "AC97 Aux", + .cpu_dai = &pxa_ac97_dai[PXA2XX_DAI_AC97_AUX], + .codec_dai = &wm9712_dai[WM9712_DAI_AC97_AUX], + }, +}; + +static struct snd_soc_machine em_x270 = { + .name = "EM-X270", + .dai_link = em_x270_dai, + .num_links = ARRAY_SIZE(em_x270_dai), +}; + +static struct snd_soc_device em_x270_snd_devdata = { + .machine = &em_x270, + .platform = &pxa2xx_soc_platform, + .codec_dev = &soc_codec_dev_wm9712, +}; + +static struct platform_device *em_x270_snd_device; + +static int __init em_x270_init(void) +{ + int ret; + + if (!machine_is_em_x270()) + return -ENODEV; + + em_x270_snd_device = platform_device_alloc("soc-audio", -1); + if (!em_x270_snd_device) + return -ENOMEM; + + platform_set_drvdata(em_x270_snd_device, &em_x270_snd_devdata); + em_x270_snd_devdata.dev = &em_x270_snd_device->dev; + ret = platform_device_add(em_x270_snd_device); + + if (ret) + platform_device_put(em_x270_snd_device); + + return ret; +} + +static void __exit em_x270_exit(void) +{ + platform_device_unregister(em_x270_snd_device); +} + +module_init(em_x270_init); +module_exit(em_x270_exit); + +/* Module information */ +MODULE_AUTHOR("Mike Rapoport"); +MODULE_DESCRIPTION("ALSA SoC EM-X270"); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From f269002e61446ed3410d8ca5f06ebca1e2760cb5 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 27 May 2008 11:44:55 +0200 Subject: [ALSA] hda - Add support of Teradici controller Add the new PCI ID 0x6549 0x1200 Teradici controller. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 6ba7ac0..7f62196 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -197,6 +197,10 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 }; #define ATIHDMI_NUM_CAPTURE 0 #define ATIHDMI_NUM_PLAYBACK 1 +/* TERA has 4 playback and 3 capture */ +#define TERA_NUM_CAPTURE 3 +#define TERA_NUM_PLAYBACK 4 + /* this number is statically defined for simplicity */ #define MAX_AZX_DEV 16 @@ -384,6 +388,7 @@ enum { AZX_DRIVER_SIS, AZX_DRIVER_ULI, AZX_DRIVER_NVIDIA, + AZX_DRIVER_TERA, }; static char *driver_short_names[] __devinitdata = { @@ -395,6 +400,7 @@ static char *driver_short_names[] __devinitdata = { [AZX_DRIVER_SIS] = "HDA SIS966", [AZX_DRIVER_ULI] = "HDA ULI M5461", [AZX_DRIVER_NVIDIA] = "HDA NVidia", + [AZX_DRIVER_TERA] = "HDA Teradici", }; /* @@ -1106,6 +1112,7 @@ static unsigned int azx_max_codecs[] __devinitdata = { [AZX_DRIVER_SIS] = 3, /* FIXME: correct? */ [AZX_DRIVER_ULI] = 3, /* FIXME: correct? */ [AZX_DRIVER_NVIDIA] = 3, /* FIXME: correct? */ + [AZX_DRIVER_TERA] = 1, }; static int __devinit azx_codec_create(struct azx *chip, const char *model, @@ -2229,6 +2236,8 @@ static struct pci_device_id azx_ids[] = { { PCI_DEVICE(0x10de, 0x0bd5), .driver_data = AZX_DRIVER_NVIDIA }, { PCI_DEVICE(0x10de, 0x0bd6), .driver_data = AZX_DRIVER_NVIDIA }, { PCI_DEVICE(0x10de, 0x0bd7), .driver_data = AZX_DRIVER_NVIDIA }, + /* Teradici */ + { PCI_DEVICE(0x6549, 0x1200), .driver_data = AZX_DRIVER_TERA }, { 0, } }; MODULE_DEVICE_TABLE(pci, azx_ids); -- cgit v0.10.2 From abbc9d1b25637b1948a4718fa8f7b257233136bc Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 27 May 2008 11:48:01 +0200 Subject: [ALSA] hda - Add ICH9 controller support (8086:2911) Added the missing PCI ID for ICH9 controller (8086:2911) Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 7f62196..6e2dc4f 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2184,6 +2184,7 @@ static struct pci_device_id azx_ids[] = { { PCI_DEVICE(0x8086, 0x27d8), .driver_data = AZX_DRIVER_ICH }, { PCI_DEVICE(0x8086, 0x269a), .driver_data = AZX_DRIVER_ICH }, { PCI_DEVICE(0x8086, 0x284b), .driver_data = AZX_DRIVER_ICH }, + { PCI_DEVICE(0x8086, 0x2911), .driver_data = AZX_DRIVER_ICH }, { PCI_DEVICE(0x8086, 0x293e), .driver_data = AZX_DRIVER_ICH }, { PCI_DEVICE(0x8086, 0x293f), .driver_data = AZX_DRIVER_ICH }, { PCI_DEVICE(0x8086, 0x3a3e), .driver_data = AZX_DRIVER_ICH }, -- cgit v0.10.2 From 6dda9f4a95905f2b38e79e3737da5e25397e6acb Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 27 May 2008 12:05:31 +0200 Subject: [ALSA] hda - Add ALC663 support Added the support of ALC663 codec, including specific models for ASUS M51VA, ASUS G71V, ASUS H13 and ASUS G50V. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index e595694..f48939e 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -845,7 +845,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. ALC269 basic Basic preset - ALC662 + ALC662/663 3stack-dig 3-stack (2-channel) with SPDIF 3stack-6ch 3-stack (6-channel) 3stack-6ch-dig 3-stack (6-channel) with SPDIF @@ -853,6 +853,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. lenovo-101e Lenovo laptop eeepc-p701 ASUS Eeepc P701 eeepc-ep20 ASUS Eeepc EP20 + m51va ASUS M51VA + g71v ASUS G71V + h13 ASUS H13 + g50v ASUS G50V auto auto-config reading BIOS (default) ALC882/885 diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b0a2a26..c659588 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -163,6 +163,10 @@ enum { ALC662_LENOVO_101E, ALC662_ASUS_EEEPC_P701, ALC662_ASUS_EEEPC_EP20, + ALC663_ASUS_M51VA, + ALC663_ASUS_G71V, + ALC663_ASUS_H13, + ALC663_ASUS_G50V, ALC662_AUTO, ALC662_MODEL_LAST, }; @@ -13251,6 +13255,23 @@ static struct hda_input_mux alc662_eeepc_capture_source = { }, }; +static struct hda_input_mux alc663_capture_source = { + .num_items = 3, + .items = { + { "Mic", 0x0 }, + { "Front Mic", 0x1 }, + { "Line", 0x2 }, + }, +}; + +static struct hda_input_mux alc663_m51va_capture_source = { + .num_items = 2, + .items = { + { "Ext-Mic", 0x0 }, + { "D-Mic", 0x9 }, + }, +}; + #define alc662_mux_enum_info alc_mux_enum_info #define alc662_mux_enum_get alc_mux_enum_get #define alc662_mux_enum_put alc882_mux_enum_put @@ -13431,6 +13452,44 @@ static struct snd_kcontrol_new alc662_eeepc_ep20_mixer[] = { { } /* end */ }; +static struct snd_kcontrol_new alc663_m51va_mixer[] = { + HDA_CODEC_VOLUME("Speaker Playback Volume", 0x02, 0x0, HDA_OUTPUT), + HDA_CODEC_MUTE("Speaker Playback Switch", 0x14, 0x0, HDA_OUTPUT), + HDA_CODEC_MUTE("Headphone Playback Switch", 0x21, 0x0, HDA_OUTPUT), + HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT), + HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT), + HDA_CODEC_MUTE("DMic Playback Switch", 0x23, 0x9, HDA_INPUT), + { } /* end */ +}; + +static struct snd_kcontrol_new alc663_g71v_mixer[] = { + HDA_CODEC_VOLUME("Speaker Playback Volume", 0x02, 0x0, HDA_OUTPUT), + HDA_CODEC_MUTE("Speaker Playback Switch", 0x14, 0x0, HDA_OUTPUT), + HDA_CODEC_VOLUME("Front Playback Volume", 0x03, 0x0, HDA_OUTPUT), + HDA_CODEC_MUTE("Front Playback Switch", 0x15, 0x0, HDA_OUTPUT), + HDA_CODEC_MUTE("Headphone Playback Switch", 0x21, 0x0, HDA_OUTPUT), + + HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT), + HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT), + HDA_CODEC_VOLUME("i-Mic Playback Volume", 0x0b, 0x1, HDA_INPUT), + HDA_CODEC_MUTE("i-Mic Playback Switch", 0x0b, 0x1, HDA_INPUT), + { } /* end */ +}; + +static struct snd_kcontrol_new alc663_g50v_mixer[] = { + HDA_CODEC_VOLUME("Speaker Playback Volume", 0x02, 0x0, HDA_OUTPUT), + HDA_CODEC_MUTE("Speaker Playback Switch", 0x14, 0x0, HDA_OUTPUT), + HDA_CODEC_MUTE("Headphone Playback Switch", 0x21, 0x0, HDA_OUTPUT), + + HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT), + HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT), + HDA_CODEC_VOLUME("i-Mic Playback Volume", 0x0b, 0x1, HDA_INPUT), + HDA_CODEC_MUTE("i-Mic Playback Switch", 0x0b, 0x1, HDA_INPUT), + HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT), + HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT), + { } /* end */ +}; + static struct snd_kcontrol_new alc662_chmode_mixer[] = { { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -13501,6 +13560,11 @@ static struct hda_verb alc662_init_verbs[] = { {0x23, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)}, {0x23, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)}, {0x23, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(4)}, + + /* always trun on EAPD */ + {0x14, AC_VERB_SET_EAPD_BTLENABLE, 2}, + {0x15, AC_VERB_SET_EAPD_BTLENABLE, 2}, + { } }; @@ -13571,6 +13635,43 @@ static struct hda_verb alc662_auto_init_verbs[] = { { } }; +static struct hda_verb alc663_m51va_init_verbs[] = { + {0x21, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP}, + {0x21, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x21, AC_VERB_SET_CONNECT_SEL, 0x00}, /* Headphone */ + + {0x23, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(9)}, + + {0x18, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC880_MIC_EVENT}, + {0x21, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC880_HP_EVENT}, + {} +}; + +static struct hda_verb alc663_g71v_init_verbs[] = { + {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP}, + /* {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, */ + /* {0x15, AC_VERB_SET_CONNECT_SEL, 0x01}, */ /* Headphone */ + + {0x21, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP}, + {0x21, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x21, AC_VERB_SET_CONNECT_SEL, 0x00}, /* Headphone */ + + {0x15, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN|ALC880_FRONT_EVENT}, + {0x18, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN|ALC880_MIC_EVENT}, + {0x21, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN|ALC880_HP_EVENT}, + {} +}; + +static struct hda_verb alc663_g50v_init_verbs[] = { + {0x21, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP}, + {0x21, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x21, AC_VERB_SET_CONNECT_SEL, 0x00}, /* Headphone */ + + {0x18, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC880_MIC_EVENT}, + {0x21, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC880_HP_EVENT}, + {} +}; + /* capture mixer elements */ static struct snd_kcontrol_new alc662_capture_mixer[] = { HDA_CODEC_VOLUME("Capture Volume", 0x09, 0x0, HDA_INPUT), @@ -13692,6 +13793,125 @@ static void alc662_eeepc_ep20_inithook(struct hda_codec *codec) alc662_eeepc_ep20_automute(codec); } +static void alc663_m51va_speaker_automute(struct hda_codec *codec) +{ + unsigned int present; + unsigned char bits; + + present = snd_hda_codec_read(codec, 0x21, 0, + AC_VERB_GET_PIN_SENSE, 0) + & AC_PINSENSE_PRESENCE; + bits = present ? HDA_AMP_MUTE : 0; + snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0, + HDA_AMP_MUTE, bits); +} + +static void alc663_m51va_mic_automute(struct hda_codec *codec) +{ + unsigned int present; + + present = snd_hda_codec_read(codec, 0x18, 0, + AC_VERB_GET_PIN_SENSE, 0) + & AC_PINSENSE_PRESENCE; + snd_hda_codec_write_cache(codec, 0x22, 0, AC_VERB_SET_AMP_GAIN_MUTE, + 0x7000 | (0x00 << 8) | (present ? 0 : 0x80)); + snd_hda_codec_write_cache(codec, 0x23, 0, AC_VERB_SET_AMP_GAIN_MUTE, + 0x7000 | (0x00 << 8) | (present ? 0 : 0x80)); + snd_hda_codec_write_cache(codec, 0x22, 0, AC_VERB_SET_AMP_GAIN_MUTE, + 0x7000 | (0x09 << 8) | (present ? 0x80 : 0)); + snd_hda_codec_write_cache(codec, 0x23, 0, AC_VERB_SET_AMP_GAIN_MUTE, + 0x7000 | (0x09 << 8) | (present ? 0x80 : 0)); +} + +static void alc663_m51va_unsol_event(struct hda_codec *codec, + unsigned int res) +{ + switch (res >> 26) { + case ALC880_HP_EVENT: + alc663_m51va_speaker_automute(codec); + break; + case ALC880_MIC_EVENT: + alc663_m51va_mic_automute(codec); + break; + } +} + +static void alc663_m51va_inithook(struct hda_codec *codec) +{ + alc663_m51va_speaker_automute(codec); + alc663_m51va_mic_automute(codec); +} + +static void alc663_g71v_hp_automute(struct hda_codec *codec) +{ + unsigned int present; + unsigned char bits; + + present = snd_hda_codec_read(codec, 0x21, 0, + AC_VERB_GET_PIN_SENSE, 0) + & AC_PINSENSE_PRESENCE; + bits = present ? HDA_AMP_MUTE : 0; + snd_hda_codec_amp_stereo(codec, 0x15, HDA_OUTPUT, 0, + HDA_AMP_MUTE, bits); + snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0, + HDA_AMP_MUTE, bits); +} + +static void alc663_g71v_front_automute(struct hda_codec *codec) +{ + unsigned int present; + unsigned char bits; + + present = snd_hda_codec_read(codec, 0x15, 0, + AC_VERB_GET_PIN_SENSE, 0) + & AC_PINSENSE_PRESENCE; + bits = present ? HDA_AMP_MUTE : 0; + snd_hda_codec_amp_stereo(codec, 0x14, HDA_OUTPUT, 0, + HDA_AMP_MUTE, bits); +} + +static void alc663_g71v_unsol_event(struct hda_codec *codec, + unsigned int res) +{ + switch (res >> 26) { + case ALC880_HP_EVENT: + alc663_g71v_hp_automute(codec); + break; + case ALC880_FRONT_EVENT: + alc663_g71v_front_automute(codec); + break; + case ALC880_MIC_EVENT: + alc662_eeepc_mic_automute(codec); + break; + } +} + +static void alc663_g71v_inithook(struct hda_codec *codec) +{ + alc663_g71v_front_automute(codec); + alc663_g71v_hp_automute(codec); + alc662_eeepc_mic_automute(codec); +} + +static void alc663_g50v_unsol_event(struct hda_codec *codec, + unsigned int res) +{ + switch (res >> 26) { + case ALC880_HP_EVENT: + alc663_m51va_speaker_automute(codec); + break; + case ALC880_MIC_EVENT: + alc662_eeepc_mic_automute(codec); + break; + } +} + +static void alc663_g50v_inithook(struct hda_codec *codec) +{ + alc663_m51va_speaker_automute(codec); + alc662_eeepc_mic_automute(codec); +} + #ifdef CONFIG_SND_HDA_POWER_SAVE #define alc662_loopbacks alc880_loopbacks #endif @@ -13714,14 +13934,24 @@ static const char *alc662_models[ALC662_MODEL_LAST] = { [ALC662_LENOVO_101E] = "lenovo-101e", [ALC662_ASUS_EEEPC_P701] = "eeepc-p701", [ALC662_ASUS_EEEPC_EP20] = "eeepc-ep20", + [ALC663_ASUS_M51VA] = "m51va", + [ALC663_ASUS_G71V] = "g71v", + [ALC663_ASUS_H13] = "h13", + [ALC663_ASUS_G50V] = "g50v", [ALC662_AUTO] = "auto", }; static struct snd_pci_quirk alc662_cfg_tbl[] = { + SND_PCI_QUIRK(0x1043, 0x11c3, "ASUS G71V", ALC663_ASUS_G71V), + SND_PCI_QUIRK(0x1043, 0x1878, "ASUS M51VA", ALC663_ASUS_M51VA), + SND_PCI_QUIRK(0x1043, 0x19a3, "ASUS M51VA", ALC663_ASUS_G50V), SND_PCI_QUIRK(0x1043, 0x8290, "ASUS P5GC-MX", ALC662_3ST_6ch_DIG), SND_PCI_QUIRK(0x1043, 0x82a1, "ASUS Eeepc", ALC662_ASUS_EEEPC_P701), SND_PCI_QUIRK(0x1043, 0x82d1, "ASUS Eeepc EP20", ALC662_ASUS_EEEPC_EP20), SND_PCI_QUIRK(0x17aa, 0x101e, "Lenovo", ALC662_LENOVO_101E), + SND_PCI_QUIRK(0x1854, 0x2000, "ASUS H13-2000", ALC663_ASUS_H13), + SND_PCI_QUIRK(0x1854, 0x2001, "ASUS H13-2001", ALC663_ASUS_H13), + SND_PCI_QUIRK(0x1854, 0x2002, "ASUS H13-2002", ALC663_ASUS_H13), {} }; @@ -13809,7 +14039,53 @@ static struct alc_config_preset alc662_presets[] = { .unsol_event = alc662_eeepc_ep20_unsol_event, .init_hook = alc662_eeepc_ep20_inithook, }, - + [ALC663_ASUS_M51VA] = { + .mixers = { alc663_m51va_mixer, alc662_capture_mixer}, + .init_verbs = { alc662_init_verbs, alc663_m51va_init_verbs }, + .num_dacs = ARRAY_SIZE(alc662_dac_nids), + .dac_nids = alc662_dac_nids, + .dig_out_nid = ALC662_DIGOUT_NID, + .num_channel_mode = ARRAY_SIZE(alc662_3ST_2ch_modes), + .channel_mode = alc662_3ST_2ch_modes, + .input_mux = &alc663_m51va_capture_source, + .unsol_event = alc663_m51va_unsol_event, + .init_hook = alc663_m51va_inithook, + }, + [ALC663_ASUS_G71V] = { + .mixers = { alc663_g71v_mixer, alc662_capture_mixer}, + .init_verbs = { alc662_init_verbs, alc663_g71v_init_verbs }, + .num_dacs = ARRAY_SIZE(alc662_dac_nids), + .dac_nids = alc662_dac_nids, + .dig_out_nid = ALC662_DIGOUT_NID, + .num_channel_mode = ARRAY_SIZE(alc662_3ST_2ch_modes), + .channel_mode = alc662_3ST_2ch_modes, + .input_mux = &alc662_eeepc_capture_source, + .unsol_event = alc663_g71v_unsol_event, + .init_hook = alc663_g71v_inithook, + }, + [ALC663_ASUS_H13] = { + .mixers = { alc663_m51va_mixer, alc662_capture_mixer}, + .init_verbs = { alc662_init_verbs, alc663_m51va_init_verbs }, + .num_dacs = ARRAY_SIZE(alc662_dac_nids), + .dac_nids = alc662_dac_nids, + .num_channel_mode = ARRAY_SIZE(alc662_3ST_2ch_modes), + .channel_mode = alc662_3ST_2ch_modes, + .input_mux = &alc663_m51va_capture_source, + .unsol_event = alc663_m51va_unsol_event, + .init_hook = alc663_m51va_inithook, + }, + [ALC663_ASUS_G50V] = { + .mixers = { alc663_g50v_mixer, alc662_capture_mixer}, + .init_verbs = { alc662_init_verbs, alc663_g50v_init_verbs }, + .num_dacs = ARRAY_SIZE(alc662_dac_nids), + .dac_nids = alc662_dac_nids, + .dig_out_nid = ALC662_DIGOUT_NID, + .num_channel_mode = ARRAY_SIZE(alc662_3ST_6ch_modes), + .channel_mode = alc662_3ST_6ch_modes, + .input_mux = &alc663_capture_source, + .unsol_event = alc663_g50v_unsol_event, + .init_hook = alc663_g50v_inithook, + }, }; @@ -14108,11 +14384,17 @@ static int patch_alc662(struct hda_codec *codec) if (board_config != ALC662_AUTO) setup_preset(spec, &alc662_presets[board_config]); - spec->stream_name_analog = "ALC662 Analog"; + if (codec->vendor_id == 0x10ec0663) { + spec->stream_name_analog = "ALC663 Analog"; + spec->stream_name_digital = "ALC663 Digital"; + } else { + spec->stream_name_analog = "ALC662 Analog"; + spec->stream_name_digital = "ALC662 Digital"; + } + spec->stream_analog_playback = &alc662_pcm_analog_playback; spec->stream_analog_capture = &alc662_pcm_analog_capture; - spec->stream_name_digital = "ALC662 Digital"; spec->stream_digital_playback = &alc662_pcm_digital_playback; spec->stream_digital_capture = &alc662_pcm_digital_capture; @@ -14151,6 +14433,7 @@ struct hda_codec_preset snd_hda_preset_realtek[] = { .patch = patch_alc883 }, { .id = 0x10ec0662, .rev = 0x100101, .name = "ALC662 rev1", .patch = patch_alc662 }, + { .id = 0x10ec0663, .name = "ALC663", .patch = patch_alc662 }, { .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 }, { .id = 0x10ec0882, .name = "ALC882", .patch = patch_alc882 }, { .id = 0x10ec0883, .name = "ALC883", .patch = patch_alc883 }, -- cgit v0.10.2 From 531240ff520406c793a110e1c0f187d931f47d66 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 27 May 2008 12:10:25 +0200 Subject: [ALSA] hda - Fix vref pincap check in alc882 auto-detection Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c659588..8174050 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6357,7 +6357,9 @@ static void alc882_auto_init_analog_input(struct hda_codec *codec) continue; vref = PIN_IN; if (1 /*i <= AUTO_PIN_FRONT_MIC*/) { - if (snd_hda_param_read(codec, nid, AC_PAR_PIN_CAP) & + unsigned int pincap; + pincap = snd_hda_param_read(codec, nid, AC_PAR_PIN_CAP); + if ((pincap >> AC_PINCAP_VREF_SHIFT) & AC_PINCAP_VREF_80) vref = PIN_VREF80; } -- cgit v0.10.2 From 2f8932863d243a744ccd3dc005490ad9d2eae478 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 27 May 2008 12:14:47 +0200 Subject: [ALSA] hda - show correct codec chip in PCM stream names Show more exact codec chip name in the PCM stream name strings. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8174050..5770b9c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6491,14 +6491,20 @@ static int patch_alc882(struct hda_codec *codec) if (board_config != ALC882_AUTO) setup_preset(spec, &alc882_presets[board_config]); - spec->stream_name_analog = "ALC882 Analog"; + if (codec->vendor_id == 0x10ec0885) { + spec->stream_name_analog = "ALC885 Analog"; + spec->stream_name_digital = "ALC885 Digital"; + } else { + spec->stream_name_analog = "ALC882 Analog"; + spec->stream_name_digital = "ALC882 Digital"; + } + spec->stream_analog_playback = &alc882_pcm_analog_playback; spec->stream_analog_capture = &alc882_pcm_analog_capture; /* FIXME: setup DAC5 */ /*spec->stream_analog_alt_playback = &alc880_pcm_analog_alt_playback;*/ spec->stream_analog_alt_capture = &alc880_pcm_analog_alt_capture; - spec->stream_name_digital = "ALC882 Digital"; spec->stream_digital_playback = &alc882_pcm_digital_playback; spec->stream_digital_capture = &alc882_pcm_digital_capture; @@ -8177,12 +8183,25 @@ static int patch_alc883(struct hda_codec *codec) if (board_config != ALC883_AUTO) setup_preset(spec, &alc883_presets[board_config]); - spec->stream_name_analog = "ALC883 Analog"; + switch (codec->vendor_id) { + case 0x10ec0888: + spec->stream_name_analog = "ALC888 Analog"; + spec->stream_name_digital = "ALC888 Digital"; + break; + case 0x10ec0889: + spec->stream_name_analog = "ALC889 Analog"; + spec->stream_name_digital = "ALC889 Digital"; + break; + default: + spec->stream_name_analog = "ALC883 Analog"; + spec->stream_name_digital = "ALC883 Digital"; + break; + } + spec->stream_analog_playback = &alc883_pcm_analog_playback; spec->stream_analog_capture = &alc883_pcm_analog_capture; spec->stream_analog_alt_capture = &alc883_pcm_analog_alt_capture; - spec->stream_name_digital = "ALC883 Digital"; spec->stream_digital_playback = &alc883_pcm_digital_playback; spec->stream_digital_capture = &alc883_pcm_digital_capture; @@ -10680,12 +10699,18 @@ static int patch_alc268(struct hda_codec *codec) if (board_config != ALC268_AUTO) setup_preset(spec, &alc268_presets[board_config]); - spec->stream_name_analog = "ALC268 Analog"; + if (codec->vendor_id == 0x10ec0267) { + spec->stream_name_analog = "ALC267 Analog"; + spec->stream_name_digital = "ALC267 Digital"; + } else { + spec->stream_name_analog = "ALC268 Analog"; + spec->stream_name_digital = "ALC268 Digital"; + } + spec->stream_analog_playback = &alc268_pcm_analog_playback; spec->stream_analog_capture = &alc268_pcm_analog_capture; spec->stream_analog_alt_capture = &alc268_pcm_analog_alt_capture; - spec->stream_name_digital = "ALC268 Digital"; spec->stream_digital_playback = &alc268_pcm_digital_playback; if (!query_amp_caps(codec, 0x1d, HDA_INPUT)) @@ -13174,11 +13199,17 @@ static int patch_alc861vd(struct hda_codec *codec) if (board_config != ALC861VD_AUTO) setup_preset(spec, &alc861vd_presets[board_config]); - spec->stream_name_analog = "ALC861VD Analog"; + if (codec->vendor_id == 0x10ec0660) { + spec->stream_name_analog = "ALC660-VD Analog"; + spec->stream_name_digital = "ALC660-VD Digital"; + } else { + spec->stream_name_analog = "ALC861VD Analog"; + spec->stream_name_digital = "ALC861VD Digital"; + } + spec->stream_analog_playback = &alc861vd_pcm_analog_playback; spec->stream_analog_capture = &alc861vd_pcm_analog_capture; - spec->stream_name_digital = "ALC861VD Digital"; spec->stream_digital_playback = &alc861vd_pcm_digital_playback; spec->stream_digital_capture = &alc861vd_pcm_digital_capture; -- cgit v0.10.2 From f9423e7a94eb2dfef3503dde76d17eaf342ab962 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 27 May 2008 12:32:25 +0200 Subject: [ALSA] hda - Fix EAPD and COEF setups for realtek codecs Fixed EAPD and COEF setups for Realtek ALC662/663, 660-VD and 888 codecs. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5770b9c..cb3e028 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -780,6 +780,24 @@ static void alc_sku_unsol_event(struct hda_codec *codec, unsigned int res) alc_sku_automute(codec); } +/* additional initialization for ALC888 variants */ +static void alc888_coef_init(struct hda_codec *codec) +{ + unsigned int tmp; + + snd_hda_codec_write(codec, 0x20, 0, AC_VERB_SET_COEF_INDEX, 0); + tmp = snd_hda_codec_read(codec, 0x20, 0, AC_VERB_GET_PROC_COEF, 0); + snd_hda_codec_write(codec, 0x20, 0, AC_VERB_SET_COEF_INDEX, 7); + if ((tmp & 0xf0) == 2) + /* alc888S-VC */ + snd_hda_codec_read(codec, 0x20, 0, + AC_VERB_SET_PROC_COEF, 0x830); + else + /* alc888-VB */ + snd_hda_codec_read(codec, 0x20, 0, + AC_VERB_SET_PROC_COEF, 0x3030); +} + /* 32-bit subsystem ID for BIOS loading in HD Audio codec. * 31 ~ 16 : Manufacture ID * 15 ~ 8 : SKU ID @@ -855,8 +873,10 @@ do_sku: case 0x10ec0267: case 0x10ec0268: case 0x10ec0269: + case 0x10ec0660: + case 0x10ec0662: + case 0x10ec0663: case 0x10ec0862: - case 0x10ec0662: case 0x10ec0889: snd_hda_codec_write(codec, 0x14, 0, AC_VERB_SET_EAPD_BTLENABLE, 2); @@ -881,7 +901,6 @@ do_sku: case 0x10ec0882: case 0x10ec0883: case 0x10ec0885: - case 0x10ec0888: case 0x10ec0889: snd_hda_codec_write(codec, 0x20, 0, AC_VERB_SET_COEF_INDEX, 7); @@ -893,6 +912,9 @@ do_sku: AC_VERB_SET_PROC_COEF, tmp | 0x2010); break; + case 0x10ec0888: + alc888_coef_init(codec); + break; case 0x10ec0267: case 0x10ec0268: snd_hda_codec_write(codec, 0x20, 0, @@ -8214,6 +8236,9 @@ static int patch_alc883(struct hda_codec *codec) codec->patch_ops = alc_patch_ops; if (board_config == ALC883_AUTO) spec->init_hook = alc883_auto_init; + else if (codec->vendor_id == 0x10ec0888) + spec->init_hook = alc888_coef_init; + #ifdef CONFIG_SND_HDA_POWER_SAVE if (!spec->loopback.amplist) spec->loopback.amplist = alc883_loopbacks; @@ -12662,6 +12687,12 @@ static struct hda_verb alc861vd_eapd_verbs[] = { { } }; +static struct hda_verb alc660vd_eapd_verbs[] = { + {0x14, AC_VERB_SET_EAPD_BTLENABLE, 2}, + {0x15, AC_VERB_SET_EAPD_BTLENABLE, 2}, + { } +}; + static struct hda_verb alc861vd_lenovo_unsol_verbs[] = { {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)}, @@ -13202,6 +13233,8 @@ static int patch_alc861vd(struct hda_codec *codec) if (codec->vendor_id == 0x10ec0660) { spec->stream_name_analog = "ALC660-VD Analog"; spec->stream_name_digital = "ALC660-VD Digital"; + /* always turn on EAPD */ + spec->init_verbs[spec->num_init_verbs++] = alc660vd_eapd_verbs; } else { spec->stream_name_analog = "ALC861VD Analog"; spec->stream_name_digital = "ALC861VD Digital"; -- cgit v0.10.2 From dd564d0cf08686cf0cc332bf9d48cba5b26a8171 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 27 May 2008 18:03:56 +0200 Subject: x86: aperture_64.c: cleanups Some small cleanups for aperture_64.c; they should not really change any code. Signed-off-by: Pavel Machek Cc: Dave Jones Cc: Andi Kleen Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 5373f78..6eea42e 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -111,7 +111,7 @@ static u32 __init allocate_aperture(void) /* Find a PCI capability */ -static __u32 __init find_cap(int bus, int slot, int func, int cap) +static u32 __init find_cap(int bus, int slot, int func, int cap) { int bytes; u8 pos; @@ -137,7 +137,7 @@ static __u32 __init find_cap(int bus, int slot, int func, int cap) } /* Read a standard AGPv3 bridge header */ -static __u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) +static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) { u32 apsize; u32 apsizereg; @@ -202,7 +202,7 @@ static __u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) * the AGP bridges should be always an own bus on the HT hierarchy, * but do it here for future safety. */ -static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) +static u32 __init search_agp_bridge(u32 *order, int *valid_agp) { int bus, slot, func; -- cgit v0.10.2 From b1829d2705daa7cb72eb1e08bdc8b7e9fad34266 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 May 2008 01:22:08 +0200 Subject: ftrace: fix merge Signed-off-by: Ingo Molnar diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ffbbd54..b482fe8 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -122,7 +122,7 @@ static inline void tracer_disable(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER +#ifdef CONFIG_TRACING extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else -- cgit v0.10.2 From b20aeccd6ad42ccb6be1b3d1d32618ddd2b31bf0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 May 2008 14:24:38 +0200 Subject: xen: fix early bootup crash on native hardware -tip tree auto-testing found the following early bootup hang: --------------> get_memcfg_from_srat: assigning address to rsdp RSD PTR v0 [Nvidia] BUG: Int 14: CR2 ffd00040 EDI 8092fbfe ESI ffd00040 EBP 80b0aee8 ESP 80b0aed0 EBX 000f76f0 EDX 0000000e ECX 00000003 EAX ffd00040 err 00000000 EIP 802c055a CS 00000060 flg 00010006 Stack: ffd00040 80bc78d0 80b0af6c 80b1dbfe 8093d8ba 00000008 80b42810 80b4ddb4 80b42842 00000000 80b0af1c 801079c8 808e724e 00000000 80b42871 802c0531 00000100 00000000 0003fff0 80b0af40 80129999 00040100 00040100 00000000 Pid: 0, comm: swapper Not tainted 2.6.26-rc4-sched-devel.git #570 [<802c055a>] ? strncmp+0x11/0x25 [<80b1dbfe>] ? get_memcfg_from_srat+0xb4/0x568 [<801079c8>] ? mcount_call+0x5/0x9 [<802c0531>] ? strcmp+0xa/0x22 [<80129999>] ? printk+0x38/0x3a [<80129999>] ? printk+0x38/0x3a [<8011b122>] ? memory_present+0x66/0x6f [<80b216b4>] ? setup_memory+0x13/0x40c [<80b16b47>] ? propagate_e820_map+0x80/0x97 [<80b1622a>] ? setup_arch+0x248/0x477 [<80129999>] ? printk+0x38/0x3a [<80b11759>] ? start_kernel+0x6e/0x2eb [<80b110fc>] ? i386_start_kernel+0xeb/0xf2 ======================= <------ with this config: http://redhat.com/~mingo/misc/config-Wed_May_28_01_33_33_CEST_2008.bad The thing is, the crash makes little sense at first sight. We crash on a benign-looking printk. The code around it got changed in -tip but checking those topic branches individually did not reproduce the bug. Bisection led to this commit: | d5edbc1f75420935b1ec7e65df10c8f81cea82de is first bad commit | commit d5edbc1f75420935b1ec7e65df10c8f81cea82de | Author: Jeremy Fitzhardinge | Date: Mon May 26 23:31:22 2008 +0100 | | xen: add p2m mfn_list_list Which is somewhat surprising, as on native hardware Xen client side should have little to no side-effects. After some head scratching, it turns out the following happened: randconfig enabled the following Xen options: CONFIG_XEN=y CONFIG_XEN_MAX_DOMAIN_MEMORY=8 # CONFIG_XEN_BLKDEV_FRONTEND is not set # CONFIG_XEN_NETDEV_FRONTEND is not set CONFIG_HVC_XEN=y # CONFIG_XEN_BALLOON is not set which activated this piece of code in arch/x86/xen/mmu.c: > @@ -69,6 +69,13 @@ > __attribute__((section(".data.page_aligned"))) = > { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; > > +/* Arrays of p2m arrays expressed in mfns used for save/restore */ > +static unsigned long p2m_top_mfn[TOP_ENTRIES] > + __attribute__((section(".bss.page_aligned"))); > + > +static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE] > + __attribute__((section(".bss.page_aligned"))); The problem is, you must only put variables into .bss.page_aligned that have a _size_ that is _exactly_ page aligned. In this case the size of p2m_top_mfn_list is not page aligned: 80b8d000 b p2m_top_mfn 80b8f000 b p2m_top_mfn_list 80b8f008 b softirq_stack 80b97008 b hardirq_stack 80b9f008 b bm_pte So all subsequent variables get unaligned which, depending on luck, breaks the kernel in various funny ways. In this case what killed the kernel first was the misaligned bootmap pte page, resulting in that creative crash above. Anyway, this was a fun bug to track down :-) I think the moral is that .bss.page_aligned is a dangerous construct in its current form, and the symptoms of breakage are very non-trivial, so i think we need build-time checks to make sure all symbols in .bss.page_aligned are truly page aligned. The Xen fix below gets the kernel booting again. Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index e959559..eef3b5c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -73,7 +73,8 @@ static unsigned long *p2m_top[TOP_ENTRIES] static unsigned long p2m_top_mfn[TOP_ENTRIES] __attribute__((section(".bss.page_aligned"))); -static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE] +static unsigned long p2m_top_mfn_list[ + PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)] __attribute__((section(".bss.page_aligned"))); static inline unsigned p2m_top_index(unsigned long pfn) -- cgit v0.10.2 From a93bbaa77ea61c6bad684263a65f812b31bf9791 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 27 May 2008 17:59:24 +0200 Subject: [ALSA] Improve the slots option handling Fix and improve the slots option handling. The sound core tries to find the slot with the given module name first and assign if it's still available. If all pre-given slots are unavailable, then try to find another free slot. Also, when a module name begins with '!', it means the negative match: the slot will be given for any modules but that one. Signed-off-by: Takashi Iwai diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index f48939e..529073d 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -2271,6 +2271,10 @@ case above again, the first two slots are already reserved. If any other driver (e.g. snd-usb-audio) is loaded before snd-interwave or snd-ens1371, it will be assigned to the third or later slot. +When a module name is given with '!', the slot will be given for any +modules but that name. For example, "slots=!snd-pcsp" will reserve +the first slot for any modules but snd-pcsp. + ALSA PCM devices to OSS devices mapping ======================================= diff --git a/sound/core/init.c b/sound/core/init.c index ac05734..5c254d4 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -46,17 +46,24 @@ static char *slots[SNDRV_CARDS]; module_param_array(slots, charp, NULL, 0444); MODULE_PARM_DESC(slots, "Module names assigned to the slots."); -/* return non-zero if the given index is already reserved for another +/* return non-zero if the given index is reserved for the given * module via slots option */ -static int module_slot_mismatch(struct module *module, int idx) +static int module_slot_match(struct module *module, int idx) { + int match = 1; #ifdef MODULE - char *s1, *s2; + const char *s1, *s2; + if (!module || !module->name || !slots[idx]) return 0; - s1 = slots[idx]; - s2 = module->name; + + s1 = module->name; + s2 = slots[idx]; + if (*s2 == '!') { + match = 0; /* negative match */ + s2++; + } /* compare module name strings * hyphens are handled as equivalent with underscore */ @@ -68,12 +75,12 @@ static int module_slot_mismatch(struct module *module, int idx) if (c2 == '-') c2 = '_'; if (c1 != c2) - return 1; + return !match; if (!c1) break; } -#endif - return 0; +#endif /* MODULE */ + return match; } #if defined(CONFIG_SND_MIXER_OSS) || defined(CONFIG_SND_MIXER_OSS_MODULE) @@ -129,7 +136,7 @@ struct snd_card *snd_card_new(int idx, const char *xid, struct module *module, int extra_size) { struct snd_card *card; - int err; + int err, idx2; if (extra_size < 0) extra_size = 0; @@ -144,35 +151,41 @@ struct snd_card *snd_card_new(int idx, const char *xid, err = 0; mutex_lock(&snd_card_mutex); if (idx < 0) { - int idx2; for (idx2 = 0; idx2 < SNDRV_CARDS; idx2++) /* idx == -1 == 0xffff means: take any free slot */ if (~snd_cards_lock & idx & 1<= snd_ecards_limit) - snd_ecards_limit = idx + 1; - break; + if (module_slot_match(module, idx2)) { + idx = idx2; + break; + } + } + } + if (idx < 0) { + for (idx2 = 0; idx2 < SNDRV_CARDS; idx2++) + /* idx == -1 == 0xffff means: take any free slot */ + if (~snd_cards_lock & idx & 1<= SNDRV_CARDS) + err = -ENODEV; + if (err < 0) { mutex_unlock(&snd_card_mutex); snd_printk(KERN_ERR "cannot find the slot for index %d (range 0-%i), error: %d\n", idx, snd_ecards_limit - 1, err); goto __error; } snd_cards_lock |= 1 << idx; /* lock it */ + if (idx >= snd_ecards_limit) + snd_ecards_limit = idx + 1; /* increase the limit */ mutex_unlock(&snd_card_mutex); card->number = idx; card->module = module; -- cgit v0.10.2 From 3c65e8743bf8b5cf0f90e8d767bf1d8b50c14c76 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 28 May 2008 08:58:56 -0500 Subject: JFS: diAlloc() should return -EIO rather than EIO The comment above the function says one of its return value is -EIO, and also the caller of diAlloc() checks for -EIO: struct inode *ialloc(struct inode *parent, umode_t mode) { ... rc = diAlloc(parent, S_ISDIR(mode), inode); if (rc) { jfs_warn("ialloc: diAlloc returned %d!", rc); if (rc == -EIO) make_bad_inode(inode); ... Signed-off-by: Li Zefan Signed-off-by: Dave Kleikamp diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 734ec91..d6363d8 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -1520,7 +1520,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) jfs_error(ip->i_sb, "diAlloc: can't find free bit " "in wmap"); - return EIO; + return -EIO; } /* determine the inode number within the -- cgit v0.10.2 From c7d624d1ee7b77622305bd638755394e4d3f2d2f Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 28 May 2008 12:57:13 -0400 Subject: x86: Fix up silly i1586 boot message. Trying to boot a 64-bit kernel on a 32bit Pentium 4 gets you an amusing message along the lines of. "you need an x86-64, but you only have an i1586" due to the P4 being family F. Munge it to be 686. Signed-off-by: Dave Jones Signed-off-by: H. Peter Anvin diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 00e19ed..92d6fd7 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -28,6 +28,8 @@ static char *cpu_name(int level) if (level == 64) { return "x86-64"; } else { + if (level == 15) + level = 6; sprintf(buf, "i%d86", level); return buf; } -- cgit v0.10.2 From 19ec673ced067316b9732bc6d1c4ff4052e5f795 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 28 May 2008 23:00:47 +0400 Subject: x86: nmi - fix incorrect NMI watchdog used by default The commit commit 4b82b277707a39b97271439c475f186f63ec4692 Author: Cyrill Gorcunov Date: Sat May 24 19:36:35 2008 +0400 set nmi_watchdog to NMI_IO_APIC as by default. This causes hangs on some machines with buggy watchdogs. Fix it - i.e. restore old behaviour. Thanks to Sitsofe Wheeler and Adrian Bunk for catching the problem and Maciej W. Rozycki for explanation what is going on there. Signed-off-by: Cyrill Gorcunov CC: Maciej W. Rozycki Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 69a839f..3671a9f 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -84,20 +84,15 @@ static inline unsigned int get_timer_irqs(int cpu) #endif } +#ifdef CONFIG_X86_64 /* Run after command line and cpu_init init, but before all other checks */ void nmi_watchdog_default(void) { if (nmi_watchdog != NMI_DEFAULT) return; -#ifdef CONFIG_X86_64 nmi_watchdog = NMI_NONE; -#else - if (lapic_watchdog_ok()) - nmi_watchdog = NMI_LOCAL_APIC; - else - nmi_watchdog = NMI_IO_APIC; -#endif } +#endif #ifdef CONFIG_SMP /* @@ -488,8 +483,15 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, return -EIO; } +#ifdef CONFIG_X86_64 /* if nmi_watchdog is not set yet, then set it */ nmi_watchdog_default(); +#else + if (lapic_watchdog_ok()) + nmi_watchdog = NMI_LOCAL_APIC; + else + nmi_watchdog = NMI_IO_APIC; +#endif if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog_enabled) diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 1e8f34d..6f4d44f 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -38,9 +38,11 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev) #ifdef CONFIG_X86_64 extern void default_do_nmi(struct pt_regs *); +extern void nmi_watchdog_default(void); +#else +#define nmi_watchdog_default(void) do {} while (0) #endif -extern void nmi_watchdog_default(void); extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; -- cgit v0.10.2 From e13ac2e9b18bde51cf32c69c2209df25791ab3e5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 28 May 2008 17:58:05 +0100 Subject: [ALSA] ASoC: Add SOC_DOUBLE_S8_TLV control type The SOC_DOUBLE_S8_TLV control type was originally implemented in the UDA1380 driver by Philipp Zabel and was moved into the core by me. Signed-off-by: Philipp Zabel Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai diff --git a/include/sound/soc.h b/include/sound/soc.h index bca9538..9fa2093 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -73,6 +73,15 @@ .get = snd_soc_get_volsw_2r, .put = snd_soc_put_volsw_2r, \ .private_value = (reg_left) | ((shift) << 8) | \ ((max) << 12) | ((invert) << 20) | ((reg_right) << 24) } +#define SOC_DOUBLE_S8_TLV(xname, reg, min, max, tlv_array) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \ + .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | \ + SNDRV_CTL_ELEM_ACCESS_READWRITE, \ + .tlv.p = (tlv_array), \ + .info = snd_soc_info_volsw_s8, .get = snd_soc_get_volsw_s8, \ + .put = snd_soc_put_volsw_s8, \ + .private_value = (reg) | (((signed char)max) << 16) | \ + (((signed char)min) << 24) } #define SOC_ENUM_DOUBLE(xreg, xshift_l, xshift_r, xmask, xtexts) \ { .reg = xreg, .shift_l = xshift_l, .shift_r = xshift_r, \ .mask = xmask, .texts = xtexts } @@ -267,6 +276,12 @@ int snd_soc_get_volsw_2r(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_soc_put_volsw_2r(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); +int snd_soc_info_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo); +int snd_soc_get_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); +int snd_soc_put_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol); /* SoC PCM stream information */ struct snd_soc_pcm_stream { diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index a3f091e..f594ab8 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1589,6 +1589,78 @@ int snd_soc_put_volsw_2r(struct snd_kcontrol *kcontrol, } EXPORT_SYMBOL_GPL(snd_soc_put_volsw_2r); +/** + * snd_soc_info_volsw_s8 - signed mixer info callback + * @kcontrol: mixer control + * @uinfo: control element information + * + * Callback to provide information about a signed mixer control. + * + * Returns 0 for success. + */ +int snd_soc_info_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + int max = (signed char)((kcontrol->private_value >> 16) & 0xff); + int min = (signed char)((kcontrol->private_value >> 24) & 0xff); + + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 2; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = max-min; + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_info_volsw_s8); + +/** + * snd_soc_get_volsw_s8 - signed mixer get callback + * @kcontrol: mixer control + * @uinfo: control element information + * + * Callback to get the value of a signed mixer control. + * + * Returns 0 for success. + */ +int snd_soc_get_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + int reg = kcontrol->private_value & 0xff; + int min = (signed char)((kcontrol->private_value >> 24) & 0xff); + int val = snd_soc_read(codec, reg); + + ucontrol->value.integer.value[0] = + ((signed char)(val & 0xff))-min; + ucontrol->value.integer.value[1] = + ((signed char)((val >> 8) & 0xff))-min; + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_get_volsw_s8); + +/** + * snd_soc_put_volsw_sgn - signed mixer put callback + * @kcontrol: mixer control + * @uinfo: control element information + * + * Callback to set the value of a signed mixer control. + * + * Returns 0 for success. + */ +int snd_soc_put_volsw_s8(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + int reg = kcontrol->private_value & 0xff; + int min = (signed char)((kcontrol->private_value >> 24) & 0xff); + unsigned short val; + + val = (ucontrol->value.integer.value[0]+min) & 0xff; + val |= ((ucontrol->value.integer.value[1]+min) & 0xff) << 8; + + return snd_soc_update_bits(codec, reg, 0xffff, val); +} +EXPORT_SYMBOL_GPL(snd_soc_put_volsw_s8); + static int __devinit snd_soc_init(void) { printk(KERN_INFO "ASoC version %s\n", SND_SOC_VERSION); -- cgit v0.10.2 From b7482f52789266e2548be5d0f6420c9fc12428d8 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 28 May 2008 17:58:06 +0100 Subject: [ALSA] ASoC: Add UDA1380 driver The UDA1380 codec is used by the HTC Magician and a number of Samsung reference boards. This driver has had a long out of tree history, having originally been written by Giorgio Padrin and converted to ASoC by Richard Purdie. Since conversion to ASoC most of the work on the driver has been done by Philipp Zabel with some review and updates for new APIs by Liam Girdwood and Mark Brown. Signed-off-by: Richard Purdie Signed-off-by: Philipp Zabel Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index d4a5fe4..7beefcc 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -2,6 +2,9 @@ config SND_SOC_AC97_CODEC tristate select SND_AC97_CODEC +config SND_SOC_UDA1380 + tristate + config SND_SOC_WM8731 tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 4e1314c..d5926a1 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -1,4 +1,5 @@ snd-soc-ac97-objs := ac97.o +snd-soc-uda1380-objs := uda1380.o snd-soc-wm8731-objs := wm8731.o snd-soc-wm8750-objs := wm8750.o snd-soc-wm8753-objs := wm8753.o @@ -8,6 +9,7 @@ snd-soc-cs4270-objs := cs4270.o snd-soc-tlv320aic3x-objs := tlv320aic3x.o obj-$(CONFIG_SND_SOC_AC97_CODEC) += snd-soc-ac97.o +obj-$(CONFIG_SND_SOC_UDA1380) += snd-soc-uda1380.o obj-$(CONFIG_SND_SOC_WM8731) += snd-soc-wm8731.o obj-$(CONFIG_SND_SOC_WM8750) += snd-soc-wm8750.o obj-$(CONFIG_SND_SOC_WM8753) += snd-soc-wm8753.o diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c new file mode 100644 index 0000000..cb50486 --- /dev/null +++ b/sound/soc/codecs/uda1380.c @@ -0,0 +1,852 @@ +/* + * uda1380.c - Philips UDA1380 ALSA SoC audio driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Copyright (c) 2007 Philipp Zabel + * Improved support for DAPM and audio routing/mixing capabilities, + * added TLV support. + * + * Modified by Richard Purdie to fit into SoC + * codec model. + * + * Copyright (c) 2005 Giorgio Padrin + * Copyright 2005 Openedhand Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "uda1380.h" + +#define UDA1380_VERSION "0.6" +#define AUDIO_NAME "uda1380" + +/* + * uda1380 register cache + */ +static const u16 uda1380_reg[UDA1380_CACHEREGNUM] = { + 0x0502, 0x0000, 0x0000, 0x3f3f, + 0x0202, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xff00, 0x0000, 0x4800, + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x8000, 0x0002, 0x0000, +}; + +/* + * read uda1380 register cache + */ +static inline unsigned int uda1380_read_reg_cache(struct snd_soc_codec *codec, + unsigned int reg) +{ + u16 *cache = codec->reg_cache; + if (reg == UDA1380_RESET) + return 0; + if (reg >= UDA1380_CACHEREGNUM) + return -1; + return cache[reg]; +} + +/* + * write uda1380 register cache + */ +static inline void uda1380_write_reg_cache(struct snd_soc_codec *codec, + u16 reg, unsigned int value) +{ + u16 *cache = codec->reg_cache; + if (reg >= UDA1380_CACHEREGNUM) + return; + cache[reg] = value; +} + +/* + * write to the UDA1380 register space + */ +static int uda1380_write(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + u8 data[3]; + + /* data is + * data[0] is register offset + * data[1] is MS byte + * data[2] is LS byte + */ + data[0] = reg; + data[1] = (value & 0xff00) >> 8; + data[2] = value & 0x00ff; + + uda1380_write_reg_cache(codec, reg, value); + + /* the interpolator & decimator regs must only be written when the + * codec DAI is active. + */ + if (!codec->active && (reg >= UDA1380_MVOL)) + return 0; + pr_debug("uda1380: hw write %x val %x\n", reg, value); + if (codec->hw_write(codec->control_data, data, 3) == 3) { + unsigned int val; + i2c_master_send(codec->control_data, data, 1); + i2c_master_recv(codec->control_data, data, 2); + val = (data[0]<<8) | data[1]; + if (val != value) { + pr_debug("uda1380: READ BACK VAL %x\n", + (data[0]<<8) | data[1]); + return -EIO; + } + return 0; + } else + return -EIO; +} + +#define uda1380_reset(c) uda1380_write(c, UDA1380_RESET, 0) + +/* declarations of ALSA reg_elem_REAL controls */ +static const char *uda1380_deemp[] = { + "None", + "32kHz", + "44.1kHz", + "48kHz", + "96kHz", +}; +static const char *uda1380_input_sel[] = { + "Line", + "Mic + Line R", + "Line L", + "Mic", +}; +static const char *uda1380_output_sel[] = { + "DAC", + "Analog Mixer", +}; +static const char *uda1380_spf_mode[] = { + "Flat", + "Minimum1", + "Minimum2", + "Maximum" +}; +static const char *uda1380_capture_sel[] = { + "ADC", + "Digital Mixer" +}; +static const char *uda1380_sel_ns[] = { + "3rd-order", + "5th-order" +}; +static const char *uda1380_mix_control[] = { + "off", + "PCM only", + "before sound processing", + "after sound processing" +}; +static const char *uda1380_sdet_setting[] = { + "3200", + "4800", + "9600", + "19200" +}; +static const char *uda1380_os_setting[] = { + "single-speed", + "double-speed (no mixing)", + "quad-speed (no mixing)" +}; + +static const struct soc_enum uda1380_deemp_enum[] = { + SOC_ENUM_SINGLE(UDA1380_DEEMP, 8, 5, uda1380_deemp), + SOC_ENUM_SINGLE(UDA1380_DEEMP, 0, 5, uda1380_deemp), +}; +static const struct soc_enum uda1380_input_sel_enum = + SOC_ENUM_SINGLE(UDA1380_ADC, 2, 4, uda1380_input_sel); /* SEL_MIC, SEL_LNA */ +static const struct soc_enum uda1380_output_sel_enum = + SOC_ENUM_SINGLE(UDA1380_PM, 7, 2, uda1380_output_sel); /* R02_EN_AVC */ +static const struct soc_enum uda1380_spf_enum = + SOC_ENUM_SINGLE(UDA1380_MODE, 14, 4, uda1380_spf_mode); /* M */ +static const struct soc_enum uda1380_capture_sel_enum = + SOC_ENUM_SINGLE(UDA1380_IFACE, 6, 2, uda1380_capture_sel); /* SEL_SOURCE */ +static const struct soc_enum uda1380_sel_ns_enum = + SOC_ENUM_SINGLE(UDA1380_MIXER, 14, 2, uda1380_sel_ns); /* SEL_NS */ +static const struct soc_enum uda1380_mix_enum = + SOC_ENUM_SINGLE(UDA1380_MIXER, 12, 4, uda1380_mix_control); /* MIX, MIX_POS */ +static const struct soc_enum uda1380_sdet_enum = + SOC_ENUM_SINGLE(UDA1380_MIXER, 4, 4, uda1380_sdet_setting); /* SD_VALUE */ +static const struct soc_enum uda1380_os_enum = + SOC_ENUM_SINGLE(UDA1380_MIXER, 0, 3, uda1380_os_setting); /* OS */ + +/* + * from -48 dB in 1.5 dB steps (mute instead of -49.5 dB) + */ +static DECLARE_TLV_DB_SCALE(amix_tlv, -4950, 150, 1); + +/* + * from -78 dB in 1 dB steps (3 dB steps, really. LSB are ignored), + * from -66 dB in 0.5 dB steps (2 dB steps, really) and + * from -52 dB in 0.25 dB steps + */ +static const unsigned int mvol_tlv[] = { + TLV_DB_RANGE_HEAD(3), + 0, 15, TLV_DB_SCALE_ITEM(-8200, 100, 1), + 16, 43, TLV_DB_SCALE_ITEM(-6600, 50, 0), + 44, 252, TLV_DB_SCALE_ITEM(-5200, 25, 0), +}; + +/* + * from -72 dB in 1.5 dB steps (6 dB steps really), + * from -66 dB in 0.75 dB steps (3 dB steps really), + * from -60 dB in 0.5 dB steps (2 dB steps really) and + * from -46 dB in 0.25 dB steps + */ +static const unsigned int vc_tlv[] = { + TLV_DB_RANGE_HEAD(4), + 0, 7, TLV_DB_SCALE_ITEM(-7800, 150, 1), + 8, 15, TLV_DB_SCALE_ITEM(-6600, 75, 0), + 16, 43, TLV_DB_SCALE_ITEM(-6000, 50, 0), + 44, 228, TLV_DB_SCALE_ITEM(-4600, 25, 0), +}; + +/* from 0 to 6 dB in 2 dB steps if SPF mode != flat */ +static DECLARE_TLV_DB_SCALE(tr_tlv, 0, 200, 0); + +/* from 0 to 24 dB in 2 dB steps, if SPF mode == maximum, otherwise cuts + * off at 18 dB max) */ +static DECLARE_TLV_DB_SCALE(bb_tlv, 0, 200, 0); + +/* from -63 to 24 dB in 0.5 dB steps (-128...48) */ +static DECLARE_TLV_DB_SCALE(dec_tlv, -6400, 50, 1); + +/* from 0 to 24 dB in 3 dB steps */ +static DECLARE_TLV_DB_SCALE(pga_tlv, 0, 300, 0); + +/* from 0 to 30 dB in 2 dB steps */ +static DECLARE_TLV_DB_SCALE(vga_tlv, 0, 200, 0); + +static const struct snd_kcontrol_new uda1380_snd_controls[] = { + SOC_DOUBLE_TLV("Analog Mixer Volume", UDA1380_AMIX, 0, 8, 44, 1, amix_tlv), /* AVCR, AVCL */ + SOC_DOUBLE_TLV("Master Playback Volume", UDA1380_MVOL, 0, 8, 252, 1, mvol_tlv), /* MVCL, MVCR */ + SOC_SINGLE_TLV("ADC Playback Volume", UDA1380_MIXVOL, 8, 228, 1, vc_tlv), /* VC2 */ + SOC_SINGLE_TLV("PCM Playback Volume", UDA1380_MIXVOL, 0, 228, 1, vc_tlv), /* VC1 */ + SOC_ENUM("Sound Processing Filter", uda1380_spf_enum), /* M */ + SOC_DOUBLE_TLV("Tone Control - Treble", UDA1380_MODE, 4, 12, 3, 0, tr_tlv), /* TRL, TRR */ + SOC_DOUBLE_TLV("Tone Control - Bass", UDA1380_MODE, 0, 8, 15, 0, bb_tlv), /* BBL, BBR */ +/**/ SOC_SINGLE("Master Playback Switch", UDA1380_DEEMP, 14, 1, 1), /* MTM */ + SOC_SINGLE("ADC Playback Switch", UDA1380_DEEMP, 11, 1, 1), /* MT2 from decimation filter */ + SOC_ENUM("ADC Playback De-emphasis", uda1380_deemp_enum[0]), /* DE2 */ + SOC_SINGLE("PCM Playback Switch", UDA1380_DEEMP, 3, 1, 1), /* MT1, from digital data input */ + SOC_ENUM("PCM Playback De-emphasis", uda1380_deemp_enum[1]), /* DE1 */ + SOC_SINGLE("DAC Polarity inverting Switch", UDA1380_MIXER, 15, 1, 0), /* DA_POL_INV */ + SOC_ENUM("Noise Shaper", uda1380_sel_ns_enum), /* SEL_NS */ + SOC_ENUM("Digital Mixer Signal Control", uda1380_mix_enum), /* MIX_POS, MIX */ + SOC_SINGLE("Silence Switch", UDA1380_MIXER, 7, 1, 0), /* SILENCE, force DAC output to silence */ + SOC_SINGLE("Silence Detector Switch", UDA1380_MIXER, 6, 1, 0), /* SDET_ON */ + SOC_ENUM("Silence Detector Setting", uda1380_sdet_enum), /* SD_VALUE */ + SOC_ENUM("Oversampling Input", uda1380_os_enum), /* OS */ + SOC_DOUBLE_S8_TLV("ADC Capture Volume", UDA1380_DEC, -128, 48, dec_tlv), /* ML_DEC, MR_DEC */ +/**/ SOC_SINGLE("ADC Capture Switch", UDA1380_PGA, 15, 1, 1), /* MT_ADC */ + SOC_DOUBLE_TLV("Line Capture Volume", UDA1380_PGA, 0, 8, 8, 0, pga_tlv), /* PGA_GAINCTRLL, PGA_GAINCTRLR */ + SOC_SINGLE("ADC Polarity inverting Switch", UDA1380_ADC, 12, 1, 0), /* ADCPOL_INV */ + SOC_SINGLE_TLV("Mic Capture Volume", UDA1380_ADC, 8, 15, 0, vga_tlv), /* VGA_CTRL */ + SOC_SINGLE("DC Filter Bypass Switch", UDA1380_ADC, 1, 1, 0), /* SKIP_DCFIL (before decimator) */ + SOC_SINGLE("DC Filter Enable Switch", UDA1380_ADC, 0, 1, 0), /* EN_DCFIL (at output of decimator) */ + SOC_SINGLE("AGC Timing", UDA1380_AGC, 8, 7, 0), /* TODO: enum, see table 62 */ + SOC_SINGLE("AGC Target level", UDA1380_AGC, 2, 3, 1), /* AGC_LEVEL */ + /* -5.5, -8, -11.5, -14 dBFS */ + SOC_SINGLE("AGC Switch", UDA1380_AGC, 0, 1, 0), +}; + +/* add non dapm controls */ +static int uda1380_add_controls(struct snd_soc_codec *codec) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(uda1380_snd_controls); i++) { + err = snd_ctl_add(codec->card, + snd_soc_cnew(&uda1380_snd_controls[i], codec, NULL)); + if (err < 0) + return err; + } + + return 0; +} + +/* Input mux */ +static const struct snd_kcontrol_new uda1380_input_mux_control = + SOC_DAPM_ENUM("Route", uda1380_input_sel_enum); + +/* Output mux */ +static const struct snd_kcontrol_new uda1380_output_mux_control = + SOC_DAPM_ENUM("Route", uda1380_output_sel_enum); + +/* Capture mux */ +static const struct snd_kcontrol_new uda1380_capture_mux_control = + SOC_DAPM_ENUM("Route", uda1380_capture_sel_enum); + + +static const struct snd_soc_dapm_widget uda1380_dapm_widgets[] = { + SND_SOC_DAPM_MUX("Input Mux", SND_SOC_NOPM, 0, 0, + &uda1380_input_mux_control), + SND_SOC_DAPM_MUX("Output Mux", SND_SOC_NOPM, 0, 0, + &uda1380_output_mux_control), + SND_SOC_DAPM_MUX("Capture Mux", SND_SOC_NOPM, 0, 0, + &uda1380_capture_mux_control), + SND_SOC_DAPM_PGA("Left PGA", UDA1380_PM, 3, 0, NULL, 0), + SND_SOC_DAPM_PGA("Right PGA", UDA1380_PM, 1, 0, NULL, 0), + SND_SOC_DAPM_PGA("Mic LNA", UDA1380_PM, 4, 0, NULL, 0), + SND_SOC_DAPM_ADC("Left ADC", "Left Capture", UDA1380_PM, 2, 0), + SND_SOC_DAPM_ADC("Right ADC", "Right Capture", UDA1380_PM, 0, 0), + SND_SOC_DAPM_INPUT("VINM"), + SND_SOC_DAPM_INPUT("VINL"), + SND_SOC_DAPM_INPUT("VINR"), + SND_SOC_DAPM_MIXER("Analog Mixer", UDA1380_PM, 6, 0, NULL, 0), + SND_SOC_DAPM_OUTPUT("VOUTLHP"), + SND_SOC_DAPM_OUTPUT("VOUTRHP"), + SND_SOC_DAPM_OUTPUT("VOUTL"), + SND_SOC_DAPM_OUTPUT("VOUTR"), + SND_SOC_DAPM_DAC("DAC", "Playback", UDA1380_PM, 10, 0), + SND_SOC_DAPM_PGA("HeadPhone Driver", UDA1380_PM, 13, 0, NULL, 0), +}; + +static const struct snd_soc_dapm_route audio_map[] = { + + /* output mux */ + {"HeadPhone Driver", NULL, "Output Mux"}, + {"VOUTR", NULL, "Output Mux"}, + {"VOUTL", NULL, "Output Mux"}, + + {"Analog Mixer", NULL, "VINR"}, + {"Analog Mixer", NULL, "VINL"}, + {"Analog Mixer", NULL, "DAC"}, + + {"Output Mux", "DAC", "DAC"}, + {"Output Mux", "Analog Mixer", "Analog Mixer"}, + + /* {"DAC", "Digital Mixer", "I2S" } */ + + /* headphone driver */ + {"VOUTLHP", NULL, "HeadPhone Driver"}, + {"VOUTRHP", NULL, "HeadPhone Driver"}, + + /* input mux */ + {"Left ADC", NULL, "Input Mux"}, + {"Input Mux", "Mic", "Mic LNA"}, + {"Input Mux", "Mic + Line R", "Mic LNA"}, + {"Input Mux", "Line L", "Left PGA"}, + {"Input Mux", "Line", "Left PGA"}, + + /* right input */ + {"Right ADC", "Mic + Line R", "Right PGA"}, + {"Right ADC", "Line", "Right PGA"}, + + /* inputs */ + {"Mic LNA", NULL, "VINM"}, + {"Left PGA", NULL, "VINL"}, + {"Right PGA", NULL, "VINR"}, +}; + +static int uda1380_add_widgets(struct snd_soc_codec *codec) +{ + snd_soc_dapm_new_controls(codec, uda1380_dapm_widgets, + ARRAY_SIZE(uda1380_dapm_widgets)); + + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); + + snd_soc_dapm_new_widgets(codec); + return 0; +} + +static int uda1380_set_dai_fmt(struct snd_soc_codec_dai *codec_dai, + unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + int iface; + + /* set up DAI based upon fmt */ + iface = uda1380_read_reg_cache(codec, UDA1380_IFACE); + iface &= ~(R01_SFORI_MASK | R01_SIM | R01_SFORO_MASK); + + /* FIXME: how to select I2S for DATAO and MSB for DATAI correctly? */ + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + iface |= R01_SFORI_I2S | R01_SFORO_I2S; + break; + case SND_SOC_DAIFMT_LSB: + iface |= R01_SFORI_LSB16 | R01_SFORO_I2S; + break; + case SND_SOC_DAIFMT_MSB: + iface |= R01_SFORI_MSB | R01_SFORO_I2S; + } + + if ((fmt & SND_SOC_DAIFMT_MASTER_MASK) == SND_SOC_DAIFMT_CBM_CFM) + iface |= R01_SIM; + + uda1380_write(codec, UDA1380_IFACE, iface); + + return 0; +} + +/* + * Flush reg cache + * We can only write the interpolator and decimator registers + * when the DAI is being clocked by the CPU DAI. It's up to the + * machine and cpu DAI driver to do this before we are called. + */ +static int uda1380_pcm_prepare(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->codec; + int reg, reg_start, reg_end, clk; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + reg_start = UDA1380_MVOL; + reg_end = UDA1380_MIXER; + } else { + reg_start = UDA1380_DEC; + reg_end = UDA1380_AGC; + } + + /* FIXME disable DAC_CLK */ + clk = uda1380_read_reg_cache(codec, UDA1380_CLK); + uda1380_write(codec, UDA1380_CLK, clk & ~R00_DAC_CLK); + + for (reg = reg_start; reg <= reg_end; reg++) { + pr_debug("uda1380: flush reg %x val %x:", reg, + uda1380_read_reg_cache(codec, reg)); + uda1380_write(codec, reg, uda1380_read_reg_cache(codec, reg)); + } + + /* FIXME enable DAC_CLK */ + uda1380_write(codec, UDA1380_CLK, clk | R00_DAC_CLK); + + return 0; +} + +static int uda1380_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->codec; + u16 clk = uda1380_read_reg_cache(codec, UDA1380_CLK); + + /* set WSPLL power and divider if running from this clock */ + if (clk & R00_DAC_CLK) { + int rate = params_rate(params); + u16 pm = uda1380_read_reg_cache(codec, UDA1380_PM); + clk &= ~0x3; /* clear SEL_LOOP_DIV */ + switch (rate) { + case 6250 ... 12500: + clk |= 0x0; + break; + case 12501 ... 25000: + clk |= 0x1; + break; + case 25001 ... 50000: + clk |= 0x2; + break; + case 50001 ... 100000: + clk |= 0x3; + break; + } + uda1380_write(codec, UDA1380_PM, R02_PON_PLL | pm); + } + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + clk |= R00_EN_DAC | R00_EN_INT; + else + clk |= R00_EN_ADC | R00_EN_DEC; + + uda1380_write(codec, UDA1380_CLK, clk); + return 0; +} + +static void uda1380_pcm_shutdown(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->codec; + u16 clk = uda1380_read_reg_cache(codec, UDA1380_CLK); + + /* shut down WSPLL power if running from this clock */ + if (clk & R00_DAC_CLK) { + u16 pm = uda1380_read_reg_cache(codec, UDA1380_PM); + uda1380_write(codec, UDA1380_PM, ~R02_PON_PLL & pm); + } + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + clk &= ~(R00_EN_DAC | R00_EN_INT); + else + clk &= ~(R00_EN_ADC | R00_EN_DEC); + + uda1380_write(codec, UDA1380_CLK, clk); +} + +static int uda1380_mute(struct snd_soc_codec_dai *codec_dai, int mute) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u16 mute_reg = uda1380_read_reg_cache(codec, UDA1380_DEEMP) & ~R13_MTM; + + /* FIXME: mute(codec,0) is called when the magician clock is already + * set to WSPLL, but for some unknown reason writing to interpolator + * registers works only when clocked by SYSCLK */ + u16 clk = uda1380_read_reg_cache(codec, UDA1380_CLK); + uda1380_write(codec, UDA1380_CLK, ~R00_DAC_CLK & clk); + if (mute) + uda1380_write(codec, UDA1380_DEEMP, mute_reg | R13_MTM); + else + uda1380_write(codec, UDA1380_DEEMP, mute_reg); + uda1380_write(codec, UDA1380_CLK, clk); + return 0; +} + +static int uda1380_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) +{ + int pm = uda1380_read_reg_cache(codec, UDA1380_PM); + + switch (level) { + case SND_SOC_BIAS_ON: + case SND_SOC_BIAS_PREPARE: + uda1380_write(codec, UDA1380_PM, R02_PON_BIAS | pm); + break; + case SND_SOC_BIAS_STANDBY: + uda1380_write(codec, UDA1380_PM, R02_PON_BIAS); + break; + case SND_SOC_BIAS_OFF: + uda1380_write(codec, UDA1380_PM, 0x0); + break; + } + codec->bias_level = level; + return 0; +} + +#define UDA1380_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_11025 |\ + SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_22050 |\ + SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000) + +struct snd_soc_codec_dai uda1380_dai[] = { +{ + .name = "UDA1380", + .playback = { + .stream_name = "Playback", + .channels_min = 1, + .channels_max = 2, + .rates = UDA1380_RATES, + .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 2, + .rates = UDA1380_RATES, + .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .ops = { + .hw_params = uda1380_pcm_hw_params, + .shutdown = uda1380_pcm_shutdown, + .prepare = uda1380_pcm_prepare, + }, + .dai_ops = { + .digital_mute = uda1380_mute, + .set_fmt = uda1380_set_dai_fmt, + }, +}, +{ /* playback only - dual interface */ + .name = "UDA1380", + .playback = { + .stream_name = "Playback", + .channels_min = 1, + .channels_max = 2, + .rates = UDA1380_RATES, + .formats = SNDRV_PCM_FMTBIT_S16_LE, + }, + .ops = { + .hw_params = uda1380_pcm_hw_params, + .shutdown = uda1380_pcm_shutdown, + .prepare = uda1380_pcm_prepare, + }, + .dai_ops = { + .digital_mute = uda1380_mute, + .set_fmt = uda1380_set_dai_fmt, + }, +}, +{ /* capture only - dual interface*/ + .name = "UDA1380", + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 2, + .rates = UDA1380_RATES, + .formats = SNDRV_PCM_FMTBIT_S16_LE, + }, + .ops = { + .hw_params = uda1380_pcm_hw_params, + .shutdown = uda1380_pcm_shutdown, + .prepare = uda1380_pcm_prepare, + }, + .dai_ops = { + .set_fmt = uda1380_set_dai_fmt, + }, +}, +}; +EXPORT_SYMBOL_GPL(uda1380_dai); + +static int uda1380_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->codec; + + uda1380_set_bias_level(codec, SND_SOC_BIAS_OFF); + return 0; +} + +static int uda1380_resume(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->codec; + int i; + u8 data[2]; + u16 *cache = codec->reg_cache; + + /* Sync reg_cache with the hardware */ + for (i = 0; i < ARRAY_SIZE(uda1380_reg); i++) { + data[0] = (i << 1) | ((cache[i] >> 8) & 0x0001); + data[1] = cache[i] & 0x00ff; + codec->hw_write(codec->control_data, data, 2); + } + uda1380_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + uda1380_set_bias_level(codec, codec->suspend_bias_level); + return 0; +} + +/* + * initialise the UDA1380 driver + * register mixer and dsp interfaces with the kernel + */ +static int uda1380_init(struct snd_soc_device *socdev, int dac_clk) +{ + struct snd_soc_codec *codec = socdev->codec; + int ret = 0; + + codec->name = "UDA1380"; + codec->owner = THIS_MODULE; + codec->read = uda1380_read_reg_cache; + codec->write = uda1380_write; + codec->set_bias_level = uda1380_set_bias_level; + codec->dai = uda1380_dai; + codec->num_dai = ARRAY_SIZE(uda1380_dai); + codec->reg_cache = kmemdup(uda1380_reg, sizeof(uda1380_reg), + GFP_KERNEL); + if (codec->reg_cache == NULL) + return -ENOMEM; + codec->reg_cache_size = sizeof(uda1380_reg); + codec->reg_cache_step = 2; + uda1380_reset(codec); + + /* register pcms */ + ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1); + if (ret < 0) { + pr_err("uda1380: failed to create pcms\n"); + goto pcm_err; + } + + /* power on device */ + uda1380_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + /* set clock input */ + switch (dac_clk) { + case UDA1380_DAC_CLK_SYSCLK: + uda1380_write(codec, UDA1380_CLK, 0); + break; + case UDA1380_DAC_CLK_WSPLL: + uda1380_write(codec, UDA1380_CLK, R00_DAC_CLK); + break; + } + + /* uda1380 init */ + uda1380_add_controls(codec); + uda1380_add_widgets(codec); + ret = snd_soc_register_card(socdev); + if (ret < 0) { + pr_err("uda1380: failed to register card\n"); + goto card_err; + } + + return ret; + +card_err: + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); +pcm_err: + kfree(codec->reg_cache); + return ret; +} + +static struct snd_soc_device *uda1380_socdev; + +#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) + +#define I2C_DRIVERID_UDA1380 0xfefe /* liam - need a proper id */ + +static unsigned short normal_i2c[] = { 0, I2C_CLIENT_END }; + +/* Magic definition of all other variables and things */ +I2C_CLIENT_INSMOD; + +static struct i2c_driver uda1380_i2c_driver; +static struct i2c_client client_template; + +/* If the i2c layer weren't so broken, we could pass this kind of data + around */ + +static int uda1380_codec_probe(struct i2c_adapter *adap, int addr, int kind) +{ + struct snd_soc_device *socdev = uda1380_socdev; + struct uda1380_setup_data *setup = socdev->codec_data; + struct snd_soc_codec *codec = socdev->codec; + struct i2c_client *i2c; + int ret; + + if (addr != setup->i2c_address) + return -ENODEV; + + client_template.adapter = adap; + client_template.addr = addr; + + i2c = kmemdup(&client_template, sizeof(client_template), GFP_KERNEL); + if (i2c == NULL) { + kfree(codec); + return -ENOMEM; + } + i2c_set_clientdata(i2c, codec); + codec->control_data = i2c; + + ret = i2c_attach_client(i2c); + if (ret < 0) { + pr_err("uda1380: failed to attach codec at addr %x\n", addr); + goto err; + } + + ret = uda1380_init(socdev, setup->dac_clk); + if (ret < 0) { + pr_err("uda1380: failed to initialise UDA1380\n"); + goto err; + } + return ret; + +err: + kfree(codec); + kfree(i2c); + return ret; +} + +static int uda1380_i2c_detach(struct i2c_client *client) +{ + struct snd_soc_codec *codec = i2c_get_clientdata(client); + i2c_detach_client(client); + kfree(codec->reg_cache); + kfree(client); + return 0; +} + +static int uda1380_i2c_attach(struct i2c_adapter *adap) +{ + return i2c_probe(adap, &addr_data, uda1380_codec_probe); +} + +static struct i2c_driver uda1380_i2c_driver = { + .driver = { + .name = "UDA1380 I2C Codec", + .owner = THIS_MODULE, + }, + .id = I2C_DRIVERID_UDA1380, + .attach_adapter = uda1380_i2c_attach, + .detach_client = uda1380_i2c_detach, + .command = NULL, +}; + +static struct i2c_client client_template = { + .name = "UDA1380", + .driver = &uda1380_i2c_driver, +}; +#endif + +static int uda1380_probe(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct uda1380_setup_data *setup; + struct snd_soc_codec *codec; + int ret = 0; + + pr_info("UDA1380 Audio Codec %s", UDA1380_VERSION); + + setup = socdev->codec_data; + codec = kzalloc(sizeof(struct snd_soc_codec), GFP_KERNEL); + if (codec == NULL) + return -ENOMEM; + + socdev->codec = codec; + mutex_init(&codec->mutex); + INIT_LIST_HEAD(&codec->dapm_widgets); + INIT_LIST_HEAD(&codec->dapm_paths); + + uda1380_socdev = socdev; +#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) + if (setup->i2c_address) { + normal_i2c[0] = setup->i2c_address; + codec->hw_write = (hw_write_t)i2c_master_send; + ret = i2c_add_driver(&uda1380_i2c_driver); + if (ret != 0) + printk(KERN_ERR "can't add i2c driver"); + } +#else + /* Add other interfaces here */ +#endif + return ret; +} + +/* power down chip */ +static int uda1380_remove(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->codec; + + if (codec->control_data) + uda1380_set_bias_level(codec, SND_SOC_BIAS_OFF); + + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); +#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) + i2c_del_driver(&uda1380_i2c_driver); +#endif + kfree(codec); + + return 0; +} + +struct snd_soc_codec_device soc_codec_dev_uda1380 = { + .probe = uda1380_probe, + .remove = uda1380_remove, + .suspend = uda1380_suspend, + .resume = uda1380_resume, +}; +EXPORT_SYMBOL_GPL(soc_codec_dev_uda1380); + +MODULE_AUTHOR("Giorgio Padrin"); +MODULE_DESCRIPTION("Audio support for codec Philips UDA1380"); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/uda1380.h b/sound/soc/codecs/uda1380.h new file mode 100644 index 0000000..f9d885c8 --- /dev/null +++ b/sound/soc/codecs/uda1380.h @@ -0,0 +1,89 @@ +/* + * Audio support for Philips UDA1380 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Copyright (c) 2005 Giorgio Padrin + */ + +#ifndef _UDA1380_H +#define _UDA1380_H + +#define UDA1380_CLK 0x00 +#define UDA1380_IFACE 0x01 +#define UDA1380_PM 0x02 +#define UDA1380_AMIX 0x03 +#define UDA1380_HP 0x04 +#define UDA1380_MVOL 0x10 +#define UDA1380_MIXVOL 0x11 +#define UDA1380_MODE 0x12 +#define UDA1380_DEEMP 0x13 +#define UDA1380_MIXER 0x14 +#define UDA1380_INTSTAT 0x18 +#define UDA1380_DEC 0x20 +#define UDA1380_PGA 0x21 +#define UDA1380_ADC 0x22 +#define UDA1380_AGC 0x23 +#define UDA1380_DECSTAT 0x28 +#define UDA1380_RESET 0x7f + +#define UDA1380_CACHEREGNUM 0x24 + +/* Register flags */ +#define R00_EN_ADC 0x0800 +#define R00_EN_DEC 0x0400 +#define R00_EN_DAC 0x0200 +#define R00_EN_INT 0x0100 +#define R00_DAC_CLK 0x0010 +#define R01_SFORI_I2S 0x0000 +#define R01_SFORI_LSB16 0x0100 +#define R01_SFORI_LSB18 0x0200 +#define R01_SFORI_LSB20 0x0300 +#define R01_SFORI_MSB 0x0500 +#define R01_SFORI_MASK 0x0700 +#define R01_SFORO_I2S 0x0000 +#define R01_SFORO_LSB16 0x0001 +#define R01_SFORO_LSB18 0x0002 +#define R01_SFORO_LSB20 0x0003 +#define R01_SFORO_LSB24 0x0004 +#define R01_SFORO_MSB 0x0005 +#define R01_SFORO_MASK 0x0007 +#define R01_SEL_SOURCE 0x0040 +#define R01_SIM 0x0010 +#define R02_PON_PLL 0x8000 +#define R02_PON_HP 0x2000 +#define R02_PON_DAC 0x0400 +#define R02_PON_BIAS 0x0100 +#define R02_EN_AVC 0x0080 +#define R02_PON_AVC 0x0040 +#define R02_PON_LNA 0x0010 +#define R02_PON_PGAL 0x0008 +#define R02_PON_ADCL 0x0004 +#define R02_PON_PGAR 0x0002 +#define R02_PON_ADCR 0x0001 +#define R13_MTM 0x4000 +#define R14_SILENCE 0x0080 +#define R14_SDET_ON 0x0040 +#define R21_MT_ADC 0x8000 +#define R22_SEL_LNA 0x0008 +#define R22_SEL_MIC 0x0004 +#define R22_SKIP_DCFIL 0x0002 +#define R23_AGC_EN 0x0001 + +struct uda1380_setup_data { + unsigned short i2c_address; + int dac_clk; +#define UDA1380_DAC_CLK_SYSCLK 0 +#define UDA1380_DAC_CLK_WSPLL 1 +}; + +#define UDA1380_DAI_DUPLEX 0 /* playback and capture on single DAI */ +#define UDA1380_DAI_PLAYBACK 1 /* playback DAI */ +#define UDA1380_DAI_CAPTURE 2 /* capture DAI */ + +extern struct snd_soc_codec_dai uda1380_dai[3]; +extern struct snd_soc_codec_device soc_codec_dev_uda1380; + +#endif /* _UDA1380_H */ -- cgit v0.10.2 From 0261ac5f2f43a1906cfacfb19d62ed643d162cbe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 29 May 2008 09:31:50 +0200 Subject: xen: fix "xen: implement save/restore" -tip testing found the following build breakage: drivers/built-in.o: In function `xen_suspend': manage.c:(.text+0x4390f): undefined reference to `xen_console_resume' with this config: http://redhat.com/~mingo/misc/config-Thu_May_29_09_23_16_CEST_2008.bad i have bisected it down to: | commit 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58 | Author: Jeremy Fitzhardinge | Date: Mon May 26 23:31:27 2008 +0100 | | xen: implement save/restore the problem is that drivers/xen/manage.c is built unconditionally if CONFIG_XEN is enabled and makes use of xen_suspend(), but drivers/char/hvc_xen.c, where the xen_suspend() method is implemented, is only build if CONFIG_HVC_XEN=y as well. i have solved this by providing a NOP implementation for xen_suspend() in the !CONFIG_HVC_XEN case. Signed-off-by: Ingo Molnar diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index fd5483a..98b79bc 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -3,7 +3,11 @@ extern struct console xenboot_console; +#ifdef CONFIG_HVC_XEN void xen_console_resume(void); +#else +static inline void xen_console_resume(void) { } +#endif void xen_raw_console_write(const char *str); void xen_raw_printk(const char *fmt, ...); -- cgit v0.10.2 From c18f68e4d809a517ed8df540bac2993a4f14d9a4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 29 May 2008 18:53:05 +0200 Subject: [ALSA] Clean up sound/pci/ac97/Makefile Signed-off-by: Takashi Iwai diff --git a/sound/pci/ac97/Makefile b/sound/pci/ac97/Makefile index 0be48b1..ae36950 100644 --- a/sound/pci/ac97/Makefile +++ b/sound/pci/ac97/Makefile @@ -3,16 +3,11 @@ # Copyright (c) 2001 by Jaroslav Kysela # -snd-ac97-codec-objs := ac97_codec.o ac97_pcm.o - -ifneq ($(CONFIG_PROC_FS),) -snd-ac97-codec-objs += ac97_proc.o -endif +snd-ac97-codec-y := ac97_codec.o ac97_pcm.o +snd-ac97-codec-$(CONFIG_PROC_FS) += ac97_proc.o snd-ak4531-codec-objs := ak4531_codec.o # Toplevel Module Dependency obj-$(CONFIG_SND_AC97_CODEC) += snd-ac97-codec.o obj-$(CONFIG_SND_ENS1370) += snd-ak4531-codec.o - -obj-m := $(sort $(obj-m)) -- cgit v0.10.2 From 23ce1547638443f0053dd674e728062c48ff0e39 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 30 May 2008 09:22:22 +0200 Subject: [ALSA] Make ak4531 local to ens1370 driver The ak4531 module is used only by ens1370 driver (and unlikely that any other will use it ever). Let's make it local to ens1370. Signed-off-by: Takashi Iwai diff --git a/sound/pci/Makefile b/sound/pci/Makefile index 85ef14b..65b25d2 100644 --- a/sound/pci/Makefile +++ b/sound/pci/Makefile @@ -13,7 +13,7 @@ snd-bt87x-objs := bt87x.o snd-cmipci-objs := cmipci.o snd-cs4281-objs := cs4281.o snd-cs5530-objs := cs5530.o -snd-ens1370-objs := ens1370.o +snd-ens1370-objs := ens1370.o ak4531_codec.o snd-ens1371-objs := ens1371.o snd-es1938-objs := es1938.o snd-es1968-objs := es1968.o diff --git a/sound/pci/ac97/Makefile b/sound/pci/ac97/Makefile index ae36950..41fa322 100644 --- a/sound/pci/ac97/Makefile +++ b/sound/pci/ac97/Makefile @@ -6,8 +6,5 @@ snd-ac97-codec-y := ac97_codec.o ac97_pcm.o snd-ac97-codec-$(CONFIG_PROC_FS) += ac97_proc.o -snd-ak4531-codec-objs := ak4531_codec.o - # Toplevel Module Dependency obj-$(CONFIG_SND_AC97_CODEC) += snd-ac97-codec.o -obj-$(CONFIG_SND_ENS1370) += snd-ak4531-codec.o diff --git a/sound/pci/ac97/ak4531_codec.c b/sound/pci/ac97/ak4531_codec.c deleted file mode 100644 index c0c16339..0000000 --- a/sound/pci/ac97/ak4531_codec.c +++ /dev/null @@ -1,508 +0,0 @@ -/* - * Copyright (c) by Jaroslav Kysela - * Universal routines for AK4531 codec - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include -#include -#include -#include - -#include -#include -#include - -MODULE_AUTHOR("Jaroslav Kysela "); -MODULE_DESCRIPTION("Universal routines for AK4531 codec"); -MODULE_LICENSE("GPL"); - -#ifdef CONFIG_PROC_FS -static void snd_ak4531_proc_init(struct snd_card *card, struct snd_ak4531 *ak4531); -#else -#define snd_ak4531_proc_init(card,ak) -#endif - -/* - * - */ - -#if 0 - -static void snd_ak4531_dump(struct snd_ak4531 *ak4531) -{ - int idx; - - for (idx = 0; idx < 0x19; idx++) - printk("ak4531 0x%x: 0x%x\n", idx, ak4531->regs[idx]); -} - -#endif - -/* - * - */ - -#define AK4531_SINGLE(xname, xindex, reg, shift, mask, invert) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ - .info = snd_ak4531_info_single, \ - .get = snd_ak4531_get_single, .put = snd_ak4531_put_single, \ - .private_value = reg | (shift << 16) | (mask << 24) | (invert << 22) } -#define AK4531_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ - .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ - .name = xname, .index = xindex, \ - .info = snd_ak4531_info_single, \ - .get = snd_ak4531_get_single, .put = snd_ak4531_put_single, \ - .private_value = reg | (shift << 16) | (mask << 24) | (invert << 22), \ - .tlv = { .p = (xtlv) } } - -static int snd_ak4531_info_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) -{ - int mask = (kcontrol->private_value >> 24) & 0xff; - - uinfo->type = mask == 1 ? SNDRV_CTL_ELEM_TYPE_BOOLEAN : SNDRV_CTL_ELEM_TYPE_INTEGER; - uinfo->count = 1; - uinfo->value.integer.min = 0; - uinfo->value.integer.max = mask; - return 0; -} - -static int snd_ak4531_get_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) -{ - struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); - int reg = kcontrol->private_value & 0xff; - int shift = (kcontrol->private_value >> 16) & 0x07; - int mask = (kcontrol->private_value >> 24) & 0xff; - int invert = (kcontrol->private_value >> 22) & 1; - int val; - - mutex_lock(&ak4531->reg_mutex); - val = (ak4531->regs[reg] >> shift) & mask; - mutex_unlock(&ak4531->reg_mutex); - if (invert) { - val = mask - val; - } - ucontrol->value.integer.value[0] = val; - return 0; -} - -static int snd_ak4531_put_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) -{ - struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); - int reg = kcontrol->private_value & 0xff; - int shift = (kcontrol->private_value >> 16) & 0x07; - int mask = (kcontrol->private_value >> 24) & 0xff; - int invert = (kcontrol->private_value >> 22) & 1; - int change; - int val; - - val = ucontrol->value.integer.value[0] & mask; - if (invert) { - val = mask - val; - } - val <<= shift; - mutex_lock(&ak4531->reg_mutex); - val = (ak4531->regs[reg] & ~(mask << shift)) | val; - change = val != ak4531->regs[reg]; - ak4531->write(ak4531, reg, ak4531->regs[reg] = val); - mutex_unlock(&ak4531->reg_mutex); - return change; -} - -#define AK4531_DOUBLE(xname, xindex, left_reg, right_reg, left_shift, right_shift, mask, invert) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ - .info = snd_ak4531_info_double, \ - .get = snd_ak4531_get_double, .put = snd_ak4531_put_double, \ - .private_value = left_reg | (right_reg << 8) | (left_shift << 16) | (right_shift << 19) | (mask << 24) | (invert << 22) } -#define AK4531_DOUBLE_TLV(xname, xindex, left_reg, right_reg, left_shift, right_shift, mask, invert, xtlv) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ - .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ - .name = xname, .index = xindex, \ - .info = snd_ak4531_info_double, \ - .get = snd_ak4531_get_double, .put = snd_ak4531_put_double, \ - .private_value = left_reg | (right_reg << 8) | (left_shift << 16) | (right_shift << 19) | (mask << 24) | (invert << 22), \ - .tlv = { .p = (xtlv) } } - -static int snd_ak4531_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) -{ - int mask = (kcontrol->private_value >> 24) & 0xff; - - uinfo->type = mask == 1 ? SNDRV_CTL_ELEM_TYPE_BOOLEAN : SNDRV_CTL_ELEM_TYPE_INTEGER; - uinfo->count = 2; - uinfo->value.integer.min = 0; - uinfo->value.integer.max = mask; - return 0; -} - -static int snd_ak4531_get_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) -{ - struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); - int left_reg = kcontrol->private_value & 0xff; - int right_reg = (kcontrol->private_value >> 8) & 0xff; - int left_shift = (kcontrol->private_value >> 16) & 0x07; - int right_shift = (kcontrol->private_value >> 19) & 0x07; - int mask = (kcontrol->private_value >> 24) & 0xff; - int invert = (kcontrol->private_value >> 22) & 1; - int left, right; - - mutex_lock(&ak4531->reg_mutex); - left = (ak4531->regs[left_reg] >> left_shift) & mask; - right = (ak4531->regs[right_reg] >> right_shift) & mask; - mutex_unlock(&ak4531->reg_mutex); - if (invert) { - left = mask - left; - right = mask - right; - } - ucontrol->value.integer.value[0] = left; - ucontrol->value.integer.value[1] = right; - return 0; -} - -static int snd_ak4531_put_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) -{ - struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); - int left_reg = kcontrol->private_value & 0xff; - int right_reg = (kcontrol->private_value >> 8) & 0xff; - int left_shift = (kcontrol->private_value >> 16) & 0x07; - int right_shift = (kcontrol->private_value >> 19) & 0x07; - int mask = (kcontrol->private_value >> 24) & 0xff; - int invert = (kcontrol->private_value >> 22) & 1; - int change; - int left, right; - - left = ucontrol->value.integer.value[0] & mask; - right = ucontrol->value.integer.value[1] & mask; - if (invert) { - left = mask - left; - right = mask - right; - } - left <<= left_shift; - right <<= right_shift; - mutex_lock(&ak4531->reg_mutex); - if (left_reg == right_reg) { - left = (ak4531->regs[left_reg] & ~((mask << left_shift) | (mask << right_shift))) | left | right; - change = left != ak4531->regs[left_reg]; - ak4531->write(ak4531, left_reg, ak4531->regs[left_reg] = left); - } else { - left = (ak4531->regs[left_reg] & ~(mask << left_shift)) | left; - right = (ak4531->regs[right_reg] & ~(mask << right_shift)) | right; - change = left != ak4531->regs[left_reg] || right != ak4531->regs[right_reg]; - ak4531->write(ak4531, left_reg, ak4531->regs[left_reg] = left); - ak4531->write(ak4531, right_reg, ak4531->regs[right_reg] = right); - } - mutex_unlock(&ak4531->reg_mutex); - return change; -} - -#define AK4531_INPUT_SW(xname, xindex, reg1, reg2, left_shift, right_shift) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ - .info = snd_ak4531_info_input_sw, \ - .get = snd_ak4531_get_input_sw, .put = snd_ak4531_put_input_sw, \ - .private_value = reg1 | (reg2 << 8) | (left_shift << 16) | (right_shift << 24) } - -static int snd_ak4531_info_input_sw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) -{ - uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; - uinfo->count = 4; - uinfo->value.integer.min = 0; - uinfo->value.integer.max = 1; - return 0; -} - -static int snd_ak4531_get_input_sw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) -{ - struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); - int reg1 = kcontrol->private_value & 0xff; - int reg2 = (kcontrol->private_value >> 8) & 0xff; - int left_shift = (kcontrol->private_value >> 16) & 0x0f; - int right_shift = (kcontrol->private_value >> 24) & 0x0f; - - mutex_lock(&ak4531->reg_mutex); - ucontrol->value.integer.value[0] = (ak4531->regs[reg1] >> left_shift) & 1; - ucontrol->value.integer.value[1] = (ak4531->regs[reg2] >> left_shift) & 1; - ucontrol->value.integer.value[2] = (ak4531->regs[reg1] >> right_shift) & 1; - ucontrol->value.integer.value[3] = (ak4531->regs[reg2] >> right_shift) & 1; - mutex_unlock(&ak4531->reg_mutex); - return 0; -} - -static int snd_ak4531_put_input_sw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) -{ - struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); - int reg1 = kcontrol->private_value & 0xff; - int reg2 = (kcontrol->private_value >> 8) & 0xff; - int left_shift = (kcontrol->private_value >> 16) & 0x0f; - int right_shift = (kcontrol->private_value >> 24) & 0x0f; - int change; - int val1, val2; - - mutex_lock(&ak4531->reg_mutex); - val1 = ak4531->regs[reg1] & ~((1 << left_shift) | (1 << right_shift)); - val2 = ak4531->regs[reg2] & ~((1 << left_shift) | (1 << right_shift)); - val1 |= (ucontrol->value.integer.value[0] & 1) << left_shift; - val2 |= (ucontrol->value.integer.value[1] & 1) << left_shift; - val1 |= (ucontrol->value.integer.value[2] & 1) << right_shift; - val2 |= (ucontrol->value.integer.value[3] & 1) << right_shift; - change = val1 != ak4531->regs[reg1] || val2 != ak4531->regs[reg2]; - ak4531->write(ak4531, reg1, ak4531->regs[reg1] = val1); - ak4531->write(ak4531, reg2, ak4531->regs[reg2] = val2); - mutex_unlock(&ak4531->reg_mutex); - return change; -} - -static const DECLARE_TLV_DB_SCALE(db_scale_master, -6200, 200, 0); -static const DECLARE_TLV_DB_SCALE(db_scale_mono, -2800, 400, 0); -static const DECLARE_TLV_DB_SCALE(db_scale_input, -5000, 200, 0); - -static struct snd_kcontrol_new snd_ak4531_controls[] = { - -AK4531_DOUBLE_TLV("Master Playback Switch", 0, - AK4531_LMASTER, AK4531_RMASTER, 7, 7, 1, 1, - db_scale_master), -AK4531_DOUBLE("Master Playback Volume", 0, AK4531_LMASTER, AK4531_RMASTER, 0, 0, 0x1f, 1), - -AK4531_SINGLE_TLV("Master Mono Playback Switch", 0, AK4531_MONO_OUT, 7, 1, 1, - db_scale_mono), -AK4531_SINGLE("Master Mono Playback Volume", 0, AK4531_MONO_OUT, 0, 0x07, 1), - -AK4531_DOUBLE("PCM Switch", 0, AK4531_LVOICE, AK4531_RVOICE, 7, 7, 1, 1), -AK4531_DOUBLE_TLV("PCM Volume", 0, AK4531_LVOICE, AK4531_RVOICE, 0, 0, 0x1f, 1, - db_scale_input), -AK4531_DOUBLE("PCM Playback Switch", 0, AK4531_OUT_SW2, AK4531_OUT_SW2, 3, 2, 1, 0), -AK4531_DOUBLE("PCM Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 2, 2, 1, 0), - -AK4531_DOUBLE("PCM Switch", 1, AK4531_LFM, AK4531_RFM, 7, 7, 1, 1), -AK4531_DOUBLE_TLV("PCM Volume", 1, AK4531_LFM, AK4531_RFM, 0, 0, 0x1f, 1, - db_scale_input), -AK4531_DOUBLE("PCM Playback Switch", 1, AK4531_OUT_SW1, AK4531_OUT_SW1, 6, 5, 1, 0), -AK4531_INPUT_SW("PCM Capture Route", 1, AK4531_LIN_SW1, AK4531_RIN_SW1, 6, 5), - -AK4531_DOUBLE("CD Switch", 0, AK4531_LCD, AK4531_RCD, 7, 7, 1, 1), -AK4531_DOUBLE_TLV("CD Volume", 0, AK4531_LCD, AK4531_RCD, 0, 0, 0x1f, 1, - db_scale_input), -AK4531_DOUBLE("CD Playback Switch", 0, AK4531_OUT_SW1, AK4531_OUT_SW1, 2, 1, 1, 0), -AK4531_INPUT_SW("CD Capture Route", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 2, 1), - -AK4531_DOUBLE("Line Switch", 0, AK4531_LLINE, AK4531_RLINE, 7, 7, 1, 1), -AK4531_DOUBLE_TLV("Line Volume", 0, AK4531_LLINE, AK4531_RLINE, 0, 0, 0x1f, 1, - db_scale_input), -AK4531_DOUBLE("Line Playback Switch", 0, AK4531_OUT_SW1, AK4531_OUT_SW1, 4, 3, 1, 0), -AK4531_INPUT_SW("Line Capture Route", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 4, 3), - -AK4531_DOUBLE("Aux Switch", 0, AK4531_LAUXA, AK4531_RAUXA, 7, 7, 1, 1), -AK4531_DOUBLE_TLV("Aux Volume", 0, AK4531_LAUXA, AK4531_RAUXA, 0, 0, 0x1f, 1, - db_scale_input), -AK4531_DOUBLE("Aux Playback Switch", 0, AK4531_OUT_SW2, AK4531_OUT_SW2, 5, 4, 1, 0), -AK4531_INPUT_SW("Aux Capture Route", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 4, 3), - -AK4531_SINGLE("Mono Switch", 0, AK4531_MONO1, 7, 1, 1), -AK4531_SINGLE_TLV("Mono Volume", 0, AK4531_MONO1, 0, 0x1f, 1, db_scale_input), -AK4531_SINGLE("Mono Playback Switch", 0, AK4531_OUT_SW2, 0, 1, 0), -AK4531_DOUBLE("Mono Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 0, 0, 1, 0), - -AK4531_SINGLE("Mono Switch", 1, AK4531_MONO2, 7, 1, 1), -AK4531_SINGLE_TLV("Mono Volume", 1, AK4531_MONO2, 0, 0x1f, 1, db_scale_input), -AK4531_SINGLE("Mono Playback Switch", 1, AK4531_OUT_SW2, 1, 1, 0), -AK4531_DOUBLE("Mono Capture Switch", 1, AK4531_LIN_SW2, AK4531_RIN_SW2, 1, 1, 1, 0), - -AK4531_SINGLE_TLV("Mic Volume", 0, AK4531_MIC, 0, 0x1f, 1, db_scale_input), -AK4531_SINGLE("Mic Switch", 0, AK4531_MIC, 7, 1, 1), -AK4531_SINGLE("Mic Playback Switch", 0, AK4531_OUT_SW1, 0, 1, 0), -AK4531_DOUBLE("Mic Capture Switch", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 0, 0, 1, 0), - -AK4531_DOUBLE("Mic Bypass Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 7, 7, 1, 0), -AK4531_DOUBLE("Mono1 Bypass Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 6, 6, 1, 0), -AK4531_DOUBLE("Mono2 Bypass Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 5, 5, 1, 0), - -AK4531_SINGLE("AD Input Select", 0, AK4531_AD_IN, 0, 1, 0), -AK4531_SINGLE("Mic Boost (+30dB)", 0, AK4531_MIC_GAIN, 0, 1, 0) -}; - -static int snd_ak4531_free(struct snd_ak4531 *ak4531) -{ - if (ak4531) { - if (ak4531->private_free) - ak4531->private_free(ak4531); - kfree(ak4531); - } - return 0; -} - -static int snd_ak4531_dev_free(struct snd_device *device) -{ - struct snd_ak4531 *ak4531 = device->device_data; - return snd_ak4531_free(ak4531); -} - -static u8 snd_ak4531_initial_map[0x19 + 1] = { - 0x9f, /* 00: Master Volume Lch */ - 0x9f, /* 01: Master Volume Rch */ - 0x9f, /* 02: Voice Volume Lch */ - 0x9f, /* 03: Voice Volume Rch */ - 0x9f, /* 04: FM Volume Lch */ - 0x9f, /* 05: FM Volume Rch */ - 0x9f, /* 06: CD Audio Volume Lch */ - 0x9f, /* 07: CD Audio Volume Rch */ - 0x9f, /* 08: Line Volume Lch */ - 0x9f, /* 09: Line Volume Rch */ - 0x9f, /* 0a: Aux Volume Lch */ - 0x9f, /* 0b: Aux Volume Rch */ - 0x9f, /* 0c: Mono1 Volume */ - 0x9f, /* 0d: Mono2 Volume */ - 0x9f, /* 0e: Mic Volume */ - 0x87, /* 0f: Mono-out Volume */ - 0x00, /* 10: Output Mixer SW1 */ - 0x00, /* 11: Output Mixer SW2 */ - 0x00, /* 12: Lch Input Mixer SW1 */ - 0x00, /* 13: Rch Input Mixer SW1 */ - 0x00, /* 14: Lch Input Mixer SW2 */ - 0x00, /* 15: Rch Input Mixer SW2 */ - 0x00, /* 16: Reset & Power Down */ - 0x00, /* 17: Clock Select */ - 0x00, /* 18: AD Input Select */ - 0x01 /* 19: Mic Amp Setup */ -}; - -int snd_ak4531_mixer(struct snd_card *card, struct snd_ak4531 *_ak4531, - struct snd_ak4531 **rak4531) -{ - unsigned int idx; - int err; - struct snd_ak4531 *ak4531; - static struct snd_device_ops ops = { - .dev_free = snd_ak4531_dev_free, - }; - - snd_assert(rak4531 != NULL, return -EINVAL); - *rak4531 = NULL; - snd_assert(card != NULL && _ak4531 != NULL, return -EINVAL); - ak4531 = kzalloc(sizeof(*ak4531), GFP_KERNEL); - if (ak4531 == NULL) - return -ENOMEM; - *ak4531 = *_ak4531; - mutex_init(&ak4531->reg_mutex); - if ((err = snd_component_add(card, "AK4531")) < 0) { - snd_ak4531_free(ak4531); - return err; - } - strcpy(card->mixername, "Asahi Kasei AK4531"); - ak4531->write(ak4531, AK4531_RESET, 0x03); /* no RST, PD */ - udelay(100); - ak4531->write(ak4531, AK4531_CLOCK, 0x00); /* CODEC ADC and CODEC DAC use {LR,B}CLK2 and run off LRCLK2 PLL */ - for (idx = 0; idx <= 0x19; idx++) { - if (idx == AK4531_RESET || idx == AK4531_CLOCK) - continue; - ak4531->write(ak4531, idx, ak4531->regs[idx] = snd_ak4531_initial_map[idx]); /* recording source is mixer */ - } - for (idx = 0; idx < ARRAY_SIZE(snd_ak4531_controls); idx++) { - if ((err = snd_ctl_add(card, snd_ctl_new1(&snd_ak4531_controls[idx], ak4531))) < 0) { - snd_ak4531_free(ak4531); - return err; - } - } - snd_ak4531_proc_init(card, ak4531); - if ((err = snd_device_new(card, SNDRV_DEV_CODEC, ak4531, &ops)) < 0) { - snd_ak4531_free(ak4531); - return err; - } - -#if 0 - snd_ak4531_dump(ak4531); -#endif - *rak4531 = ak4531; - return 0; -} - -/* - * power management - */ -#ifdef CONFIG_PM -void snd_ak4531_suspend(struct snd_ak4531 *ak4531) -{ - /* mute */ - ak4531->write(ak4531, AK4531_LMASTER, 0x9f); - ak4531->write(ak4531, AK4531_RMASTER, 0x9f); - /* powerdown */ - ak4531->write(ak4531, AK4531_RESET, 0x01); -} - -void snd_ak4531_resume(struct snd_ak4531 *ak4531) -{ - int idx; - - /* initialize */ - ak4531->write(ak4531, AK4531_RESET, 0x03); - udelay(100); - ak4531->write(ak4531, AK4531_CLOCK, 0x00); - /* restore mixer registers */ - for (idx = 0; idx <= 0x19; idx++) { - if (idx == AK4531_RESET || idx == AK4531_CLOCK) - continue; - ak4531->write(ak4531, idx, ak4531->regs[idx]); - } -} -#endif - -#ifdef CONFIG_PROC_FS -/* - * /proc interface - */ - -static void snd_ak4531_proc_read(struct snd_info_entry *entry, - struct snd_info_buffer *buffer) -{ - struct snd_ak4531 *ak4531 = entry->private_data; - - snd_iprintf(buffer, "Asahi Kasei AK4531\n\n"); - snd_iprintf(buffer, "Recording source : %s\n" - "MIC gain : %s\n", - ak4531->regs[AK4531_AD_IN] & 1 ? "external" : "mixer", - ak4531->regs[AK4531_MIC_GAIN] & 1 ? "+30dB" : "+0dB"); -} - -static void snd_ak4531_proc_init(struct snd_card *card, struct snd_ak4531 *ak4531) -{ - struct snd_info_entry *entry; - - if (! snd_card_proc_new(card, "ak4531", &entry)) - snd_info_set_text_ops(entry, ak4531, snd_ak4531_proc_read); -} -#endif - -EXPORT_SYMBOL(snd_ak4531_mixer); -#ifdef CONFIG_PM -EXPORT_SYMBOL(snd_ak4531_suspend); -EXPORT_SYMBOL(snd_ak4531_resume); -#endif - -/* - * INIT part - */ - -static int __init alsa_ak4531_init(void) -{ - return 0; -} - -static void __exit alsa_ak4531_exit(void) -{ -} - -module_init(alsa_ak4531_init) -module_exit(alsa_ak4531_exit) diff --git a/sound/pci/ak4531_codec.c b/sound/pci/ak4531_codec.c new file mode 100644 index 0000000..6a99eed --- /dev/null +++ b/sound/pci/ak4531_codec.c @@ -0,0 +1,488 @@ +/* + * Copyright (c) by Jaroslav Kysela + * Universal routines for AK4531 codec + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Jaroslav Kysela "); +MODULE_DESCRIPTION("Universal routines for AK4531 codec"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_PROC_FS +static void snd_ak4531_proc_init(struct snd_card *card, struct snd_ak4531 *ak4531); +#else +#define snd_ak4531_proc_init(card,ak) +#endif + +/* + * + */ + +#if 0 + +static void snd_ak4531_dump(struct snd_ak4531 *ak4531) +{ + int idx; + + for (idx = 0; idx < 0x19; idx++) + printk("ak4531 0x%x: 0x%x\n", idx, ak4531->regs[idx]); +} + +#endif + +/* + * + */ + +#define AK4531_SINGLE(xname, xindex, reg, shift, mask, invert) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ + .info = snd_ak4531_info_single, \ + .get = snd_ak4531_get_single, .put = snd_ak4531_put_single, \ + .private_value = reg | (shift << 16) | (mask << 24) | (invert << 22) } +#define AK4531_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ + .name = xname, .index = xindex, \ + .info = snd_ak4531_info_single, \ + .get = snd_ak4531_get_single, .put = snd_ak4531_put_single, \ + .private_value = reg | (shift << 16) | (mask << 24) | (invert << 22), \ + .tlv = { .p = (xtlv) } } + +static int snd_ak4531_info_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) +{ + int mask = (kcontrol->private_value >> 24) & 0xff; + + uinfo->type = mask == 1 ? SNDRV_CTL_ELEM_TYPE_BOOLEAN : SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = mask; + return 0; +} + +static int snd_ak4531_get_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) +{ + struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); + int reg = kcontrol->private_value & 0xff; + int shift = (kcontrol->private_value >> 16) & 0x07; + int mask = (kcontrol->private_value >> 24) & 0xff; + int invert = (kcontrol->private_value >> 22) & 1; + int val; + + mutex_lock(&ak4531->reg_mutex); + val = (ak4531->regs[reg] >> shift) & mask; + mutex_unlock(&ak4531->reg_mutex); + if (invert) { + val = mask - val; + } + ucontrol->value.integer.value[0] = val; + return 0; +} + +static int snd_ak4531_put_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) +{ + struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); + int reg = kcontrol->private_value & 0xff; + int shift = (kcontrol->private_value >> 16) & 0x07; + int mask = (kcontrol->private_value >> 24) & 0xff; + int invert = (kcontrol->private_value >> 22) & 1; + int change; + int val; + + val = ucontrol->value.integer.value[0] & mask; + if (invert) { + val = mask - val; + } + val <<= shift; + mutex_lock(&ak4531->reg_mutex); + val = (ak4531->regs[reg] & ~(mask << shift)) | val; + change = val != ak4531->regs[reg]; + ak4531->write(ak4531, reg, ak4531->regs[reg] = val); + mutex_unlock(&ak4531->reg_mutex); + return change; +} + +#define AK4531_DOUBLE(xname, xindex, left_reg, right_reg, left_shift, right_shift, mask, invert) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ + .info = snd_ak4531_info_double, \ + .get = snd_ak4531_get_double, .put = snd_ak4531_put_double, \ + .private_value = left_reg | (right_reg << 8) | (left_shift << 16) | (right_shift << 19) | (mask << 24) | (invert << 22) } +#define AK4531_DOUBLE_TLV(xname, xindex, left_reg, right_reg, left_shift, right_shift, mask, invert, xtlv) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ + .name = xname, .index = xindex, \ + .info = snd_ak4531_info_double, \ + .get = snd_ak4531_get_double, .put = snd_ak4531_put_double, \ + .private_value = left_reg | (right_reg << 8) | (left_shift << 16) | (right_shift << 19) | (mask << 24) | (invert << 22), \ + .tlv = { .p = (xtlv) } } + +static int snd_ak4531_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) +{ + int mask = (kcontrol->private_value >> 24) & 0xff; + + uinfo->type = mask == 1 ? SNDRV_CTL_ELEM_TYPE_BOOLEAN : SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 2; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = mask; + return 0; +} + +static int snd_ak4531_get_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) +{ + struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); + int left_reg = kcontrol->private_value & 0xff; + int right_reg = (kcontrol->private_value >> 8) & 0xff; + int left_shift = (kcontrol->private_value >> 16) & 0x07; + int right_shift = (kcontrol->private_value >> 19) & 0x07; + int mask = (kcontrol->private_value >> 24) & 0xff; + int invert = (kcontrol->private_value >> 22) & 1; + int left, right; + + mutex_lock(&ak4531->reg_mutex); + left = (ak4531->regs[left_reg] >> left_shift) & mask; + right = (ak4531->regs[right_reg] >> right_shift) & mask; + mutex_unlock(&ak4531->reg_mutex); + if (invert) { + left = mask - left; + right = mask - right; + } + ucontrol->value.integer.value[0] = left; + ucontrol->value.integer.value[1] = right; + return 0; +} + +static int snd_ak4531_put_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) +{ + struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); + int left_reg = kcontrol->private_value & 0xff; + int right_reg = (kcontrol->private_value >> 8) & 0xff; + int left_shift = (kcontrol->private_value >> 16) & 0x07; + int right_shift = (kcontrol->private_value >> 19) & 0x07; + int mask = (kcontrol->private_value >> 24) & 0xff; + int invert = (kcontrol->private_value >> 22) & 1; + int change; + int left, right; + + left = ucontrol->value.integer.value[0] & mask; + right = ucontrol->value.integer.value[1] & mask; + if (invert) { + left = mask - left; + right = mask - right; + } + left <<= left_shift; + right <<= right_shift; + mutex_lock(&ak4531->reg_mutex); + if (left_reg == right_reg) { + left = (ak4531->regs[left_reg] & ~((mask << left_shift) | (mask << right_shift))) | left | right; + change = left != ak4531->regs[left_reg]; + ak4531->write(ak4531, left_reg, ak4531->regs[left_reg] = left); + } else { + left = (ak4531->regs[left_reg] & ~(mask << left_shift)) | left; + right = (ak4531->regs[right_reg] & ~(mask << right_shift)) | right; + change = left != ak4531->regs[left_reg] || right != ak4531->regs[right_reg]; + ak4531->write(ak4531, left_reg, ak4531->regs[left_reg] = left); + ak4531->write(ak4531, right_reg, ak4531->regs[right_reg] = right); + } + mutex_unlock(&ak4531->reg_mutex); + return change; +} + +#define AK4531_INPUT_SW(xname, xindex, reg1, reg2, left_shift, right_shift) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ + .info = snd_ak4531_info_input_sw, \ + .get = snd_ak4531_get_input_sw, .put = snd_ak4531_put_input_sw, \ + .private_value = reg1 | (reg2 << 8) | (left_shift << 16) | (right_shift << 24) } + +static int snd_ak4531_info_input_sw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 4; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int snd_ak4531_get_input_sw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) +{ + struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); + int reg1 = kcontrol->private_value & 0xff; + int reg2 = (kcontrol->private_value >> 8) & 0xff; + int left_shift = (kcontrol->private_value >> 16) & 0x0f; + int right_shift = (kcontrol->private_value >> 24) & 0x0f; + + mutex_lock(&ak4531->reg_mutex); + ucontrol->value.integer.value[0] = (ak4531->regs[reg1] >> left_shift) & 1; + ucontrol->value.integer.value[1] = (ak4531->regs[reg2] >> left_shift) & 1; + ucontrol->value.integer.value[2] = (ak4531->regs[reg1] >> right_shift) & 1; + ucontrol->value.integer.value[3] = (ak4531->regs[reg2] >> right_shift) & 1; + mutex_unlock(&ak4531->reg_mutex); + return 0; +} + +static int snd_ak4531_put_input_sw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) +{ + struct snd_ak4531 *ak4531 = snd_kcontrol_chip(kcontrol); + int reg1 = kcontrol->private_value & 0xff; + int reg2 = (kcontrol->private_value >> 8) & 0xff; + int left_shift = (kcontrol->private_value >> 16) & 0x0f; + int right_shift = (kcontrol->private_value >> 24) & 0x0f; + int change; + int val1, val2; + + mutex_lock(&ak4531->reg_mutex); + val1 = ak4531->regs[reg1] & ~((1 << left_shift) | (1 << right_shift)); + val2 = ak4531->regs[reg2] & ~((1 << left_shift) | (1 << right_shift)); + val1 |= (ucontrol->value.integer.value[0] & 1) << left_shift; + val2 |= (ucontrol->value.integer.value[1] & 1) << left_shift; + val1 |= (ucontrol->value.integer.value[2] & 1) << right_shift; + val2 |= (ucontrol->value.integer.value[3] & 1) << right_shift; + change = val1 != ak4531->regs[reg1] || val2 != ak4531->regs[reg2]; + ak4531->write(ak4531, reg1, ak4531->regs[reg1] = val1); + ak4531->write(ak4531, reg2, ak4531->regs[reg2] = val2); + mutex_unlock(&ak4531->reg_mutex); + return change; +} + +static const DECLARE_TLV_DB_SCALE(db_scale_master, -6200, 200, 0); +static const DECLARE_TLV_DB_SCALE(db_scale_mono, -2800, 400, 0); +static const DECLARE_TLV_DB_SCALE(db_scale_input, -5000, 200, 0); + +static struct snd_kcontrol_new snd_ak4531_controls[] __devinitdata = { + +AK4531_DOUBLE_TLV("Master Playback Switch", 0, + AK4531_LMASTER, AK4531_RMASTER, 7, 7, 1, 1, + db_scale_master), +AK4531_DOUBLE("Master Playback Volume", 0, AK4531_LMASTER, AK4531_RMASTER, 0, 0, 0x1f, 1), + +AK4531_SINGLE_TLV("Master Mono Playback Switch", 0, AK4531_MONO_OUT, 7, 1, 1, + db_scale_mono), +AK4531_SINGLE("Master Mono Playback Volume", 0, AK4531_MONO_OUT, 0, 0x07, 1), + +AK4531_DOUBLE("PCM Switch", 0, AK4531_LVOICE, AK4531_RVOICE, 7, 7, 1, 1), +AK4531_DOUBLE_TLV("PCM Volume", 0, AK4531_LVOICE, AK4531_RVOICE, 0, 0, 0x1f, 1, + db_scale_input), +AK4531_DOUBLE("PCM Playback Switch", 0, AK4531_OUT_SW2, AK4531_OUT_SW2, 3, 2, 1, 0), +AK4531_DOUBLE("PCM Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 2, 2, 1, 0), + +AK4531_DOUBLE("PCM Switch", 1, AK4531_LFM, AK4531_RFM, 7, 7, 1, 1), +AK4531_DOUBLE_TLV("PCM Volume", 1, AK4531_LFM, AK4531_RFM, 0, 0, 0x1f, 1, + db_scale_input), +AK4531_DOUBLE("PCM Playback Switch", 1, AK4531_OUT_SW1, AK4531_OUT_SW1, 6, 5, 1, 0), +AK4531_INPUT_SW("PCM Capture Route", 1, AK4531_LIN_SW1, AK4531_RIN_SW1, 6, 5), + +AK4531_DOUBLE("CD Switch", 0, AK4531_LCD, AK4531_RCD, 7, 7, 1, 1), +AK4531_DOUBLE_TLV("CD Volume", 0, AK4531_LCD, AK4531_RCD, 0, 0, 0x1f, 1, + db_scale_input), +AK4531_DOUBLE("CD Playback Switch", 0, AK4531_OUT_SW1, AK4531_OUT_SW1, 2, 1, 1, 0), +AK4531_INPUT_SW("CD Capture Route", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 2, 1), + +AK4531_DOUBLE("Line Switch", 0, AK4531_LLINE, AK4531_RLINE, 7, 7, 1, 1), +AK4531_DOUBLE_TLV("Line Volume", 0, AK4531_LLINE, AK4531_RLINE, 0, 0, 0x1f, 1, + db_scale_input), +AK4531_DOUBLE("Line Playback Switch", 0, AK4531_OUT_SW1, AK4531_OUT_SW1, 4, 3, 1, 0), +AK4531_INPUT_SW("Line Capture Route", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 4, 3), + +AK4531_DOUBLE("Aux Switch", 0, AK4531_LAUXA, AK4531_RAUXA, 7, 7, 1, 1), +AK4531_DOUBLE_TLV("Aux Volume", 0, AK4531_LAUXA, AK4531_RAUXA, 0, 0, 0x1f, 1, + db_scale_input), +AK4531_DOUBLE("Aux Playback Switch", 0, AK4531_OUT_SW2, AK4531_OUT_SW2, 5, 4, 1, 0), +AK4531_INPUT_SW("Aux Capture Route", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 4, 3), + +AK4531_SINGLE("Mono Switch", 0, AK4531_MONO1, 7, 1, 1), +AK4531_SINGLE_TLV("Mono Volume", 0, AK4531_MONO1, 0, 0x1f, 1, db_scale_input), +AK4531_SINGLE("Mono Playback Switch", 0, AK4531_OUT_SW2, 0, 1, 0), +AK4531_DOUBLE("Mono Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 0, 0, 1, 0), + +AK4531_SINGLE("Mono Switch", 1, AK4531_MONO2, 7, 1, 1), +AK4531_SINGLE_TLV("Mono Volume", 1, AK4531_MONO2, 0, 0x1f, 1, db_scale_input), +AK4531_SINGLE("Mono Playback Switch", 1, AK4531_OUT_SW2, 1, 1, 0), +AK4531_DOUBLE("Mono Capture Switch", 1, AK4531_LIN_SW2, AK4531_RIN_SW2, 1, 1, 1, 0), + +AK4531_SINGLE_TLV("Mic Volume", 0, AK4531_MIC, 0, 0x1f, 1, db_scale_input), +AK4531_SINGLE("Mic Switch", 0, AK4531_MIC, 7, 1, 1), +AK4531_SINGLE("Mic Playback Switch", 0, AK4531_OUT_SW1, 0, 1, 0), +AK4531_DOUBLE("Mic Capture Switch", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 0, 0, 1, 0), + +AK4531_DOUBLE("Mic Bypass Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 7, 7, 1, 0), +AK4531_DOUBLE("Mono1 Bypass Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 6, 6, 1, 0), +AK4531_DOUBLE("Mono2 Bypass Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 5, 5, 1, 0), + +AK4531_SINGLE("AD Input Select", 0, AK4531_AD_IN, 0, 1, 0), +AK4531_SINGLE("Mic Boost (+30dB)", 0, AK4531_MIC_GAIN, 0, 1, 0) +}; + +static int snd_ak4531_free(struct snd_ak4531 *ak4531) +{ + if (ak4531) { + if (ak4531->private_free) + ak4531->private_free(ak4531); + kfree(ak4531); + } + return 0; +} + +static int snd_ak4531_dev_free(struct snd_device *device) +{ + struct snd_ak4531 *ak4531 = device->device_data; + return snd_ak4531_free(ak4531); +} + +static u8 snd_ak4531_initial_map[0x19 + 1] = { + 0x9f, /* 00: Master Volume Lch */ + 0x9f, /* 01: Master Volume Rch */ + 0x9f, /* 02: Voice Volume Lch */ + 0x9f, /* 03: Voice Volume Rch */ + 0x9f, /* 04: FM Volume Lch */ + 0x9f, /* 05: FM Volume Rch */ + 0x9f, /* 06: CD Audio Volume Lch */ + 0x9f, /* 07: CD Audio Volume Rch */ + 0x9f, /* 08: Line Volume Lch */ + 0x9f, /* 09: Line Volume Rch */ + 0x9f, /* 0a: Aux Volume Lch */ + 0x9f, /* 0b: Aux Volume Rch */ + 0x9f, /* 0c: Mono1 Volume */ + 0x9f, /* 0d: Mono2 Volume */ + 0x9f, /* 0e: Mic Volume */ + 0x87, /* 0f: Mono-out Volume */ + 0x00, /* 10: Output Mixer SW1 */ + 0x00, /* 11: Output Mixer SW2 */ + 0x00, /* 12: Lch Input Mixer SW1 */ + 0x00, /* 13: Rch Input Mixer SW1 */ + 0x00, /* 14: Lch Input Mixer SW2 */ + 0x00, /* 15: Rch Input Mixer SW2 */ + 0x00, /* 16: Reset & Power Down */ + 0x00, /* 17: Clock Select */ + 0x00, /* 18: AD Input Select */ + 0x01 /* 19: Mic Amp Setup */ +}; + +int __devinit snd_ak4531_mixer(struct snd_card *card, + struct snd_ak4531 *_ak4531, + struct snd_ak4531 **rak4531) +{ + unsigned int idx; + int err; + struct snd_ak4531 *ak4531; + static struct snd_device_ops ops = { + .dev_free = snd_ak4531_dev_free, + }; + + snd_assert(rak4531 != NULL, return -EINVAL); + *rak4531 = NULL; + snd_assert(card != NULL && _ak4531 != NULL, return -EINVAL); + ak4531 = kzalloc(sizeof(*ak4531), GFP_KERNEL); + if (ak4531 == NULL) + return -ENOMEM; + *ak4531 = *_ak4531; + mutex_init(&ak4531->reg_mutex); + if ((err = snd_component_add(card, "AK4531")) < 0) { + snd_ak4531_free(ak4531); + return err; + } + strcpy(card->mixername, "Asahi Kasei AK4531"); + ak4531->write(ak4531, AK4531_RESET, 0x03); /* no RST, PD */ + udelay(100); + ak4531->write(ak4531, AK4531_CLOCK, 0x00); /* CODEC ADC and CODEC DAC use {LR,B}CLK2 and run off LRCLK2 PLL */ + for (idx = 0; idx <= 0x19; idx++) { + if (idx == AK4531_RESET || idx == AK4531_CLOCK) + continue; + ak4531->write(ak4531, idx, ak4531->regs[idx] = snd_ak4531_initial_map[idx]); /* recording source is mixer */ + } + for (idx = 0; idx < ARRAY_SIZE(snd_ak4531_controls); idx++) { + if ((err = snd_ctl_add(card, snd_ctl_new1(&snd_ak4531_controls[idx], ak4531))) < 0) { + snd_ak4531_free(ak4531); + return err; + } + } + snd_ak4531_proc_init(card, ak4531); + if ((err = snd_device_new(card, SNDRV_DEV_CODEC, ak4531, &ops)) < 0) { + snd_ak4531_free(ak4531); + return err; + } + +#if 0 + snd_ak4531_dump(ak4531); +#endif + *rak4531 = ak4531; + return 0; +} + +/* + * power management + */ +#ifdef CONFIG_PM +void snd_ak4531_suspend(struct snd_ak4531 *ak4531) +{ + /* mute */ + ak4531->write(ak4531, AK4531_LMASTER, 0x9f); + ak4531->write(ak4531, AK4531_RMASTER, 0x9f); + /* powerdown */ + ak4531->write(ak4531, AK4531_RESET, 0x01); +} + +void snd_ak4531_resume(struct snd_ak4531 *ak4531) +{ + int idx; + + /* initialize */ + ak4531->write(ak4531, AK4531_RESET, 0x03); + udelay(100); + ak4531->write(ak4531, AK4531_CLOCK, 0x00); + /* restore mixer registers */ + for (idx = 0; idx <= 0x19; idx++) { + if (idx == AK4531_RESET || idx == AK4531_CLOCK) + continue; + ak4531->write(ak4531, idx, ak4531->regs[idx]); + } +} +#endif + +#ifdef CONFIG_PROC_FS +/* + * /proc interface + */ + +static void snd_ak4531_proc_read(struct snd_info_entry *entry, + struct snd_info_buffer *buffer) +{ + struct snd_ak4531 *ak4531 = entry->private_data; + + snd_iprintf(buffer, "Asahi Kasei AK4531\n\n"); + snd_iprintf(buffer, "Recording source : %s\n" + "MIC gain : %s\n", + ak4531->regs[AK4531_AD_IN] & 1 ? "external" : "mixer", + ak4531->regs[AK4531_MIC_GAIN] & 1 ? "+30dB" : "+0dB"); +} + +static void __devinit +snd_ak4531_proc_init(struct snd_card *card, struct snd_ak4531 *ak4531) +{ + struct snd_info_entry *entry; + + if (! snd_card_proc_new(card, "ak4531", &entry)) + snd_info_set_text_ops(entry, ak4531, snd_ak4531_proc_read); +} +#endif -- cgit v0.10.2 From 8bb8b453cb458d8f62411e78a4cfd6d860b503b6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 30 May 2008 09:47:45 +0200 Subject: [ALSA] trident - clean up obsolete synth codes Clean up the unused synth codes in the memory handling of trident driver. Signed-off-by: Takashi Iwai diff --git a/sound/pci/trident/trident_memory.c b/sound/pci/trident/trident_memory.c index df9b487..3fd7f1b 100644 --- a/sound/pci/trident/trident_memory.c +++ b/sound/pci/trident/trident_memory.c @@ -310,181 +310,3 @@ int snd_trident_free_pages(struct snd_trident *trident, mutex_unlock(&hdr->block_mutex); return 0; } - - -/*---------------------------------------------------------------- - * memory allocation using multiple pages (for synth) - *---------------------------------------------------------------- - * Unlike the DMA allocation above, non-contiguous pages are - * assigned to TLB. - *----------------------------------------------------------------*/ - -/* - */ -static int synth_alloc_pages(struct snd_trident *hw, struct snd_util_memblk *blk); -static int synth_free_pages(struct snd_trident *hw, struct snd_util_memblk *blk); - -/* - * allocate a synth sample area - */ -struct snd_util_memblk * -snd_trident_synth_alloc(struct snd_trident *hw, unsigned int size) -{ - struct snd_util_memblk *blk; - struct snd_util_memhdr *hdr = hw->tlb.memhdr; - - mutex_lock(&hdr->block_mutex); - blk = __snd_util_mem_alloc(hdr, size); - if (blk == NULL) { - mutex_unlock(&hdr->block_mutex); - return NULL; - } - if (synth_alloc_pages(hw, blk)) { - __snd_util_mem_free(hdr, blk); - mutex_unlock(&hdr->block_mutex); - return NULL; - } - mutex_unlock(&hdr->block_mutex); - return blk; -} - -EXPORT_SYMBOL(snd_trident_synth_alloc); - -/* - * free a synth sample area - */ -int -snd_trident_synth_free(struct snd_trident *hw, struct snd_util_memblk *blk) -{ - struct snd_util_memhdr *hdr = hw->tlb.memhdr; - - mutex_lock(&hdr->block_mutex); - synth_free_pages(hw, blk); - __snd_util_mem_free(hdr, blk); - mutex_unlock(&hdr->block_mutex); - return 0; -} - -EXPORT_SYMBOL(snd_trident_synth_free); - -/* - * reset TLB entry and free kernel page - */ -static void clear_tlb(struct snd_trident *trident, int page) -{ - void *ptr = page_to_ptr(trident, page); - dma_addr_t addr = page_to_addr(trident, page); - set_silent_tlb(trident, page); - if (ptr) { - struct snd_dma_buffer dmab; - dmab.dev.type = SNDRV_DMA_TYPE_DEV; - dmab.dev.dev = snd_dma_pci_data(trident->pci); - dmab.area = ptr; - dmab.addr = addr; - dmab.bytes = ALIGN_PAGE_SIZE; - snd_dma_free_pages(&dmab); - } -} - -/* check new allocation range */ -static void get_single_page_range(struct snd_util_memhdr *hdr, - struct snd_util_memblk *blk, - int *first_page_ret, int *last_page_ret) -{ - struct list_head *p; - struct snd_util_memblk *q; - int first_page, last_page; - first_page = firstpg(blk); - if ((p = blk->list.prev) != &hdr->block) { - q = list_entry(p, struct snd_util_memblk, list); - if (lastpg(q) == first_page) - first_page++; /* first page was already allocated */ - } - last_page = lastpg(blk); - if ((p = blk->list.next) != &hdr->block) { - q = list_entry(p, struct snd_util_memblk, list); - if (firstpg(q) == last_page) - last_page--; /* last page was already allocated */ - } - *first_page_ret = first_page; - *last_page_ret = last_page; -} - -/* - * allocate kernel pages and assign them to TLB - */ -static int synth_alloc_pages(struct snd_trident *hw, struct snd_util_memblk *blk) -{ - int page, first_page, last_page; - struct snd_dma_buffer dmab; - - firstpg(blk) = get_aligned_page(blk->offset); - lastpg(blk) = get_aligned_page(blk->offset + blk->size - 1); - get_single_page_range(hw->tlb.memhdr, blk, &first_page, &last_page); - - /* allocate a kernel page for each Trident page - - * fortunately Trident page size and kernel PAGE_SIZE is identical! - */ - for (page = first_page; page <= last_page; page++) { - if (snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(hw->pci), - ALIGN_PAGE_SIZE, &dmab) < 0) - goto __fail; - if (! is_valid_page(dmab.addr)) { - snd_dma_free_pages(&dmab); - goto __fail; - } - set_tlb_bus(hw, page, (unsigned long)dmab.area, dmab.addr); - } - return 0; - -__fail: - /* release allocated pages */ - last_page = page - 1; - for (page = first_page; page <= last_page; page++) - clear_tlb(hw, page); - - return -ENOMEM; -} - -/* - * free pages - */ -static int synth_free_pages(struct snd_trident *trident, struct snd_util_memblk *blk) -{ - int page, first_page, last_page; - - get_single_page_range(trident->tlb.memhdr, blk, &first_page, &last_page); - for (page = first_page; page <= last_page; page++) - clear_tlb(trident, page); - - return 0; -} - -/* - * copy_from_user(blk + offset, data, size) - */ -int snd_trident_synth_copy_from_user(struct snd_trident *trident, - struct snd_util_memblk *blk, - int offset, const char __user *data, int size) -{ - int page, nextofs, end_offset, temp, temp1; - - offset += blk->offset; - end_offset = offset + size; - page = get_aligned_page(offset) + 1; - do { - nextofs = aligned_page_offset(page); - temp = nextofs - offset; - temp1 = end_offset - offset; - if (temp1 < temp) - temp = temp1; - if (copy_from_user(offset_ptr(trident, offset), data, temp)) - return -EFAULT; - offset = nextofs; - data += temp; - page++; - } while (offset < end_offset); - return 0; -} - -EXPORT_SYMBOL(snd_trident_synth_copy_from_user); -- cgit v0.10.2 From 8ff7f2a46bd8831e1f1f2a694ec13921720180b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 21 Feb 2008 16:30:10 +0100 Subject: There is no need to have BOOT_PARAMS_SIZE known outside of atags.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit atags.c was the only user of KEXEC_BOOT_PARAMS_SIZE and kexec.h was only included to get that definition. Signed-off-by: Uwe Kleine-König Acked-by: Uli Luckas diff --git a/arch/arm/kernel/atags.c b/arch/arm/kernel/atags.c index 64c4208..56fef9a 100644 --- a/arch/arm/kernel/atags.c +++ b/arch/arm/kernel/atags.c @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -42,14 +41,14 @@ create_proc_entries(void) return 0; } - -static char __initdata atags_copy_buf[KEXEC_BOOT_PARAMS_SIZE]; +#define BOOT_PARAMS_SIZE 1536 +static char __initdata atags_copy_buf[BOOT_PARAMS_SIZE]; static char __initdata *atags_copy; void __init save_atags(const struct tag *tags) { atags_copy = atags_copy_buf; - memcpy(atags_copy, tags, KEXEC_BOOT_PARAMS_SIZE); + memcpy(atags_copy, tags, sizeof(atags_copy_buf)); } diff --git a/include/asm-arm/kexec.h b/include/asm-arm/kexec.h index 47fe34d..c8986bb 100644 --- a/include/asm-arm/kexec.h +++ b/include/asm-arm/kexec.h @@ -14,8 +14,6 @@ #define KEXEC_ARCH KEXEC_ARCH_ARM -#define KEXEC_BOOT_PARAMS_SIZE 1536 - #define KEXEC_ARM_ATAGS_OFFSET 0x1000 #define KEXEC_ARM_ZIMAGE_OFFSET 0x8000 -- cgit v0.10.2 From f7b0b939d54b457ba172e088f477af2012aef9b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 21 Feb 2008 15:04:40 +0100 Subject: clean up atags exporting code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gets rid of two static variables (one of them being __initdata) and a static function. Signed-off-by: Uwe Kleine-König Acked-by: Uli Luckas diff --git a/arch/arm/kernel/atags.c b/arch/arm/kernel/atags.c index 56fef9a..42a1a14 100644 --- a/arch/arm/kernel/atags.c +++ b/arch/arm/kernel/atags.c @@ -6,9 +6,8 @@ struct buffer { size_t size; - char *data; + char data[]; }; -static struct buffer tags_buffer; static int read_buffer(char* page, char** start, off_t off, int count, @@ -28,58 +27,57 @@ read_buffer(char* page, char** start, off_t off, int count, return count; } - -static int -create_proc_entries(void) -{ - struct proc_dir_entry* tags_entry; - - tags_entry = create_proc_read_entry("atags", 0400, NULL, read_buffer, &tags_buffer); - if (!tags_entry) - return -ENOMEM; - - return 0; -} - #define BOOT_PARAMS_SIZE 1536 -static char __initdata atags_copy_buf[BOOT_PARAMS_SIZE]; -static char __initdata *atags_copy; +static char __initdata atags_copy[BOOT_PARAMS_SIZE]; void __init save_atags(const struct tag *tags) { - atags_copy = atags_copy_buf; - memcpy(atags_copy, tags, sizeof(atags_copy_buf)); + memcpy(atags_copy, tags, sizeof(atags_copy)); } - static int __init init_atags_procfs(void) { - struct tag *tag; - int error; + /* + * This cannot go into save_atags() because kmalloc and proc don't work + * yet when it is called. + */ + struct proc_dir_entry *tags_entry; + struct tag *tag = (struct tag *)atags_copy; + struct buffer *b; + size_t size; - if (!atags_copy) { - printk(KERN_WARNING "Exporting ATAGs: No saved tags found\n"); - return -EIO; + if (tag->hdr.tag != ATAG_CORE) { + printk(KERN_INFO "No ATAGs?"); + return -EINVAL; } - for (tag = (struct tag *) atags_copy; tag->hdr.size; tag = tag_next(tag)) + for (; tag->hdr.size; tag = tag_next(tag)) ; - tags_buffer.size = ((char *) tag - atags_copy) + sizeof(tag->hdr); - tags_buffer.data = kmalloc(tags_buffer.size, GFP_KERNEL); - if (tags_buffer.data == NULL) - return -ENOMEM; - memcpy(tags_buffer.data, atags_copy, tags_buffer.size); - - error = create_proc_entries(); - if (error) { - printk(KERN_ERR "Exporting ATAGs: not enough memory\n"); - kfree(tags_buffer.data); - tags_buffer.size = 0; - tags_buffer.data = NULL; - } + /* include the terminating ATAG_NONE */ + size = (char *)tag - atags_copy + sizeof(struct tag_header); - return error; -} + WARN_ON(tag->hdr.tag != ATAG_NONE); + + b = kmalloc(sizeof(*b) + size, GFP_KERNEL); + if (!b) + goto nomem; + b->size = size; + memcpy(b->data, atags_copy, size); + + tags_entry = create_proc_read_entry("atags", 0400, + NULL, read_buffer, b); + + if (!tags_entry) + goto nomem; + + return 0; + +nomem: + kfree(b); + printk(KERN_ERR "Exporting ATAGs: not enough memory\n"); + + return -ENOMEM; +} arch_initcall(init_atags_procfs); -- cgit v0.10.2 From a4831fbe096a6f4f691fd71b041ce27add54088e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 21 May 2008 10:17:07 +0200 Subject: ns9xxx: fix assembler version of __REG2 to be consistent with the C version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not very critical because __REG2 isn't used in assembler code currently. Additionally some white space noise is fixed. Signed-off-by: Uwe Kleine-König diff --git a/include/asm-arm/arch-ns9xxx/hardware.h b/include/asm-arm/arch-ns9xxx/hardware.h index 0b7b346..0dca11c 100644 --- a/include/asm-arm/arch-ns9xxx/hardware.h +++ b/include/asm-arm/arch-ns9xxx/hardware.h @@ -66,13 +66,13 @@ __REGGET(var, reg ## _ ## field) / __REGSHIFT(reg ## _ ## field) # define REGGETIM_IDX(var, reg, field, idx) \ - __REGGET(var, reg ## _ ## field((idx))) / \ + __REGGET(var, reg ## _ ## field((idx))) / \ __REGSHIFT(reg ## _ ## field((idx))) #else # define __REG(x) io_p2v(x) -# define __REG2(x, y) io_p2v((x) + (y)) +# define __REG2(x, y) io_p2v((x) + 4 * (y)) #endif -- cgit v0.10.2 From 72860b0f3c4a303498c9e16c8a4a38ddf23d56de Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 30 May 2008 11:25:03 +0200 Subject: sound: Clean up sound/oss/Kconfig Removed unnecessary dependencies, fix a wrong selection, and make CONFIG_SOUND_OSS menuconfig to simplify. Signed-off-by: Takashi Iwai diff --git a/sound/oss/Kconfig b/sound/oss/Kconfig index 3be2dc1..3394013 100644 --- a/sound/oss/Kconfig +++ b/sound/oss/Kconfig @@ -7,7 +7,7 @@ config SOUND_BCM_CS4297A tristate "Crystal Sound CS4297a (for Swarm)" - depends on SOUND_PRIME && SIBYTE_SWARM + depends on SIBYTE_SWARM help The BCM91250A has a Crystal CS4297a on synchronous serial port B (in addition to the DB-9 serial port). Say Y or M @@ -17,7 +17,7 @@ config SOUND_BCM_CS4297A config SOUND_VWSND tristate "SGI Visual Workstation Sound" - depends on SOUND_PRIME && X86_VISWS + depends on X86_VISWS help Say Y or M if you have an SGI Visual Workstation and you want to be able to use its on-board audio. Read @@ -26,19 +26,18 @@ config SOUND_VWSND config SOUND_HAL2 tristate "SGI HAL2 sound (EXPERIMENTAL)" - depends on SOUND_PRIME && SGI_IP22 && EXPERIMENTAL + depends on SGI_IP22 && EXPERIMENTAL help Say Y or M if you have an SGI Indy or Indigo2 system and want to be able to use its on-board A2 audio system. config SOUND_AU1550_AC97 tristate "Au1550/Au1200 AC97 Sound" - select SND_AC97_CODEC - depends on SOUND_PRIME && (SOC_AU1550 || SOC_AU1200) + depends on SOC_AU1550 || SOC_AU1200 config SOUND_TRIDENT tristate "Trident 4DWave DX/NX, SiS 7018 or ALi 5451 PCI Audio Core" - depends on SOUND_PRIME && PCI + depends on PCI ---help--- Say Y or M if you have a PCI sound card utilizing the Trident 4DWave-DX/NX chipset or your mother board chipset has SiS 7018 @@ -79,7 +78,7 @@ config SOUND_TRIDENT config SOUND_MSNDCLAS tristate "Support for Turtle Beach MultiSound Classic, Tahiti, Monterey" - depends on SOUND_PRIME && (m || !STANDALONE) && ISA + depends on (m || !STANDALONE) && ISA help Say M here if you have a Turtle Beach MultiSound Classic, Tahiti or Monterey (not for the Pinnacle or Fiji). @@ -143,7 +142,7 @@ config MSNDCLAS_IO config SOUND_MSNDPIN tristate "Support for Turtle Beach MultiSound Pinnacle, Fiji" - depends on SOUND_PRIME && (m || !STANDALONE) && ISA + depends on (m || !STANDALONE) && ISA help Say M here if you have a Turtle Beach MultiSound Pinnacle or Fiji. See for important information @@ -229,7 +228,7 @@ config MSNDPIN_NONPNP configure the card's resources. comment "MSND Pinnacle DSP section will be configured to above parameters." - depends on SOUND_PRIME && SOUND_MSNDPIN=y && MSNDPIN_NONPNP + depends on SOUND_MSNDPIN=y && MSNDPIN_NONPNP config MSNDPIN_CFG hex "MSND Pinnacle config port 250,260,270" @@ -242,7 +241,7 @@ config MSNDPIN_CFG Mode". comment "Pinnacle-specific Device Configuration (0 disables)" - depends on SOUND_PRIME && SOUND_MSNDPIN=y && MSNDPIN_NONPNP + depends on SOUND_MSNDPIN=y && MSNDPIN_NONPNP config MSNDPIN_MPU_IO hex "MSND Pinnacle MPU I/O (e.g. 330)" @@ -294,7 +293,7 @@ config MSNDPIN_JOYSTICK_IO config MSND_FIFOSIZE int "MSND buffer size (kB)" - depends on SOUND_PRIME && (SOUND_MSNDPIN=y || SOUND_MSNDCLAS=y) + depends on SOUND_MSNDPIN=y || SOUND_MSNDCLAS=y default "128" help Configures the size of each audio buffer, in kilobytes, for @@ -302,9 +301,9 @@ config MSND_FIFOSIZE and Pinnacle). Larger values reduce the chance of data overruns at the expense of overall latency. If unsure, use the default. -config SOUND_OSS +menuconfig SOUND_OSS tristate "OSS sound modules" - depends on SOUND_PRIME && ISA_DMA_API && VIRT_TO_BUS + depends on ISA_DMA_API && VIRT_TO_BUS help OSS is the Open Sound System suite of sound card drivers. They make sound programming easier since they provide a common API. Say Y or @@ -312,16 +311,16 @@ config SOUND_OSS driver for your sound card above, then pick your driver from the list below. +if SOUND_OSS + config SOUND_TRACEINIT bool "Verbose initialisation" - depends on SOUND_OSS help Verbose soundcard initialization -- affects the format of autoprobe and initialization messages at boot time. config SOUND_DMAP bool "Persistent DMA buffers" - depends on SOUND_OSS ---help--- Linux can often have problems allocating DMA buffers for ISA sound cards on machines with more than 16MB of RAM. This is because ISA @@ -338,8 +337,6 @@ config SOUND_DMAP config SOUND_SSCAPE tristate "Ensoniq SoundScape support" - depends on SOUND_OSS - depends on VIRT_TO_BUS help Answer Y if you have a sound card based on the Ensoniq SoundScape chipset. Such cards are being manufactured at least by Ensoniq, Spea @@ -352,13 +349,11 @@ config SOUND_SSCAPE config SOUND_VMIDI tristate "Loopback MIDI device support" - depends on SOUND_OSS help Support for MIDI loopback on port 1 or 2. config SOUND_TRIX tristate "MediaTrix AudioTrix Pro support" - depends on SOUND_OSS help Answer Y if you have the AudioTriX Pro sound card manufactured by MediaTrix. @@ -382,7 +377,6 @@ config TRIX_BOOT_FILE config SOUND_MSS tristate "Microsoft Sound System support" - depends on SOUND_OSS ---help--- Again think carefully before answering Y to this question. It's safe to answer Y if you have the original Windows Sound System card @@ -414,7 +408,6 @@ config SOUND_MSS config SOUND_MPU401 tristate "MPU-401 support (NOT for SB16)" - depends on SOUND_OSS ---help--- Be careful with this question. The MPU401 interface is supported by all sound cards. However, some natively supported cards have their @@ -430,7 +423,6 @@ config SOUND_MPU401 config SOUND_PAS tristate "ProAudioSpectrum 16 support" - depends on SOUND_OSS ---help--- Answer Y only if you have a Pro Audio Spectrum 16, ProAudio Studio 16 or Logitech SoundMan 16 sound card. Answer N if you have some @@ -452,7 +444,6 @@ config PAS_JOYSTICK config SOUND_PSS tristate "PSS (AD1848, ADSP-2115, ESC614) support" - depends on SOUND_OSS help Answer Y or M if you have an Orchid SW32, Cardinal DSP16, Beethoven ADSP-16 or some other card based on the PSS chipset (AD1848 codec + @@ -495,7 +486,6 @@ config PSS_BOOT_FILE config SOUND_SB tristate "100% Sound Blaster compatibles (SB16/32/64, ESS, Jazz16) support" - depends on SOUND_OSS ---help--- Answer Y if you have an original Sound Blaster card made by Creative Labs or a 100% hardware compatible clone (like the Thunderboard or @@ -522,7 +512,6 @@ config SOUND_SB config SOUND_YM3812 tristate "Yamaha FM synthesizer (YM3812/OPL-3) support" - depends on SOUND_OSS ---help--- Answer Y if your card has a FM chip made by Yamaha (OPL2/OPL3/OPL4). Answering Y is usually a safe and recommended choice, however some @@ -538,7 +527,6 @@ config SOUND_YM3812 config SOUND_UART6850 tristate "6850 UART support" - depends on SOUND_OSS help This option enables support for MIDI interfaces based on the 6850 UART chip. This interface is rarely found on sound cards. It's safe @@ -549,7 +537,6 @@ config SOUND_UART6850 config SOUND_AEDSP16 tristate "Gallant Audio Cards (SC-6000 and SC-6600 based)" - depends on SOUND_OSS ---help--- Answer Y if you have a Gallant's Audio Excel DSP 16 card. This driver supports Audio Excel DSP 16 but not the III nor PnP versions @@ -630,14 +617,14 @@ endchoice config SOUND_VIDC tristate "VIDC 16-bit sound" - depends on ARM && (ARCH_ACORN || ARCH_CLPS7500) && SOUND_OSS + depends on ARM && (ARCH_ACORN || ARCH_CLPS7500) help 16-bit support for the VIDC onboard sound hardware found on Acorn machines. config SOUND_WAVEARTIST tristate "Netwinder WaveArtist" - depends on ARM && SOUND_OSS && ARCH_NETWINDER + depends on ARM && ARCH_NETWINDER help Say Y here to include support for the Rockwell WaveArtist sound system. This driver is mainly for the NetWinder. @@ -646,9 +633,11 @@ config SOUND_KAHLUA tristate "XpressAudio Sound Blaster emulation" depends on SOUND_SB +endif # SOUND_OSS + config SOUND_SH_DAC_AUDIO tristate "SuperH DAC audio support" - depends on SOUND_PRIME && CPU_SH3 + depends on CPU_SH3 config SOUND_SH_DAC_AUDIO_CHANNEL int "DAC channel" -- cgit v0.10.2 From 2621f0338ce4e3e57cc32a967f5a3d2999390fe3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 30 May 2008 11:27:24 +0200 Subject: [ALSA] trivial clean up of sound/isa/sb/Makefile Remove unneeded sort in sound/isa/sb/Makefile. Signed-off-by: Takashi Iwai diff --git a/sound/isa/sb/Makefile b/sound/isa/sb/Makefile index c9d1c98..1098a56 100644 --- a/sound/isa/sb/Makefile +++ b/sound/isa/sb/Makefile @@ -34,5 +34,3 @@ ifeq ($(CONFIG_SND_SB16_CSP),y) obj-$(CONFIG_SND_SBAWE) += snd-sb16-csp.o endif obj-$(call sequencer,$(CONFIG_SND_SBAWE)) += snd-emu8000-synth.o - -obj-m := $(sort $(obj-m)) -- cgit v0.10.2 From a5003fc04113c217370409beac812831cbf6e0ac Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 30 May 2008 09:49:41 +0200 Subject: [ALSA] emu10k1 - simplify page allocation for synth Simplify the page allocation of emu10k1 driver for emux synth support. Since these pages aren't be necessarily coherent, we can avoid expensive DMA-coherent routines. Signed-off-by: Takashi Iwai diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c index 916c1db..128eaca 100644 --- a/sound/pci/emu10k1/memory.c +++ b/sound/pci/emu10k1/memory.c @@ -437,43 +437,46 @@ static void get_single_page_range(struct snd_util_memhdr *hdr, *last_page_ret = last_page; } +/* release allocated pages */ +static void __synth_free_pages(struct snd_emu10k1 *emu, int first_page, + int last_page) +{ + int page; + + for (page = first_page; page <= last_page; page++) { + free_page((unsigned long)emu->page_ptr_table[page]); + emu->page_addr_table[page] = 0; + emu->page_ptr_table[page] = NULL; + } +} + /* * allocate kernel pages */ static int synth_alloc_pages(struct snd_emu10k1 *emu, struct snd_emu10k1_memblk *blk) { int page, first_page, last_page; - struct snd_dma_buffer dmab; emu10k1_memblk_init(blk); get_single_page_range(emu->memhdr, blk, &first_page, &last_page); /* allocate kernel pages */ for (page = first_page; page <= last_page; page++) { - if (snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(emu->pci), - PAGE_SIZE, &dmab) < 0) - goto __fail; - if (! is_valid_page(emu, dmab.addr)) { - snd_dma_free_pages(&dmab); - goto __fail; + /* first try to allocate from <4GB zone */ + struct page *p = alloc_page(GFP_KERNEL | GFP_DMA32 | + __GFP_NOWARN); + if (!p || (page_to_pfn(p) & ~(emu->dma_mask >> PAGE_SHIFT))) + /* try to allocate from <16MB zone */ + p = alloc_page(GFP_DMA | + __GFP_NORETRY | /* no OOM-killer */ + __GFP_NOWARN); + if (!p) { + __synth_free_pages(emu, first_page, page - 1); + return -ENOMEM; } - emu->page_addr_table[page] = dmab.addr; - emu->page_ptr_table[page] = dmab.area; + emu->page_addr_table[page] = page_to_phys(p); + emu->page_ptr_table[page] = page_address(p); } return 0; - -__fail: - /* release allocated pages */ - last_page = page - 1; - for (page = first_page; page <= last_page; page++) { - dmab.area = emu->page_ptr_table[page]; - dmab.addr = emu->page_addr_table[page]; - dmab.bytes = PAGE_SIZE; - snd_dma_free_pages(&dmab); - emu->page_addr_table[page] = 0; - emu->page_ptr_table[page] = NULL; - } - - return -ENOMEM; } /* @@ -481,23 +484,10 @@ __fail: */ static int synth_free_pages(struct snd_emu10k1 *emu, struct snd_emu10k1_memblk *blk) { - int page, first_page, last_page; - struct snd_dma_buffer dmab; + int first_page, last_page; get_single_page_range(emu->memhdr, blk, &first_page, &last_page); - dmab.dev.type = SNDRV_DMA_TYPE_DEV; - dmab.dev.dev = snd_dma_pci_data(emu->pci); - for (page = first_page; page <= last_page; page++) { - if (emu->page_ptr_table[page] == NULL) - continue; - dmab.area = emu->page_ptr_table[page]; - dmab.addr = emu->page_addr_table[page]; - dmab.bytes = PAGE_SIZE; - snd_dma_free_pages(&dmab); - emu->page_addr_table[page] = 0; - emu->page_ptr_table[page] = NULL; - } - + __synth_free_pages(emu, first_page, last_page); return 0; } -- cgit v0.10.2 From 7a14ce1d8c1d3a6118d406e64eaf9aa70375e085 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 15:43:53 +0200 Subject: nohz: reduce jiffies polling overhead Signed-off-by: Ingo Molnar diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b854a89..cb75394 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -48,6 +48,13 @@ static void tick_do_update_jiffies64(ktime_t now) unsigned long ticks = 0; ktime_t delta; + /* + * Do a quick check without holding xtime_lock: + */ + delta = ktime_sub(now, last_jiffies_update); + if (delta.tv64 < tick_period.tv64) + return; + /* Reevalute with xtime_lock held */ write_seqlock(&xtime_lock); -- cgit v0.10.2 From ba3a5974239293d921235e6fa82b09b670e674ef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 30 May 2008 14:55:47 +0200 Subject: - fix typo in include/asm-x86/nmi.h diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 6f4d44f..972a4f6 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -40,7 +40,7 @@ static inline void unset_nmi_pm_callback(struct pm_dev *dev) extern void default_do_nmi(struct pt_regs *); extern void nmi_watchdog_default(void); #else -#define nmi_watchdog_default(void) do {} while (0) +#define nmi_watchdog_default() do { } while (0) #endif extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); -- cgit v0.10.2 From d364319b989967b74e57fef5c8017fd56a16c392 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 May 2008 23:42:01 +0200 Subject: x86: move mmconfig declarations to header arch/x86/kernel/mmconf-fam10h_64.c is missing the prototypes, which are decalred in arch/x86/kernel/setup_64.c. Move the prototypes and the inline stubs to the appropriate header file. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index edc5fbf..fdfdc55 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "../pci/pci.h" diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6dff128..341230d 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #ifdef CONFIG_PARAVIRT @@ -293,18 +294,6 @@ static void __init parse_setup_data(void) } } -#ifdef CONFIG_PCI_MMCONFIG -extern void __cpuinit fam10h_check_enable_mmcfg(void); -extern void __init check_enable_amd_mmconf_dmi(void); -#else -void __cpuinit fam10h_check_enable_mmcfg(void) -{ -} -void __init check_enable_amd_mmconf_dmi(void) -{ -} -#endif - /* * setup_arch - architecture-specific boot-time initializations * diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h new file mode 100644 index 0000000..95beda0 --- /dev/null +++ b/include/asm-x86/mmconfig.h @@ -0,0 +1,12 @@ +#ifndef _ASM_MMCONFIG_H +#define _ASM_MMCONFIG_H + +#ifdef CONFIG_PCI_MMCONFIG +extern void __cpuinit fam10h_check_enable_mmcfg(void); +extern void __init check_enable_amd_mmconf_dmi(void); +#else +static inline void fam10h_check_enable_mmcfg(void) { } +static inline void check_enable_amd_mmconf_dmi(void) { } +#endif + +#endif -- cgit v0.10.2 From 4d285878564bb46cf64e54be18eeffe33ca583a0 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 May 2008 18:48:32 -0400 Subject: x86: Move the AMD64 specific parts out of setup_64.c Create a separate amd_64.c file in the cpu/ dir for the useful parts to live in. Signed-off-by: Dave Jones Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a0c6f81..ef065c1 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -7,6 +7,7 @@ obj-y += proc.o feature_names.o obj-$(CONFIG_X86_32) += common.o bugs.o obj-$(CONFIG_X86_32) += amd.o +obj-$(CONFIG_X86_64) += amd_64.o obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o obj-$(CONFIG_X86_32) += transmeta.o diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c new file mode 100644 index 0000000..1746f6f --- /dev/null +++ b/arch/x86/kernel/cpu/amd_64.c @@ -0,0 +1,235 @@ +#include +#include + +#include +#include +#include + +#include + +extern int __cpuinit get_model_name(struct cpuinfo_x86 *c); +extern void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c); + +int force_mwait __cpuinitdata; + +#ifdef CONFIG_NUMA +static int __cpuinit nearby_node(int apicid) +{ + int i, node; + + for (i = apicid - 1; i >= 0; i--) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif + +/* + * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * Assumes number of cores is a power of two. + */ +static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned bits; +#ifdef CONFIG_NUMA + int cpu = smp_processor_id(); + int node = 0; + unsigned apicid = hard_smp_processor_id(); +#endif + bits = c->x86_coreid_bits; + + /* Low order bits define the core id (index of core in socket) */ + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); + /* Convert the initial APIC ID into the socket ID */ + c->phys_proc_id = c->initial_apicid >> bits; + +#ifdef CONFIG_NUMA + node = c->phys_proc_id; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + + int ht_nodeid = c->initial_apicid; + + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +#endif +} + +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned bits, ecx; + + /* Multi core CPU? */ + if (c->extended_cpuid_level < 0x80000008) + return; + + ecx = cpuid_ecx(0x80000008); + + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? */ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + + c->x86_coreid_bits = bits; + +#endif +} + +#define ENABLE_C1E_MASK 0x18000000 +#define CPUID_PROCESSOR_SIGNATURE 1 +#define CPUID_XFAM 0x0ff00000 +#define CPUID_XFAM_K8 0x00000000 +#define CPUID_XFAM_10H 0x00100000 +#define CPUID_XFAM_11H 0x00200000 +#define CPUID_XMOD 0x000f0000 +#define CPUID_XMOD_REV_F 0x00040000 + +/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ +static __cpuinit int amd_apic_timer_broken(void) +{ + u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + + switch (eax & CPUID_XFAM) { + case CPUID_XFAM_K8: + if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) + break; + case CPUID_XFAM_10H: + case CPUID_XFAM_11H: + rdmsr(MSR_K8_ENABLE_C1E, lo, hi); + if (lo & ENABLE_C1E_MASK) + return 1; + break; + default: + /* err on the side of caution */ + return 1; + } + return 0; +} + +void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +{ + early_init_amd_mc(c); + + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +void __cpuinit init_amd(struct cpuinfo_x86 *c) +{ + unsigned level; + +#ifdef CONFIG_SMP + unsigned long value; + + /* + * Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K8_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K8_HWCR, value); + } +#endif + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_cpu_cap(c, 0*32+31); + + /* On C+ stepping K8 rep microcode works well for copy/memset */ + level = cpuid_eax(1); + if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || + level >= 0x0f58)) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + if (c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* Enable workaround for FXSAVE leak */ + if (c->x86 >= 6) + set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); + + level = get_model_name(c); + if (!level) { + switch (c->x86) { + case 15: + /* Should distinguish Models here, but this is only + a fallback anyways. */ + strcpy(c->x86_model_id, "Hammer"); + break; + } + } + display_cacheinfo(c); + + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) + amd_detect_cmp(c); + + if (c->extended_cpuid_level >= 0x80000006 && + (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + + if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_K8); + + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + + if (c->x86 == 0x10) + fam10h_check_enable_mmcfg(); + + if (amd_apic_timer_broken()) + disable_apic_timer = 1; + + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && + (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) + set_memory_4k((unsigned long)__va(tseg), 1); + } +} diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 341230d..b07b199 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -96,8 +96,6 @@ int bootloader_type; unsigned long saved_video_mode; -int force_mwait __cpuinitdata; - /* * Early DMI memory */ @@ -526,7 +524,7 @@ void __init setup_arch(char **cmdline_p) check_enable_amd_mmconf_dmi(); } -static int __cpuinit get_model_name(struct cpuinfo_x86 *c) +int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; @@ -542,7 +540,7 @@ static int __cpuinit get_model_name(struct cpuinfo_x86 *c) } -static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, eax, ebx, ecx, edx; @@ -574,228 +572,6 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) } } -#ifdef CONFIG_NUMA -static int __cpuinit nearby_node(int apicid) -{ - int i, node; - - for (i = apicid - 1; i >= 0; i--) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - return first_node(node_online_map); /* Shouldn't happen */ -} -#endif - -/* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. - * Assumes number of cores is a power of two. - */ -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned bits; -#ifdef CONFIG_NUMA - int cpu = smp_processor_id(); - int node = 0; - unsigned apicid = hard_smp_processor_id(); -#endif - bits = c->x86_coreid_bits; - - /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->phys_proc_id = c->initial_apicid >> bits; - -#ifdef CONFIG_NUMA - node = c->phys_proc_id; - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; - if (!node_online(node)) { - /* Two possibilities here: - - The CPU is missing memory and no node was created. - In that case try picking one from a nearby CPU - - The APIC IDs differ from the HyperTransport node IDs - which the K8 northbridge parsing fills in. - Assume they are all increased by a constant offset, - but in the same order as the HT nodeids. - If that doesn't result in a usable node fall back to the - path for the previous case. */ - - int ht_nodeid = c->initial_apicid; - - if (ht_nodeid >= 0 && - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = apicid_to_node[ht_nodeid]; - /* Pick a nearby node */ - if (!node_online(node)) - node = nearby_node(apicid); - } - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -#endif -} - -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; - -#endif -} - -#define ENABLE_C1E_MASK 0x18000000 -#define CPUID_PROCESSOR_SIGNATURE 1 -#define CPUID_XFAM 0x0ff00000 -#define CPUID_XFAM_K8 0x00000000 -#define CPUID_XFAM_10H 0x00100000 -#define CPUID_XFAM_11H 0x00200000 -#define CPUID_XMOD 0x000f0000 -#define CPUID_XMOD_REV_F 0x00040000 - -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(void) -{ - u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - - switch (eax & CPUID_XFAM) { - case CPUID_XFAM_K8: - if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) - break; - case CPUID_XFAM_10H: - case CPUID_XFAM_11H: - rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) - return 1; - break; - default: - /* err on the side of caution */ - return 1; - } - return 0; -} - -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) -{ - early_init_amd_mc(c); - - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -} - -static void __cpuinit init_amd(struct cpuinfo_x86 *c) -{ - unsigned level; - -#ifdef CONFIG_SMP - unsigned long value; - - /* - * Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 15) { - rdmsrl(MSR_K8_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K8_HWCR, value); - } -#endif - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_cpu_cap(c, 0*32+31); - - /* On C+ stepping K8 rep microcode works well for copy/memset */ - level = cpuid_eax(1); - if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || - level >= 0x0f58)) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - if (c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - level = get_model_name(c); - if (!level) { - switch (c->x86) { - case 15: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } - display_cacheinfo(c); - - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) - amd_detect_cmp(c); - - if (c->extended_cpuid_level >= 0x80000006 && - (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; - - if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_K8); - - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - - if (c->x86 == 0x10) - fam10h_check_enable_mmcfg(); - - if (amd_apic_timer_broken()) - disable_apic_timer = 1; - - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && - (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) - set_memory_4k((unsigned long)__va(tseg), 1); - } -} - void __cpuinit detect_ht(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP @@ -977,6 +753,10 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) c->x86_vendor = X86_VENDOR_UNKNOWN; } +// FIXME: Needs to use cpu_vendor_dev_register +extern void __cpuinit early_init_amd(struct cpuinfo_x86 *c); +extern void __cpuinit init_amd(struct cpuinfo_x86 *c); + /* Do some early cpuid on the boot CPU to get some parameter that are needed before check_bugs. Everything advanced is in identify_cpu below. */ -- cgit v0.10.2 From a82fbe31cb387bb246e2d3b3c177f551bb991135 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 May 2008 18:54:32 -0400 Subject: x86: Move the 64-bit Intel specific parts out of setup_64.c Create a separate intel_64.c file in the cpu/ dir for the useful parts to live in. Signed-off-by: Dave Jones Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ef065c1..b7a1192 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o obj-$(CONFIG_X86_32) += transmeta.o obj-$(CONFIG_X86_32) += intel.o +obj-$(CONFIG_X86_64) += intel_64.o obj-$(CONFIG_X86_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c new file mode 100644 index 0000000..e5f929f --- /dev/null +++ b/arch/x86/kernel/cpu/intel_64.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include + +void __cpuinit early_init_intel(struct cpuinfo_x86 *c) +{ + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +/* + * find out the number of processor cores on the die + */ +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, t; + + if (c->cpuid_level < 4) + return 1; + + cpuid_count(4, 0, &eax, &t, &t, &t); + + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + +static void __cpuinit srat_detect_node(void) +{ +#ifdef CONFIG_NUMA + unsigned node; + int cpu = smp_processor_id(); + int apicid = hard_smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. */ + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE || !node_online(node)) + node = first_node(node_online_map); + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + +void __cpuinit init_intel(struct cpuinfo_x86 *c) +{ + /* Cache sizes */ + unsigned n; + + init_intel_cacheinfo(c); + if (c->cpuid_level > 9) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + } + + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_cpu_cap(c, X86_FEATURE_BTS); + if (!(l1 & (1<<12))) + set_cpu_cap(c, X86_FEATURE_PEBS); + } + + + if (cpu_has_bts) + ds_init_intel(c); + + n = c->extended_cpuid_level; + if (n >= 0x80000008) { + unsigned eax = cpuid_eax(0x80000008); + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + /* CPUID workaround for Intel 0F34 CPU */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 0xF && c->x86_model == 0x3 && + c->x86_mask == 0x4) + c->x86_phys_bits = 36; + } + + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + c->x86_max_cores = intel_num_cpu_cores(c); + + srat_detect_node(); +} diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index b07b199..c4e6a0b 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -622,97 +622,6 @@ out: #endif } -/* - * find out the number of processor cores on the die - */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, t; - - if (c->cpuid_level < 4) - return 1; - - cpuid_count(4, 0, &eax, &t, &t, &t); - - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; -} - -static void __cpuinit srat_detect_node(void) -{ -#ifdef CONFIG_NUMA - unsigned node; - int cpu = smp_processor_id(); - int apicid = hard_smp_processor_id(); - - /* Don't do the funky fallback heuristics the AMD version employs - for now. */ - node = apicid_to_node[apicid]; - if (node == NUMA_NO_NODE || !node_online(node)) - node = first_node(node_online_map); - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) -{ - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -} - -static void __cpuinit init_intel(struct cpuinfo_x86 *c) -{ - /* Cache sizes */ - unsigned n; - - init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); - /* Check for version and the number of counters */ - if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); - } - - if (cpu_has_ds) { - unsigned int l1, l2; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - } - - - if (cpu_has_bts) - ds_init_intel(c); - - n = c->extended_cpuid_level; - if (n >= 0x80000008) { - unsigned eax = cpuid_eax(0x80000008); - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - /* CPUID workaround for Intel 0F34 CPU */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 0xF && c->x86_model == 0x3 && - c->x86_mask == 0x4) - c->x86_phys_bits = 36; - } - - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - c->x86_max_cores = intel_num_cpu_cores(c); - - srat_detect_node(); -} - static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) { if (c->x86 == 0x6 && c->x86_model >= 0xf) @@ -756,6 +665,8 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) // FIXME: Needs to use cpu_vendor_dev_register extern void __cpuinit early_init_amd(struct cpuinfo_x86 *c); extern void __cpuinit init_amd(struct cpuinfo_x86 *c); +extern void __cpuinit early_init_intel(struct cpuinfo_x86 *c); +extern void __cpuinit init_intel(struct cpuinfo_x86 *c); /* Do some early cpuid on the boot CPU to get some parameter that are needed before check_bugs. Everything advanced is in identify_cpu -- cgit v0.10.2 From 7e2191127eb414d7d5a11df6552ab6e3845d17a1 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 May 2008 18:55:06 -0400 Subject: x86: Remove workaround for prescott (32bit P4) from 64-bit code. Signed-off-by: Dave Jones Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index e5f929f..b339121 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -79,11 +79,6 @@ void __cpuinit init_intel(struct cpuinfo_x86 *c) unsigned eax = cpuid_eax(0x80000008); c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; - /* CPUID workaround for Intel 0F34 CPU */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 0xF && c->x86_model == 0x3 && - c->x86_mask == 0x4) - c->x86_phys_bits = 36; } if (c->x86 == 15) -- cgit v0.10.2 From 30a713180b3d08fdec5ca572e5a1cd35253c5d8e Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 22 May 2008 18:57:25 -0400 Subject: x86: Move the 64-bit Centaur specific parts out of setup_64.c Create a separate centaur_64.c file in the cpu/ dir for the useful parts to live in. Signed-off-by: Dave Jones Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index b7a1192..c77a1c5 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_X86_32) += amd.o obj-$(CONFIG_X86_64) += amd_64.o obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o +obj-$(CONFIG_X86_64) += centaur_64.o obj-$(CONFIG_X86_32) += transmeta.o obj-$(CONFIG_X86_32) += intel.o obj-$(CONFIG_X86_64) += intel_64.o diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c new file mode 100644 index 0000000..bac96d1 --- /dev/null +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -0,0 +1,31 @@ +#include +#include + +#include +#include + +void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + if (c->x86 == 0x6 && c->x86_model >= 0xf) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +} + +void __cpuinit init_centaur(struct cpuinfo_x86 *c) +{ + /* Cache sizes */ + unsigned n; + + n = c->extended_cpuid_level; + if (n >= 0x80000008) { + unsigned eax = cpuid_eax(0x80000008); + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } + + if (c->x86 == 0x6 && c->x86_model >= 0xf) { + c->x86_cache_alignment = c->x86_clflush_size * 2; + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +} diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index c4e6a0b..25afdd8 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -622,32 +622,6 @@ out: #endif } -static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) -{ - if (c->x86 == 0x6 && c->x86_model >= 0xf) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -} - -static void __cpuinit init_centaur(struct cpuinfo_x86 *c) -{ - /* Cache sizes */ - unsigned n; - - n = c->extended_cpuid_level; - if (n >= 0x80000008) { - unsigned eax = cpuid_eax(0x80000008); - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } - - if (c->x86 == 0x6 && c->x86_model >= 0xf) { - c->x86_cache_alignment = c->x86_clflush_size * 2; - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - } - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); -} - static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; @@ -667,6 +641,8 @@ extern void __cpuinit early_init_amd(struct cpuinfo_x86 *c); extern void __cpuinit init_amd(struct cpuinfo_x86 *c); extern void __cpuinit early_init_intel(struct cpuinfo_x86 *c); extern void __cpuinit init_intel(struct cpuinfo_x86 *c); +extern void __cpuinit early_init_centaur(struct cpuinfo_x86 *c); +extern void __cpuinit init_centaur(struct cpuinfo_x86 *c); /* Do some early cpuid on the boot CPU to get some parameter that are needed before check_bugs. Everything advanced is in identify_cpu -- cgit v0.10.2 From 1c47cd638e8302bc38be1f6d81067950e038ebd3 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 30 May 2008 15:42:45 -0700 Subject: x86: fix overlong line in arch/x86/kernel/cpu/amd_64.c Clean up an overlong line in arch/x86/kernel/cpu/amd_64.c. Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 1746f6f..c815c2c 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -229,7 +229,8 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c) * benefit in doing so. */ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && - (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) + (tseg >> PMD_SHIFT) < + (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) set_memory_4k((unsigned long)__va(tseg), 1); } } -- cgit v0.10.2 From 23968f71b26ece45ed52895d41b0208b90a516e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= Date: Thu, 29 May 2008 18:31:14 -0400 Subject: x86: Use structs instead of hardcoded offsets in x86 boot decompressor. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace hardcoded offsets embedded in macros in arch/x86/boot/compressed with proper structure references. Signed-off-by: Kristian Høgsberg Signed-off-by: H. Peter Anvin diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 90456ce..74ed3c0 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -30,6 +30,7 @@ #include #include #include +#include /* WARNING!! * This code is compiled with -fPIC and it is relocated dynamically @@ -187,13 +188,7 @@ static void gzip_release(void **); /* * This is set up by the setup-routine at boot-time */ -static unsigned char *real_mode; /* Pointer to real-mode data */ - -#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2)) -#ifndef STANDARD_MEMORY_BIOS_CALL -#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0)) -#endif -#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) +static struct boot_params *real_mode; /* Pointer to real-mode data */ extern unsigned char input_data[]; extern int input_len; @@ -276,12 +271,13 @@ static void putstr(const char *s) char c; #ifdef CONFIG_X86_32 - if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0) + if (real_mode->screen_info.orig_video_mode == 0 && + lines == 0 && cols == 0) return; #endif - x = RM_SCREEN_INFO.orig_x; - y = RM_SCREEN_INFO.orig_y; + x = real_mode->screen_info.orig_x; + y = real_mode->screen_info.orig_y; while ((c = *s++) != '\0') { if (c == '\n') { @@ -302,8 +298,8 @@ static void putstr(const char *s) } } - RM_SCREEN_INFO.orig_x = x; - RM_SCREEN_INFO.orig_y = y; + real_mode->screen_info.orig_x = x; + real_mode->screen_info.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ outb(14, vidport); @@ -430,7 +426,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, { real_mode = rmode; - if (RM_SCREEN_INFO.orig_video_mode == 7) { + if (real_mode->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; } else { @@ -438,8 +434,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, vidport = 0x3d4; } - lines = RM_SCREEN_INFO.orig_video_lines; - cols = RM_SCREEN_INFO.orig_video_cols; + lines = real_mode->screen_info.orig_video_lines; + cols = real_mode->screen_info.orig_video_cols; window = output; /* Output buffer (Normally at 1M) */ free_mem_ptr = heap; /* Heap */ -- cgit v0.10.2 From 3b6b9293d0f8e1b11630102013ca2a1dcef17d44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= Date: Thu, 29 May 2008 18:31:15 -0400 Subject: x86: Honor 'quiet' command line option in real mode boot decompressor. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch lets the early real mode code look for the 'quiet' option on the kernel command line and pass a loadflag to the decompressor. When this flag is set, we suppress the "Decompressing Linux... Parsing ELF... done." messages. Signed-off-by: Kristian Høgsberg Signed-off-by: H. Peter Anvin diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 74ed3c0..d10e727 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -189,6 +189,7 @@ static void gzip_release(void **); * This is set up by the setup-routine at boot-time */ static struct boot_params *real_mode; /* Pointer to real-mode data */ +static int quiet; extern unsigned char input_data[]; extern int input_len; @@ -391,7 +392,8 @@ static void parse_elf(void *output) return; } - putstr("Parsing ELF... "); + if (!quiet) + putstr("Parsing ELF... "); phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); if (!phdrs) @@ -426,6 +428,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, { real_mode = rmode; + if (real_mode->hdr.loadflags & QUIET_FLAG) + quiet = 1; + if (real_mode->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; @@ -461,9 +466,11 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, #endif makecrc(); - putstr("\nDecompressing Linux... "); + if (!quiet) + putstr("\nDecompressing Linux... "); gunzip(); parse_elf(output); - putstr("done.\nBooting the kernel.\n"); + if (!quiet) + putstr("done.\nBooting the kernel.\n"); return; } diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 77569a4..2296164 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -165,6 +165,10 @@ void main(void) /* Set the video mode */ set_video(); + /* Parse command line for 'quiet' and pass it to decompressor. */ + if (cmdline_find_option_bool("quiet")) + boot_params.hdr.loadflags |= QUIET_FLAG; + /* Do the last things and invoke protected mode */ go_to_protected_mode(); } diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h index f62f473..3e36ba8 100644 --- a/include/asm-x86/bootparam.h +++ b/include/asm-x86/bootparam.h @@ -40,6 +40,7 @@ struct setup_header { __u8 type_of_loader; __u8 loadflags; #define LOADED_HIGH (1<<0) +#define QUIET_FLAG (1<<5) #define KEEP_SEGMENTS (1<<6) #define CAN_USE_HEAP (1<<7) __u16 setup_move_size; -- cgit v0.10.2 From 4039feb5bae72a5fed9ba6bc1a9cfd8dfe0a8613 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 30 May 2008 17:16:20 -0700 Subject: x86: update Documentation/i386/boot.txt Document QUIET_FLAG, correct the definition of several fields, make it clear this applies to the entire x86 architecture, not just i386. Signed-off-by: H. Peter Anvin diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index 95ad15c..147bfe5 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt @@ -1,17 +1,14 @@ - THE LINUX/I386 BOOT PROTOCOL - ---------------------------- + THE LINUX/x86 BOOT PROTOCOL + --------------------------- - H. Peter Anvin - Last update 2007-05-23 - -On the i386 platform, the Linux kernel uses a rather complicated boot +On the x86 platform, the Linux kernel uses a rather complicated boot convention. This has evolved partially due to historical aspects, as well as the desire in the early days to have the kernel itself be a bootable image, the complicated PC memory model and due to changed expectations in the PC industry caused by the effective demise of real-mode DOS as a mainstream operating system. -Currently, the following versions of the Linux/i386 boot protocol exist. +Currently, the following versions of the Linux/x86 boot protocol exist. Old kernels: zImage/Image support only. Some very early kernels may not even support a command line. @@ -372,10 +369,17 @@ Protocol: 2.00+ - If 0, the protected-mode code is loaded at 0x10000. - If 1, the protected-mode code is loaded at 0x100000. + Bit 5 (write): QUIET_FLAG + - If 0, print early messages. + - If 1, suppress early messages. + This requests to the kernel (decompressor and early + kernel) to not write early messages that require + accessing the display hardware directly. + Bit 6 (write): KEEP_SEGMENTS Protocol: 2.07+ - - if 0, reload the segment registers in the 32bit entry point. - - if 1, do not reload the segment registers in the 32bit entry point. + - If 0, reload the segment registers in the 32bit entry point. + - If 1, do not reload the segment registers in the 32bit entry point. Assume that %cs %ds %ss %es are all set to flat segments with a base of 0 (or the equivalent for their environment). @@ -504,7 +508,7 @@ Protocol: 2.06+ maximum size was 255. Field name: hardware_subarch -Type: write +Type: write (optional, defaults to x86/PC) Offset/size: 0x23c/4 Protocol: 2.07+ @@ -520,11 +524,13 @@ Protocol: 2.07+ 0x00000002 Xen Field name: hardware_subarch_data -Type: write +Type: write (subarch-dependent) Offset/size: 0x240/8 Protocol: 2.07+ A pointer to data that is specific to hardware subarch + This field is currently unused for the default x86/PC environment, + do not modify. Field name: payload_offset Type: read @@ -545,6 +551,34 @@ Protocol: 2.08+ The length of the payload. +Field name: setup_data +Type: write (special) +Offset/size: 0x250/8 +Protocol: 2.09+ + + The 64-bit physical pointer to NULL terminated single linked list of + struct setup_data. This is used to define a more extensible boot + parameters passing mechanism. The definition of struct setup_data is + as follow: + + struct setup_data { + u64 next; + u32 type; + u32 len; + u8 data[0]; + }; + + Where, the next is a 64-bit physical pointer to the next node of + linked list, the next field of the last node is 0; the type is used + to identify the contents of data; the len is the length of data + field; the data holds the real payload. + + This list may be modified at a number of points during the bootup + process. Therefore, when modifying this list one should always make + sure to consider the case where the linked list already contains + entries. + + **** THE IMAGE CHECKSUM From boot protocol version 2.08 onwards the CRC-32 is calculated over @@ -553,6 +587,7 @@ initial remainder of 0xffffffff. The checksum is appended to the file; therefore the CRC of the file up to the limit specified in the syssize field of the header is always 0. + **** THE KERNEL COMMAND LINE The kernel command line has become an important way for the boot @@ -584,28 +619,6 @@ command line is entered using the following protocol: covered by setup_move_size, so you may need to adjust this field. -Field name: setup_data -Type: write (obligatory) -Offset/size: 0x250/8 -Protocol: 2.09+ - - The 64-bit physical pointer to NULL terminated single linked list of - struct setup_data. This is used to define a more extensible boot - parameters passing mechanism. The definition of struct setup_data is - as follow: - - struct setup_data { - u64 next; - u32 type; - u32 len; - u8 data[0]; - }; - - Where, the next is a 64-bit physical pointer to the next node of - linked list, the next field of the last node is 0; the type is used - to identify the contents of data; the len is the length of data - field; the data holds the real payload. - **** MEMORY LAYOUT OF THE REAL-MODE CODE -- cgit v0.10.2 From 23deb06821442506615f34bd92ccd6a2422629d7 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 30 May 2008 17:19:03 -0700 Subject: x86: move x86-specific documentation into Documentation/x86 The current organization of the x86 documentation makes it appear as if the "i386" documentation doesn't apply to x86-64, which is does. Thus, move that documentation into Documentation/x86, and move the x86-64-specific stuff into Documentation/x86/x86_64 with the eventual goal to move stuff that isn't actually 64-bit specific back into Documentation/x86. Signed-off-by: H. Peter Anvin diff --git a/Documentation/i386/IO-APIC.txt b/Documentation/i386/IO-APIC.txt deleted file mode 100644 index 30b4c71..0000000 --- a/Documentation/i386/IO-APIC.txt +++ /dev/null @@ -1,119 +0,0 @@ -Most (all) Intel-MP compliant SMP boards have the so-called 'IO-APIC', -which is an enhanced interrupt controller. It enables us to route -hardware interrupts to multiple CPUs, or to CPU groups. Without an -IO-APIC, interrupts from hardware will be delivered only to the -CPU which boots the operating system (usually CPU#0). - -Linux supports all variants of compliant SMP boards, including ones with -multiple IO-APICs. Multiple IO-APICs are used in high-end servers to -distribute IRQ load further. - -There are (a few) known breakages in certain older boards, such bugs are -usually worked around by the kernel. If your MP-compliant SMP board does -not boot Linux, then consult the linux-smp mailing list archives first. - -If your box boots fine with enabled IO-APIC IRQs, then your -/proc/interrupts will look like this one: - - ----------------------------> - hell:~> cat /proc/interrupts - CPU0 - 0: 1360293 IO-APIC-edge timer - 1: 4 IO-APIC-edge keyboard - 2: 0 XT-PIC cascade - 13: 1 XT-PIC fpu - 14: 1448 IO-APIC-edge ide0 - 16: 28232 IO-APIC-level Intel EtherExpress Pro 10/100 Ethernet - 17: 51304 IO-APIC-level eth0 - NMI: 0 - ERR: 0 - hell:~> - <---------------------------- - -Some interrupts are still listed as 'XT PIC', but this is not a problem; -none of those IRQ sources is performance-critical. - - -In the unlikely case that your board does not create a working mp-table, -you can use the pirq= boot parameter to 'hand-construct' IRQ entries. This -is non-trivial though and cannot be automated. One sample /etc/lilo.conf -entry: - - append="pirq=15,11,10" - -The actual numbers depend on your system, on your PCI cards and on their -PCI slot position. Usually PCI slots are 'daisy chained' before they are -connected to the PCI chipset IRQ routing facility (the incoming PIRQ1-4 -lines): - - ,-. ,-. ,-. ,-. ,-. - PIRQ4 ----| |-. ,-| |-. ,-| |-. ,-| |--------| | - |S| \ / |S| \ / |S| \ / |S| |S| - PIRQ3 ----|l|-. `/---|l|-. `/---|l|-. `/---|l|--------|l| - |o| \/ |o| \/ |o| \/ |o| |o| - PIRQ2 ----|t|-./`----|t|-./`----|t|-./`----|t|--------|t| - |1| /\ |2| /\ |3| /\ |4| |5| - PIRQ1 ----| |- `----| |- `----| |- `----| |--------| | - `-' `-' `-' `-' `-' - -Every PCI card emits a PCI IRQ, which can be INTA, INTB, INTC or INTD: - - ,-. - INTD--| | - |S| - INTC--|l| - |o| - INTB--|t| - |x| - INTA--| | - `-' - -These INTA-D PCI IRQs are always 'local to the card', their real meaning -depends on which slot they are in. If you look at the daisy chaining diagram, -a card in slot4, issuing INTA IRQ, it will end up as a signal on PIRQ4 of -the PCI chipset. Most cards issue INTA, this creates optimal distribution -between the PIRQ lines. (distributing IRQ sources properly is not a -necessity, PCI IRQs can be shared at will, but it's a good for performance -to have non shared interrupts). Slot5 should be used for videocards, they -do not use interrupts normally, thus they are not daisy chained either. - -so if you have your SCSI card (IRQ11) in Slot1, Tulip card (IRQ9) in -Slot2, then you'll have to specify this pirq= line: - - append="pirq=11,9" - -the following script tries to figure out such a default pirq= line from -your PCI configuration: - - echo -n pirq=; echo `scanpci | grep T_L | cut -c56-` | sed 's/ /,/g' - -note that this script wont work if you have skipped a few slots or if your -board does not do default daisy-chaining. (or the IO-APIC has the PIRQ pins -connected in some strange way). E.g. if in the above case you have your SCSI -card (IRQ11) in Slot3, and have Slot1 empty: - - append="pirq=0,9,11" - -[value '0' is a generic 'placeholder', reserved for empty (or non-IRQ emitting) -slots.] - -Generally, it's always possible to find out the correct pirq= settings, just -permute all IRQ numbers properly ... it will take some time though. An -'incorrect' pirq line will cause the booting process to hang, or a device -won't function properly (e.g. if it's inserted as a module). - -If you have 2 PCI buses, then you can use up to 8 pirq values, although such -boards tend to have a good configuration. - -Be prepared that it might happen that you need some strange pirq line: - - append="pirq=0,0,0,0,0,0,9,11" - -Use smart trial-and-error techniques to find out the correct pirq line ... - -Good luck and mail to linux-smp@vger.kernel.org or -linux-kernel@vger.kernel.org if you have any problems that are not covered -by this document. - --- mingo - diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt deleted file mode 100644 index 147bfe5..0000000 --- a/Documentation/i386/boot.txt +++ /dev/null @@ -1,900 +0,0 @@ - THE LINUX/x86 BOOT PROTOCOL - --------------------------- - -On the x86 platform, the Linux kernel uses a rather complicated boot -convention. This has evolved partially due to historical aspects, as -well as the desire in the early days to have the kernel itself be a -bootable image, the complicated PC memory model and due to changed -expectations in the PC industry caused by the effective demise of -real-mode DOS as a mainstream operating system. - -Currently, the following versions of the Linux/x86 boot protocol exist. - -Old kernels: zImage/Image support only. Some very early kernels - may not even support a command line. - -Protocol 2.00: (Kernel 1.3.73) Added bzImage and initrd support, as - well as a formalized way to communicate between the - boot loader and the kernel. setup.S made relocatable, - although the traditional setup area still assumed - writable. - -Protocol 2.01: (Kernel 1.3.76) Added a heap overrun warning. - -Protocol 2.02: (Kernel 2.4.0-test3-pre3) New command line protocol. - Lower the conventional memory ceiling. No overwrite - of the traditional setup area, thus making booting - safe for systems which use the EBDA from SMM or 32-bit - BIOS entry points. zImage deprecated but still - supported. - -Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible - initrd address available to the bootloader. - -Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes. - -Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. - Introduce relocatable_kernel and kernel_alignment fields. - -Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of - the boot command line. - -Protocol 2.07: (Kernel 2.6.24) Added paravirtualised boot protocol. - Introduced hardware_subarch and hardware_subarch_data - and KEEP_SEGMENTS flag in load_flags. - -Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format - payload. Introduced payload_offset and payload length - fields to aid in locating the payload. - -Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical - pointer to single linked list of struct setup_data. - -**** MEMORY LAYOUT - -The traditional memory map for the kernel loader, used for Image or -zImage kernels, typically looks like: - - | | -0A0000 +------------------------+ - | Reserved for BIOS | Do not use. Reserved for BIOS EBDA. -09A000 +------------------------+ - | Command line | - | Stack/heap | For use by the kernel real-mode code. -098000 +------------------------+ - | Kernel setup | The kernel real-mode code. -090200 +------------------------+ - | Kernel boot sector | The kernel legacy boot sector. -090000 +------------------------+ - | Protected-mode kernel | The bulk of the kernel image. -010000 +------------------------+ - | Boot loader | <- Boot sector entry point 0000:7C00 -001000 +------------------------+ - | Reserved for MBR/BIOS | -000800 +------------------------+ - | Typically used by MBR | -000600 +------------------------+ - | BIOS use only | -000000 +------------------------+ - - -When using bzImage, the protected-mode kernel was relocated to -0x100000 ("high memory"), and the kernel real-mode block (boot sector, -setup, and stack/heap) was made relocatable to any address between -0x10000 and end of low memory. Unfortunately, in protocols 2.00 and -2.01 the 0x90000+ memory range is still used internally by the kernel; -the 2.02 protocol resolves that problem. - -It is desirable to keep the "memory ceiling" -- the highest point in -low memory touched by the boot loader -- as low as possible, since -some newer BIOSes have begun to allocate some rather large amounts of -memory, called the Extended BIOS Data Area, near the top of low -memory. The boot loader should use the "INT 12h" BIOS call to verify -how much low memory is available. - -Unfortunately, if INT 12h reports that the amount of memory is too -low, there is usually nothing the boot loader can do but to report an -error to the user. The boot loader should therefore be designed to -take up as little space in low memory as it reasonably can. For -zImage or old bzImage kernels, which need data written into the -0x90000 segment, the boot loader should make sure not to use memory -above the 0x9A000 point; too many BIOSes will break above that point. - -For a modern bzImage kernel with boot protocol version >= 2.02, a -memory layout like the following is suggested: - - ~ ~ - | Protected-mode kernel | -100000 +------------------------+ - | I/O memory hole | -0A0000 +------------------------+ - | Reserved for BIOS | Leave as much as possible unused - ~ ~ - | Command line | (Can also be below the X+10000 mark) -X+10000 +------------------------+ - | Stack/heap | For use by the kernel real-mode code. -X+08000 +------------------------+ - | Kernel setup | The kernel real-mode code. - | Kernel boot sector | The kernel legacy boot sector. -X +------------------------+ - | Boot loader | <- Boot sector entry point 0000:7C00 -001000 +------------------------+ - | Reserved for MBR/BIOS | -000800 +------------------------+ - | Typically used by MBR | -000600 +------------------------+ - | BIOS use only | -000000 +------------------------+ - -... where the address X is as low as the design of the boot loader -permits. - - -**** THE REAL-MODE KERNEL HEADER - -In the following text, and anywhere in the kernel boot sequence, "a -sector" refers to 512 bytes. It is independent of the actual sector -size of the underlying medium. - -The first step in loading a Linux kernel should be to load the -real-mode code (boot sector and setup code) and then examine the -following header at offset 0x01f1. The real-mode code can total up to -32K, although the boot loader may choose to load only the first two -sectors (1K) and then examine the bootup sector size. - -The header looks like: - -Offset Proto Name Meaning -/Size - -01F1/1 ALL(1 setup_sects The size of the setup in sectors -01F2/2 ALL root_flags If set, the root is mounted readonly -01F4/4 2.04+(2 syssize The size of the 32-bit code in 16-byte paras -01F8/2 ALL ram_size DO NOT USE - for bootsect.S use only -01FA/2 ALL vid_mode Video mode control -01FC/2 ALL root_dev Default root device number -01FE/2 ALL boot_flag 0xAA55 magic number -0200/2 2.00+ jump Jump instruction -0202/4 2.00+ header Magic signature "HdrS" -0206/2 2.00+ version Boot protocol version supported -0208/4 2.00+ realmode_swtch Boot loader hook (see below) -020C/2 2.00+ start_sys The load-low segment (0x1000) (obsolete) -020E/2 2.00+ kernel_version Pointer to kernel version string -0210/1 2.00+ type_of_loader Boot loader identifier -0211/1 2.00+ loadflags Boot protocol option flags -0212/2 2.00+ setup_move_size Move to high memory size (used with hooks) -0214/4 2.00+ code32_start Boot loader hook (see below) -0218/4 2.00+ ramdisk_image initrd load address (set by boot loader) -021C/4 2.00+ ramdisk_size initrd size (set by boot loader) -0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only -0224/2 2.01+ heap_end_ptr Free memory after setup end -0226/2 N/A pad1 Unused -0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line -022C/4 2.03+ initrd_addr_max Highest legal initrd address -0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel -0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not -0235/3 N/A pad2 Unused -0238/4 2.06+ cmdline_size Maximum size of the kernel command line -023C/4 2.07+ hardware_subarch Hardware subarchitecture -0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data -0248/4 2.08+ payload_offset Offset of kernel payload -024C/4 2.08+ payload_length Length of kernel payload -0250/8 2.09+ setup_data 64-bit physical pointer to linked list - of struct setup_data - -(1) For backwards compatibility, if the setup_sects field contains 0, the - real value is 4. - -(2) For boot protocol prior to 2.04, the upper two bytes of the syssize - field are unusable, which means the size of a bzImage kernel - cannot be determined. - -If the "HdrS" (0x53726448) magic number is not found at offset 0x202, -the boot protocol version is "old". Loading an old kernel, the -following parameters should be assumed: - - Image type = zImage - initrd not supported - Real-mode kernel must be located at 0x90000. - -Otherwise, the "version" field contains the protocol version, -e.g. protocol version 2.01 will contain 0x0201 in this field. When -setting fields in the header, you must make sure only to set fields -supported by the protocol version in use. - - -**** DETAILS OF HEADER FIELDS - -For each field, some are information from the kernel to the bootloader -("read"), some are expected to be filled out by the bootloader -("write"), and some are expected to be read and modified by the -bootloader ("modify"). - -All general purpose boot loaders should write the fields marked -(obligatory). Boot loaders who want to load the kernel at a -nonstandard address should fill in the fields marked (reloc); other -boot loaders can ignore those fields. - -The byte order of all fields is littleendian (this is x86, after all.) - -Field name: setup_sects -Type: read -Offset/size: 0x1f1/1 -Protocol: ALL - - The size of the setup code in 512-byte sectors. If this field is - 0, the real value is 4. The real-mode code consists of the boot - sector (always one 512-byte sector) plus the setup code. - -Field name: root_flags -Type: modify (optional) -Offset/size: 0x1f2/2 -Protocol: ALL - - If this field is nonzero, the root defaults to readonly. The use of - this field is deprecated; use the "ro" or "rw" options on the - command line instead. - -Field name: syssize -Type: read -Offset/size: 0x1f4/4 (protocol 2.04+) 0x1f4/2 (protocol ALL) -Protocol: 2.04+ - - The size of the protected-mode code in units of 16-byte paragraphs. - For protocol versions older than 2.04 this field is only two bytes - wide, and therefore cannot be trusted for the size of a kernel if - the LOAD_HIGH flag is set. - -Field name: ram_size -Type: kernel internal -Offset/size: 0x1f8/2 -Protocol: ALL - - This field is obsolete. - -Field name: vid_mode -Type: modify (obligatory) -Offset/size: 0x1fa/2 - - Please see the section on SPECIAL COMMAND LINE OPTIONS. - -Field name: root_dev -Type: modify (optional) -Offset/size: 0x1fc/2 -Protocol: ALL - - The default root device device number. The use of this field is - deprecated, use the "root=" option on the command line instead. - -Field name: boot_flag -Type: read -Offset/size: 0x1fe/2 -Protocol: ALL - - Contains 0xAA55. This is the closest thing old Linux kernels have - to a magic number. - -Field name: jump -Type: read -Offset/size: 0x200/2 -Protocol: 2.00+ - - Contains an x86 jump instruction, 0xEB followed by a signed offset - relative to byte 0x202. This can be used to determine the size of - the header. - -Field name: header -Type: read -Offset/size: 0x202/4 -Protocol: 2.00+ - - Contains the magic number "HdrS" (0x53726448). - -Field name: version -Type: read -Offset/size: 0x206/2 -Protocol: 2.00+ - - Contains the boot protocol version, in (major << 8)+minor format, - e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version - 10.17. - -Field name: readmode_swtch -Type: modify (optional) -Offset/size: 0x208/4 -Protocol: 2.00+ - - Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) - -Field name: start_sys -Type: read -Offset/size: 0x20c/4 -Protocol: 2.00+ - - The load low segment (0x1000). Obsolete. - -Field name: kernel_version -Type: read -Offset/size: 0x20e/2 -Protocol: 2.00+ - - If set to a nonzero value, contains a pointer to a NUL-terminated - human-readable kernel version number string, less 0x200. This can - be used to display the kernel version to the user. This value - should be less than (0x200*setup_sects). - - For example, if this value is set to 0x1c00, the kernel version - number string can be found at offset 0x1e00 in the kernel file. - This is a valid value if and only if the "setup_sects" field - contains the value 15 or higher, as: - - 0x1c00 < 15*0x200 (= 0x1e00) but - 0x1c00 >= 14*0x200 (= 0x1c00) - - 0x1c00 >> 9 = 14, so the minimum value for setup_secs is 15. - -Field name: type_of_loader -Type: write (obligatory) -Offset/size: 0x210/1 -Protocol: 2.00+ - - If your boot loader has an assigned id (see table below), enter - 0xTV here, where T is an identifier for the boot loader and V is - a version number. Otherwise, enter 0xFF here. - - Assigned boot loader ids: - 0 LILO (0x00 reserved for pre-2.00 bootloader) - 1 Loadlin - 2 bootsect-loader (0x20, all other values reserved) - 3 SYSLINUX - 4 EtherBoot - 5 ELILO - 7 GRuB - 8 U-BOOT - 9 Xen - A Gujin - B Qemu - - Please contact if you need a bootloader ID - value assigned. - -Field name: loadflags -Type: modify (obligatory) -Offset/size: 0x211/1 -Protocol: 2.00+ - - This field is a bitmask. - - Bit 0 (read): LOADED_HIGH - - If 0, the protected-mode code is loaded at 0x10000. - - If 1, the protected-mode code is loaded at 0x100000. - - Bit 5 (write): QUIET_FLAG - - If 0, print early messages. - - If 1, suppress early messages. - This requests to the kernel (decompressor and early - kernel) to not write early messages that require - accessing the display hardware directly. - - Bit 6 (write): KEEP_SEGMENTS - Protocol: 2.07+ - - If 0, reload the segment registers in the 32bit entry point. - - If 1, do not reload the segment registers in the 32bit entry point. - Assume that %cs %ds %ss %es are all set to flat segments with - a base of 0 (or the equivalent for their environment). - - Bit 7 (write): CAN_USE_HEAP - Set this bit to 1 to indicate that the value entered in the - heap_end_ptr is valid. If this field is clear, some setup code - functionality will be disabled. - -Field name: setup_move_size -Type: modify (obligatory) -Offset/size: 0x212/2 -Protocol: 2.00-2.01 - - When using protocol 2.00 or 2.01, if the real mode kernel is not - loaded at 0x90000, it gets moved there later in the loading - sequence. Fill in this field if you want additional data (such as - the kernel command line) moved in addition to the real-mode kernel - itself. - - The unit is bytes starting with the beginning of the boot sector. - - This field is can be ignored when the protocol is 2.02 or higher, or - if the real-mode code is loaded at 0x90000. - -Field name: code32_start -Type: modify (optional, reloc) -Offset/size: 0x214/4 -Protocol: 2.00+ - - The address to jump to in protected mode. This defaults to the load - address of the kernel, and can be used by the boot loader to - determine the proper load address. - - This field can be modified for two purposes: - - 1. as a boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) - - 2. if a bootloader which does not install a hook loads a - relocatable kernel at a nonstandard address it will have to modify - this field to point to the load address. - -Field name: ramdisk_image -Type: write (obligatory) -Offset/size: 0x218/4 -Protocol: 2.00+ - - The 32-bit linear address of the initial ramdisk or ramfs. Leave at - zero if there is no initial ramdisk/ramfs. - -Field name: ramdisk_size -Type: write (obligatory) -Offset/size: 0x21c/4 -Protocol: 2.00+ - - Size of the initial ramdisk or ramfs. Leave at zero if there is no - initial ramdisk/ramfs. - -Field name: bootsect_kludge -Type: kernel internal -Offset/size: 0x220/4 -Protocol: 2.00+ - - This field is obsolete. - -Field name: heap_end_ptr -Type: write (obligatory) -Offset/size: 0x224/2 -Protocol: 2.01+ - - Set this field to the offset (from the beginning of the real-mode - code) of the end of the setup stack/heap, minus 0x0200. - -Field name: cmd_line_ptr -Type: write (obligatory) -Offset/size: 0x228/4 -Protocol: 2.02+ - - Set this field to the linear address of the kernel command line. - The kernel command line can be located anywhere between the end of - the setup heap and 0xA0000; it does not have to be located in the - same 64K segment as the real-mode code itself. - - Fill in this field even if your boot loader does not support a - command line, in which case you can point this to an empty string - (or better yet, to the string "auto".) If this field is left at - zero, the kernel will assume that your boot loader does not support - the 2.02+ protocol. - -Field name: initrd_addr_max -Type: read -Offset/size: 0x22c/4 -Protocol: 2.03+ - - The maximum address that may be occupied by the initial - ramdisk/ramfs contents. For boot protocols 2.02 or earlier, this - field is not present, and the maximum address is 0x37FFFFFF. (This - address is defined as the address of the highest safe byte, so if - your ramdisk is exactly 131072 bytes long and this field is - 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) - -Field name: kernel_alignment -Type: read (reloc) -Offset/size: 0x230/4 -Protocol: 2.05+ - - Alignment unit required by the kernel (if relocatable_kernel is true.) - -Field name: relocatable_kernel -Type: read (reloc) -Offset/size: 0x234/1 -Protocol: 2.05+ - - If this field is nonzero, the protected-mode part of the kernel can - be loaded at any address that satisfies the kernel_alignment field. - After loading, the boot loader must set the code32_start field to - point to the loaded code, or to a boot loader hook. - -Field name: cmdline_size -Type: read -Offset/size: 0x238/4 -Protocol: 2.06+ - - The maximum size of the command line without the terminating - zero. This means that the command line can contain at most - cmdline_size characters. With protocol version 2.05 and earlier, the - maximum size was 255. - -Field name: hardware_subarch -Type: write (optional, defaults to x86/PC) -Offset/size: 0x23c/4 -Protocol: 2.07+ - - In a paravirtualized environment the hardware low level architectural - pieces such as interrupt handling, page table handling, and - accessing process control registers needs to be done differently. - - This field allows the bootloader to inform the kernel we are in one - one of those environments. - - 0x00000000 The default x86/PC environment - 0x00000001 lguest - 0x00000002 Xen - -Field name: hardware_subarch_data -Type: write (subarch-dependent) -Offset/size: 0x240/8 -Protocol: 2.07+ - - A pointer to data that is specific to hardware subarch - This field is currently unused for the default x86/PC environment, - do not modify. - -Field name: payload_offset -Type: read -Offset/size: 0x248/4 -Protocol: 2.08+ - - If non-zero then this field contains the offset from the end of the - real-mode code to the payload. - - The payload may be compressed. The format of both the compressed and - uncompressed data should be determined using the standard magic - numbers. Currently only gzip compressed ELF is used. - -Field name: payload_length -Type: read -Offset/size: 0x24c/4 -Protocol: 2.08+ - - The length of the payload. - -Field name: setup_data -Type: write (special) -Offset/size: 0x250/8 -Protocol: 2.09+ - - The 64-bit physical pointer to NULL terminated single linked list of - struct setup_data. This is used to define a more extensible boot - parameters passing mechanism. The definition of struct setup_data is - as follow: - - struct setup_data { - u64 next; - u32 type; - u32 len; - u8 data[0]; - }; - - Where, the next is a 64-bit physical pointer to the next node of - linked list, the next field of the last node is 0; the type is used - to identify the contents of data; the len is the length of data - field; the data holds the real payload. - - This list may be modified at a number of points during the bootup - process. Therefore, when modifying this list one should always make - sure to consider the case where the linked list already contains - entries. - - -**** THE IMAGE CHECKSUM - -From boot protocol version 2.08 onwards the CRC-32 is calculated over -the entire file using the characteristic polynomial 0x04C11DB7 and an -initial remainder of 0xffffffff. The checksum is appended to the -file; therefore the CRC of the file up to the limit specified in the -syssize field of the header is always 0. - - -**** THE KERNEL COMMAND LINE - -The kernel command line has become an important way for the boot -loader to communicate with the kernel. Some of its options are also -relevant to the boot loader itself, see "special command line options" -below. - -The kernel command line is a null-terminated string. The maximum -length can be retrieved from the field cmdline_size. Before protocol -version 2.06, the maximum was 255 characters. A string that is too -long will be automatically truncated by the kernel. - -If the boot protocol version is 2.02 or later, the address of the -kernel command line is given by the header field cmd_line_ptr (see -above.) This address can be anywhere between the end of the setup -heap and 0xA0000. - -If the protocol version is *not* 2.02 or higher, the kernel -command line is entered using the following protocol: - - At offset 0x0020 (word), "cmd_line_magic", enter the magic - number 0xA33F. - - At offset 0x0022 (word), "cmd_line_offset", enter the offset - of the kernel command line (relative to the start of the - real-mode kernel). - - The kernel command line *must* be within the memory region - covered by setup_move_size, so you may need to adjust this - field. - - -**** MEMORY LAYOUT OF THE REAL-MODE CODE - -The real-mode code requires a stack/heap to be set up, as well as -memory allocated for the kernel command line. This needs to be done -in the real-mode accessible memory in bottom megabyte. - -It should be noted that modern machines often have a sizable Extended -BIOS Data Area (EBDA). As a result, it is advisable to use as little -of the low megabyte as possible. - -Unfortunately, under the following circumstances the 0x90000 memory -segment has to be used: - - - When loading a zImage kernel ((loadflags & 0x01) == 0). - - When loading a 2.01 or earlier boot protocol kernel. - - -> For the 2.00 and 2.01 boot protocols, the real-mode code - can be loaded at another address, but it is internally - relocated to 0x90000. For the "old" protocol, the - real-mode code must be loaded at 0x90000. - -When loading at 0x90000, avoid using memory above 0x9a000. - -For boot protocol 2.02 or higher, the command line does not have to be -located in the same 64K segment as the real-mode setup code; it is -thus permitted to give the stack/heap the full 64K segment and locate -the command line above it. - -The kernel command line should not be located below the real-mode -code, nor should it be located in high memory. - - -**** SAMPLE BOOT CONFIGURATION - -As a sample configuration, assume the following layout of the real -mode segment: - - When loading below 0x90000, use the entire segment: - - 0x0000-0x7fff Real mode kernel - 0x8000-0xdfff Stack and heap - 0xe000-0xffff Kernel command line - - When loading at 0x90000 OR the protocol version is 2.01 or earlier: - - 0x0000-0x7fff Real mode kernel - 0x8000-0x97ff Stack and heap - 0x9800-0x9fff Kernel command line - -Such a boot loader should enter the following fields in the header: - - unsigned long base_ptr; /* base address for real-mode segment */ - - if ( setup_sects == 0 ) { - setup_sects = 4; - } - - if ( protocol >= 0x0200 ) { - type_of_loader = ; - if ( loading_initrd ) { - ramdisk_image = ; - ramdisk_size = ; - } - - if ( protocol >= 0x0202 && loadflags & 0x01 ) - heap_end = 0xe000; - else - heap_end = 0x9800; - - if ( protocol >= 0x0201 ) { - heap_end_ptr = heap_end - 0x200; - loadflags |= 0x80; /* CAN_USE_HEAP */ - } - - if ( protocol >= 0x0202 ) { - cmd_line_ptr = base_ptr + heap_end; - strcpy(cmd_line_ptr, cmdline); - } else { - cmd_line_magic = 0xA33F; - cmd_line_offset = heap_end; - setup_move_size = heap_end + strlen(cmdline)+1; - strcpy(base_ptr+cmd_line_offset, cmdline); - } - } else { - /* Very old kernel */ - - heap_end = 0x9800; - - cmd_line_magic = 0xA33F; - cmd_line_offset = heap_end; - - /* A very old kernel MUST have its real-mode code - loaded at 0x90000 */ - - if ( base_ptr != 0x90000 ) { - /* Copy the real-mode kernel */ - memcpy(0x90000, base_ptr, (setup_sects+1)*512); - base_ptr = 0x90000; /* Relocated */ - } - - strcpy(0x90000+cmd_line_offset, cmdline); - - /* It is recommended to clear memory up to the 32K mark */ - memset(0x90000 + (setup_sects+1)*512, 0, - (64-(setup_sects+1))*512); - } - - -**** LOADING THE REST OF THE KERNEL - -The 32-bit (non-real-mode) kernel starts at offset (setup_sects+1)*512 -in the kernel file (again, if setup_sects == 0 the real value is 4.) -It should be loaded at address 0x10000 for Image/zImage kernels and -0x100000 for bzImage kernels. - -The kernel is a bzImage kernel if the protocol >= 2.00 and the 0x01 -bit (LOAD_HIGH) in the loadflags field is set: - - is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01); - load_address = is_bzImage ? 0x100000 : 0x10000; - -Note that Image/zImage kernels can be up to 512K in size, and thus use -the entire 0x10000-0x90000 range of memory. This means it is pretty -much a requirement for these kernels to load the real-mode part at -0x90000. bzImage kernels allow much more flexibility. - - -**** SPECIAL COMMAND LINE OPTIONS - -If the command line provided by the boot loader is entered by the -user, the user may expect the following command line options to work. -They should normally not be deleted from the kernel command line even -though not all of them are actually meaningful to the kernel. Boot -loader authors who need additional command line options for the boot -loader itself should get them registered in -Documentation/kernel-parameters.txt to make sure they will not -conflict with actual kernel options now or in the future. - - vga= - here is either an integer (in C notation, either - decimal, octal, or hexadecimal) or one of the strings - "normal" (meaning 0xFFFF), "ext" (meaning 0xFFFE) or "ask" - (meaning 0xFFFD). This value should be entered into the - vid_mode field, as it is used by the kernel before the command - line is parsed. - - mem= - is an integer in C notation optionally followed by - (case insensitive) K, M, G, T, P or E (meaning << 10, << 20, - << 30, << 40, << 50 or << 60). This specifies the end of - memory to the kernel. This affects the possible placement of - an initrd, since an initrd should be placed near end of - memory. Note that this is an option to *both* the kernel and - the bootloader! - - initrd= - An initrd should be loaded. The meaning of is - obviously bootloader-dependent, and some boot loaders - (e.g. LILO) do not have such a command. - -In addition, some boot loaders add the following options to the -user-specified command line: - - BOOT_IMAGE= - The boot image which was loaded. Again, the meaning of - is obviously bootloader-dependent. - - auto - The kernel was booted without explicit user intervention. - -If these options are added by the boot loader, it is highly -recommended that they are located *first*, before the user-specified -or configuration-specified command line. Otherwise, "init=/bin/sh" -gets confused by the "auto" option. - - -**** RUNNING THE KERNEL - -The kernel is started by jumping to the kernel entry point, which is -located at *segment* offset 0x20 from the start of the real mode -kernel. This means that if you loaded your real-mode kernel code at -0x90000, the kernel entry point is 9020:0000. - -At entry, ds = es = ss should point to the start of the real-mode -kernel code (0x9000 if the code is loaded at 0x90000), sp should be -set up properly, normally pointing to the top of the heap, and -interrupts should be disabled. Furthermore, to guard against bugs in -the kernel, it is recommended that the boot loader sets fs = gs = ds = -es = ss. - -In our example from above, we would do: - - /* Note: in the case of the "old" kernel protocol, base_ptr must - be == 0x90000 at this point; see the previous sample code */ - - seg = base_ptr >> 4; - - cli(); /* Enter with interrupts disabled! */ - - /* Set up the real-mode kernel stack */ - _SS = seg; - _SP = heap_end; - - _DS = _ES = _FS = _GS = seg; - jmp_far(seg+0x20, 0); /* Run the kernel */ - -If your boot sector accesses a floppy drive, it is recommended to -switch off the floppy motor before running the kernel, since the -kernel boot leaves interrupts off and thus the motor will not be -switched off, especially if the loaded kernel has the floppy driver as -a demand-loaded module! - - -**** ADVANCED BOOT LOADER HOOKS - -If the boot loader runs in a particularly hostile environment (such as -LOADLIN, which runs under DOS) it may be impossible to follow the -standard memory location requirements. Such a boot loader may use the -following hooks that, if set, are invoked by the kernel at the -appropriate time. The use of these hooks should probably be -considered an absolutely last resort! - -IMPORTANT: All the hooks are required to preserve %esp, %ebp, %esi and -%edi across invocation. - - realmode_swtch: - A 16-bit real mode far subroutine invoked immediately before - entering protected mode. The default routine disables NMI, so - your routine should probably do so, too. - - code32_start: - A 32-bit flat-mode routine *jumped* to immediately after the - transition to protected mode, but before the kernel is - uncompressed. No segments, except CS, are guaranteed to be - set up (current kernels do, but older ones do not); you should - set them up to BOOT_DS (0x18) yourself. - - After completing your hook, you should jump to the address - that was in this field before your boot loader overwrote it - (relocated, if appropriate.) - - -**** 32-bit BOOT PROTOCOL - -For machine with some new BIOS other than legacy BIOS, such as EFI, -LinuxBIOS, etc, and kexec, the 16-bit real mode setup code in kernel -based on legacy BIOS can not be used, so a 32-bit boot protocol needs -to be defined. - -In 32-bit boot protocol, the first step in loading a Linux kernel -should be to setup the boot parameters (struct boot_params, -traditionally known as "zero page"). The memory for struct boot_params -should be allocated and initialized to all zero. Then the setup header -from offset 0x01f1 of kernel image on should be loaded into struct -boot_params and examined. The end of setup header can be calculated as -follow: - - 0x0202 + byte value at offset 0x0201 - -In addition to read/modify/write the setup header of the struct -boot_params as that of 16-bit boot protocol, the boot loader should -also fill the additional fields of the struct boot_params as that -described in zero-page.txt. - -After setupping the struct boot_params, the boot loader can load the -32/64-bit kernel in the same way as that of 16-bit boot protocol. - -In 32-bit boot protocol, the kernel is started by jumping to the -32-bit kernel entry point, which is the start address of loaded -32/64-bit kernel. - -At entry, the CPU must be in 32-bit protected mode with paging -disabled; a GDT must be loaded with the descriptors for selectors -__BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat -segment; __BOOS_CS must have execute/read permission, and __BOOT_DS -must have read/write permission; CS must be __BOOT_CS and DS, ES, SS -must be __BOOT_DS; interrupt must be disabled; %esi must hold the base -address of the struct boot_params; %ebp, %edi and %ebx must be zero. diff --git a/Documentation/i386/usb-legacy-support.txt b/Documentation/i386/usb-legacy-support.txt deleted file mode 100644 index 1894cdf..0000000 --- a/Documentation/i386/usb-legacy-support.txt +++ /dev/null @@ -1,44 +0,0 @@ -USB Legacy support -~~~~~~~~~~~~~~~~~~ - -Vojtech Pavlik , January 2004 - - -Also known as "USB Keyboard" or "USB Mouse support" in the BIOS Setup is a -feature that allows one to use the USB mouse and keyboard as if they were -their classic PS/2 counterparts. This means one can use an USB keyboard to -type in LILO for example. - -It has several drawbacks, though: - -1) On some machines, the emulated PS/2 mouse takes over even when no USB - mouse is present and a real PS/2 mouse is present. In that case the extra - features (wheel, extra buttons, touchpad mode) of the real PS/2 mouse may - not be available. - -2) If CONFIG_HIGHMEM64G is enabled, the PS/2 mouse emulation can cause - system crashes, because the SMM BIOS is not expecting to be in PAE mode. - The Intel E7505 is a typical machine where this happens. - -3) If AMD64 64-bit mode is enabled, again system crashes often happen, - because the SMM BIOS isn't expecting the CPU to be in 64-bit mode. The - BIOS manufacturers only test with Windows, and Windows doesn't do 64-bit - yet. - -Solutions: - -Problem 1) can be solved by loading the USB drivers prior to loading the -PS/2 mouse driver. Since the PS/2 mouse driver is in 2.6 compiled into -the kernel unconditionally, this means the USB drivers need to be -compiled-in, too. - -Problem 2) can currently only be solved by either disabling HIGHMEM64G -in the kernel config or USB Legacy support in the BIOS. A BIOS update -could help, but so far no such update exists. - -Problem 3) is usually fixed by a BIOS update. Check the board -manufacturers web site. If an update is not available, disable USB -Legacy support in the BIOS. If this alone doesn't help, try also adding -idle=poll on the kernel command line. The BIOS may be entering the SMM -on the HLT instruction as well. - diff --git a/Documentation/i386/zero-page.txt b/Documentation/i386/zero-page.txt deleted file mode 100644 index 169ad42..0000000 --- a/Documentation/i386/zero-page.txt +++ /dev/null @@ -1,31 +0,0 @@ -The additional fields in struct boot_params as a part of 32-bit boot -protocol of kernel. These should be filled by bootloader or 16-bit -real-mode setup code of the kernel. References/settings to it mainly -are in: - - include/asm-x86/bootparam.h - - -Offset Proto Name Meaning -/Size - -000/040 ALL screen_info Text mode or frame buffer information - (struct screen_info) -040/014 ALL apm_bios_info APM BIOS information (struct apm_bios_info) -060/010 ALL ist_info Intel SpeedStep (IST) BIOS support information - (struct ist_info) -080/010 ALL hd0_info hd0 disk parameter, OBSOLETE!! -090/010 ALL hd1_info hd1 disk parameter, OBSOLETE!! -0A0/010 ALL sys_desc_table System description table (struct sys_desc_table) -140/080 ALL edid_info Video mode setup (struct edid_info) -1C0/020 ALL efi_info EFI 32 information (struct efi_info) -1E0/004 ALL alk_mem_k Alternative mem check, in KB -1E4/004 ALL scratch Scratch field for the kernel setup code -1E8/001 ALL e820_entries Number of entries in e820_map (below) -1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below) -1EA/001 ALL edd_mbr_sig_buf_entries Number of entries in edd_mbr_sig_buffer - (below) -290/040 ALL edd_mbr_sig_buffer EDD MBR signatures -2D0/A00 ALL e820_map E820 memory map table - (array of struct e820entry) -D00/1EC ALL eddbuf EDD data (array of struct edd_info) diff --git a/Documentation/x86/i386/IO-APIC.txt b/Documentation/x86/i386/IO-APIC.txt new file mode 100644 index 0000000..30b4c71 --- /dev/null +++ b/Documentation/x86/i386/IO-APIC.txt @@ -0,0 +1,119 @@ +Most (all) Intel-MP compliant SMP boards have the so-called 'IO-APIC', +which is an enhanced interrupt controller. It enables us to route +hardware interrupts to multiple CPUs, or to CPU groups. Without an +IO-APIC, interrupts from hardware will be delivered only to the +CPU which boots the operating system (usually CPU#0). + +Linux supports all variants of compliant SMP boards, including ones with +multiple IO-APICs. Multiple IO-APICs are used in high-end servers to +distribute IRQ load further. + +There are (a few) known breakages in certain older boards, such bugs are +usually worked around by the kernel. If your MP-compliant SMP board does +not boot Linux, then consult the linux-smp mailing list archives first. + +If your box boots fine with enabled IO-APIC IRQs, then your +/proc/interrupts will look like this one: + + ----------------------------> + hell:~> cat /proc/interrupts + CPU0 + 0: 1360293 IO-APIC-edge timer + 1: 4 IO-APIC-edge keyboard + 2: 0 XT-PIC cascade + 13: 1 XT-PIC fpu + 14: 1448 IO-APIC-edge ide0 + 16: 28232 IO-APIC-level Intel EtherExpress Pro 10/100 Ethernet + 17: 51304 IO-APIC-level eth0 + NMI: 0 + ERR: 0 + hell:~> + <---------------------------- + +Some interrupts are still listed as 'XT PIC', but this is not a problem; +none of those IRQ sources is performance-critical. + + +In the unlikely case that your board does not create a working mp-table, +you can use the pirq= boot parameter to 'hand-construct' IRQ entries. This +is non-trivial though and cannot be automated. One sample /etc/lilo.conf +entry: + + append="pirq=15,11,10" + +The actual numbers depend on your system, on your PCI cards and on their +PCI slot position. Usually PCI slots are 'daisy chained' before they are +connected to the PCI chipset IRQ routing facility (the incoming PIRQ1-4 +lines): + + ,-. ,-. ,-. ,-. ,-. + PIRQ4 ----| |-. ,-| |-. ,-| |-. ,-| |--------| | + |S| \ / |S| \ / |S| \ / |S| |S| + PIRQ3 ----|l|-. `/---|l|-. `/---|l|-. `/---|l|--------|l| + |o| \/ |o| \/ |o| \/ |o| |o| + PIRQ2 ----|t|-./`----|t|-./`----|t|-./`----|t|--------|t| + |1| /\ |2| /\ |3| /\ |4| |5| + PIRQ1 ----| |- `----| |- `----| |- `----| |--------| | + `-' `-' `-' `-' `-' + +Every PCI card emits a PCI IRQ, which can be INTA, INTB, INTC or INTD: + + ,-. + INTD--| | + |S| + INTC--|l| + |o| + INTB--|t| + |x| + INTA--| | + `-' + +These INTA-D PCI IRQs are always 'local to the card', their real meaning +depends on which slot they are in. If you look at the daisy chaining diagram, +a card in slot4, issuing INTA IRQ, it will end up as a signal on PIRQ4 of +the PCI chipset. Most cards issue INTA, this creates optimal distribution +between the PIRQ lines. (distributing IRQ sources properly is not a +necessity, PCI IRQs can be shared at will, but it's a good for performance +to have non shared interrupts). Slot5 should be used for videocards, they +do not use interrupts normally, thus they are not daisy chained either. + +so if you have your SCSI card (IRQ11) in Slot1, Tulip card (IRQ9) in +Slot2, then you'll have to specify this pirq= line: + + append="pirq=11,9" + +the following script tries to figure out such a default pirq= line from +your PCI configuration: + + echo -n pirq=; echo `scanpci | grep T_L | cut -c56-` | sed 's/ /,/g' + +note that this script wont work if you have skipped a few slots or if your +board does not do default daisy-chaining. (or the IO-APIC has the PIRQ pins +connected in some strange way). E.g. if in the above case you have your SCSI +card (IRQ11) in Slot3, and have Slot1 empty: + + append="pirq=0,9,11" + +[value '0' is a generic 'placeholder', reserved for empty (or non-IRQ emitting) +slots.] + +Generally, it's always possible to find out the correct pirq= settings, just +permute all IRQ numbers properly ... it will take some time though. An +'incorrect' pirq line will cause the booting process to hang, or a device +won't function properly (e.g. if it's inserted as a module). + +If you have 2 PCI buses, then you can use up to 8 pirq values, although such +boards tend to have a good configuration. + +Be prepared that it might happen that you need some strange pirq line: + + append="pirq=0,0,0,0,0,0,9,11" + +Use smart trial-and-error techniques to find out the correct pirq line ... + +Good luck and mail to linux-smp@vger.kernel.org or +linux-kernel@vger.kernel.org if you have any problems that are not covered +by this document. + +-- mingo + diff --git a/Documentation/x86/i386/boot.txt b/Documentation/x86/i386/boot.txt new file mode 100644 index 0000000..147bfe5 --- /dev/null +++ b/Documentation/x86/i386/boot.txt @@ -0,0 +1,900 @@ + THE LINUX/x86 BOOT PROTOCOL + --------------------------- + +On the x86 platform, the Linux kernel uses a rather complicated boot +convention. This has evolved partially due to historical aspects, as +well as the desire in the early days to have the kernel itself be a +bootable image, the complicated PC memory model and due to changed +expectations in the PC industry caused by the effective demise of +real-mode DOS as a mainstream operating system. + +Currently, the following versions of the Linux/x86 boot protocol exist. + +Old kernels: zImage/Image support only. Some very early kernels + may not even support a command line. + +Protocol 2.00: (Kernel 1.3.73) Added bzImage and initrd support, as + well as a formalized way to communicate between the + boot loader and the kernel. setup.S made relocatable, + although the traditional setup area still assumed + writable. + +Protocol 2.01: (Kernel 1.3.76) Added a heap overrun warning. + +Protocol 2.02: (Kernel 2.4.0-test3-pre3) New command line protocol. + Lower the conventional memory ceiling. No overwrite + of the traditional setup area, thus making booting + safe for systems which use the EBDA from SMM or 32-bit + BIOS entry points. zImage deprecated but still + supported. + +Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible + initrd address available to the bootloader. + +Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes. + +Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. + Introduce relocatable_kernel and kernel_alignment fields. + +Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of + the boot command line. + +Protocol 2.07: (Kernel 2.6.24) Added paravirtualised boot protocol. + Introduced hardware_subarch and hardware_subarch_data + and KEEP_SEGMENTS flag in load_flags. + +Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format + payload. Introduced payload_offset and payload length + fields to aid in locating the payload. + +Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical + pointer to single linked list of struct setup_data. + +**** MEMORY LAYOUT + +The traditional memory map for the kernel loader, used for Image or +zImage kernels, typically looks like: + + | | +0A0000 +------------------------+ + | Reserved for BIOS | Do not use. Reserved for BIOS EBDA. +09A000 +------------------------+ + | Command line | + | Stack/heap | For use by the kernel real-mode code. +098000 +------------------------+ + | Kernel setup | The kernel real-mode code. +090200 +------------------------+ + | Kernel boot sector | The kernel legacy boot sector. +090000 +------------------------+ + | Protected-mode kernel | The bulk of the kernel image. +010000 +------------------------+ + | Boot loader | <- Boot sector entry point 0000:7C00 +001000 +------------------------+ + | Reserved for MBR/BIOS | +000800 +------------------------+ + | Typically used by MBR | +000600 +------------------------+ + | BIOS use only | +000000 +------------------------+ + + +When using bzImage, the protected-mode kernel was relocated to +0x100000 ("high memory"), and the kernel real-mode block (boot sector, +setup, and stack/heap) was made relocatable to any address between +0x10000 and end of low memory. Unfortunately, in protocols 2.00 and +2.01 the 0x90000+ memory range is still used internally by the kernel; +the 2.02 protocol resolves that problem. + +It is desirable to keep the "memory ceiling" -- the highest point in +low memory touched by the boot loader -- as low as possible, since +some newer BIOSes have begun to allocate some rather large amounts of +memory, called the Extended BIOS Data Area, near the top of low +memory. The boot loader should use the "INT 12h" BIOS call to verify +how much low memory is available. + +Unfortunately, if INT 12h reports that the amount of memory is too +low, there is usually nothing the boot loader can do but to report an +error to the user. The boot loader should therefore be designed to +take up as little space in low memory as it reasonably can. For +zImage or old bzImage kernels, which need data written into the +0x90000 segment, the boot loader should make sure not to use memory +above the 0x9A000 point; too many BIOSes will break above that point. + +For a modern bzImage kernel with boot protocol version >= 2.02, a +memory layout like the following is suggested: + + ~ ~ + | Protected-mode kernel | +100000 +------------------------+ + | I/O memory hole | +0A0000 +------------------------+ + | Reserved for BIOS | Leave as much as possible unused + ~ ~ + | Command line | (Can also be below the X+10000 mark) +X+10000 +------------------------+ + | Stack/heap | For use by the kernel real-mode code. +X+08000 +------------------------+ + | Kernel setup | The kernel real-mode code. + | Kernel boot sector | The kernel legacy boot sector. +X +------------------------+ + | Boot loader | <- Boot sector entry point 0000:7C00 +001000 +------------------------+ + | Reserved for MBR/BIOS | +000800 +------------------------+ + | Typically used by MBR | +000600 +------------------------+ + | BIOS use only | +000000 +------------------------+ + +... where the address X is as low as the design of the boot loader +permits. + + +**** THE REAL-MODE KERNEL HEADER + +In the following text, and anywhere in the kernel boot sequence, "a +sector" refers to 512 bytes. It is independent of the actual sector +size of the underlying medium. + +The first step in loading a Linux kernel should be to load the +real-mode code (boot sector and setup code) and then examine the +following header at offset 0x01f1. The real-mode code can total up to +32K, although the boot loader may choose to load only the first two +sectors (1K) and then examine the bootup sector size. + +The header looks like: + +Offset Proto Name Meaning +/Size + +01F1/1 ALL(1 setup_sects The size of the setup in sectors +01F2/2 ALL root_flags If set, the root is mounted readonly +01F4/4 2.04+(2 syssize The size of the 32-bit code in 16-byte paras +01F8/2 ALL ram_size DO NOT USE - for bootsect.S use only +01FA/2 ALL vid_mode Video mode control +01FC/2 ALL root_dev Default root device number +01FE/2 ALL boot_flag 0xAA55 magic number +0200/2 2.00+ jump Jump instruction +0202/4 2.00+ header Magic signature "HdrS" +0206/2 2.00+ version Boot protocol version supported +0208/4 2.00+ realmode_swtch Boot loader hook (see below) +020C/2 2.00+ start_sys The load-low segment (0x1000) (obsolete) +020E/2 2.00+ kernel_version Pointer to kernel version string +0210/1 2.00+ type_of_loader Boot loader identifier +0211/1 2.00+ loadflags Boot protocol option flags +0212/2 2.00+ setup_move_size Move to high memory size (used with hooks) +0214/4 2.00+ code32_start Boot loader hook (see below) +0218/4 2.00+ ramdisk_image initrd load address (set by boot loader) +021C/4 2.00+ ramdisk_size initrd size (set by boot loader) +0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only +0224/2 2.01+ heap_end_ptr Free memory after setup end +0226/2 N/A pad1 Unused +0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line +022C/4 2.03+ initrd_addr_max Highest legal initrd address +0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel +0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not +0235/3 N/A pad2 Unused +0238/4 2.06+ cmdline_size Maximum size of the kernel command line +023C/4 2.07+ hardware_subarch Hardware subarchitecture +0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data +0248/4 2.08+ payload_offset Offset of kernel payload +024C/4 2.08+ payload_length Length of kernel payload +0250/8 2.09+ setup_data 64-bit physical pointer to linked list + of struct setup_data + +(1) For backwards compatibility, if the setup_sects field contains 0, the + real value is 4. + +(2) For boot protocol prior to 2.04, the upper two bytes of the syssize + field are unusable, which means the size of a bzImage kernel + cannot be determined. + +If the "HdrS" (0x53726448) magic number is not found at offset 0x202, +the boot protocol version is "old". Loading an old kernel, the +following parameters should be assumed: + + Image type = zImage + initrd not supported + Real-mode kernel must be located at 0x90000. + +Otherwise, the "version" field contains the protocol version, +e.g. protocol version 2.01 will contain 0x0201 in this field. When +setting fields in the header, you must make sure only to set fields +supported by the protocol version in use. + + +**** DETAILS OF HEADER FIELDS + +For each field, some are information from the kernel to the bootloader +("read"), some are expected to be filled out by the bootloader +("write"), and some are expected to be read and modified by the +bootloader ("modify"). + +All general purpose boot loaders should write the fields marked +(obligatory). Boot loaders who want to load the kernel at a +nonstandard address should fill in the fields marked (reloc); other +boot loaders can ignore those fields. + +The byte order of all fields is littleendian (this is x86, after all.) + +Field name: setup_sects +Type: read +Offset/size: 0x1f1/1 +Protocol: ALL + + The size of the setup code in 512-byte sectors. If this field is + 0, the real value is 4. The real-mode code consists of the boot + sector (always one 512-byte sector) plus the setup code. + +Field name: root_flags +Type: modify (optional) +Offset/size: 0x1f2/2 +Protocol: ALL + + If this field is nonzero, the root defaults to readonly. The use of + this field is deprecated; use the "ro" or "rw" options on the + command line instead. + +Field name: syssize +Type: read +Offset/size: 0x1f4/4 (protocol 2.04+) 0x1f4/2 (protocol ALL) +Protocol: 2.04+ + + The size of the protected-mode code in units of 16-byte paragraphs. + For protocol versions older than 2.04 this field is only two bytes + wide, and therefore cannot be trusted for the size of a kernel if + the LOAD_HIGH flag is set. + +Field name: ram_size +Type: kernel internal +Offset/size: 0x1f8/2 +Protocol: ALL + + This field is obsolete. + +Field name: vid_mode +Type: modify (obligatory) +Offset/size: 0x1fa/2 + + Please see the section on SPECIAL COMMAND LINE OPTIONS. + +Field name: root_dev +Type: modify (optional) +Offset/size: 0x1fc/2 +Protocol: ALL + + The default root device device number. The use of this field is + deprecated, use the "root=" option on the command line instead. + +Field name: boot_flag +Type: read +Offset/size: 0x1fe/2 +Protocol: ALL + + Contains 0xAA55. This is the closest thing old Linux kernels have + to a magic number. + +Field name: jump +Type: read +Offset/size: 0x200/2 +Protocol: 2.00+ + + Contains an x86 jump instruction, 0xEB followed by a signed offset + relative to byte 0x202. This can be used to determine the size of + the header. + +Field name: header +Type: read +Offset/size: 0x202/4 +Protocol: 2.00+ + + Contains the magic number "HdrS" (0x53726448). + +Field name: version +Type: read +Offset/size: 0x206/2 +Protocol: 2.00+ + + Contains the boot protocol version, in (major << 8)+minor format, + e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version + 10.17. + +Field name: readmode_swtch +Type: modify (optional) +Offset/size: 0x208/4 +Protocol: 2.00+ + + Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) + +Field name: start_sys +Type: read +Offset/size: 0x20c/4 +Protocol: 2.00+ + + The load low segment (0x1000). Obsolete. + +Field name: kernel_version +Type: read +Offset/size: 0x20e/2 +Protocol: 2.00+ + + If set to a nonzero value, contains a pointer to a NUL-terminated + human-readable kernel version number string, less 0x200. This can + be used to display the kernel version to the user. This value + should be less than (0x200*setup_sects). + + For example, if this value is set to 0x1c00, the kernel version + number string can be found at offset 0x1e00 in the kernel file. + This is a valid value if and only if the "setup_sects" field + contains the value 15 or higher, as: + + 0x1c00 < 15*0x200 (= 0x1e00) but + 0x1c00 >= 14*0x200 (= 0x1c00) + + 0x1c00 >> 9 = 14, so the minimum value for setup_secs is 15. + +Field name: type_of_loader +Type: write (obligatory) +Offset/size: 0x210/1 +Protocol: 2.00+ + + If your boot loader has an assigned id (see table below), enter + 0xTV here, where T is an identifier for the boot loader and V is + a version number. Otherwise, enter 0xFF here. + + Assigned boot loader ids: + 0 LILO (0x00 reserved for pre-2.00 bootloader) + 1 Loadlin + 2 bootsect-loader (0x20, all other values reserved) + 3 SYSLINUX + 4 EtherBoot + 5 ELILO + 7 GRuB + 8 U-BOOT + 9 Xen + A Gujin + B Qemu + + Please contact if you need a bootloader ID + value assigned. + +Field name: loadflags +Type: modify (obligatory) +Offset/size: 0x211/1 +Protocol: 2.00+ + + This field is a bitmask. + + Bit 0 (read): LOADED_HIGH + - If 0, the protected-mode code is loaded at 0x10000. + - If 1, the protected-mode code is loaded at 0x100000. + + Bit 5 (write): QUIET_FLAG + - If 0, print early messages. + - If 1, suppress early messages. + This requests to the kernel (decompressor and early + kernel) to not write early messages that require + accessing the display hardware directly. + + Bit 6 (write): KEEP_SEGMENTS + Protocol: 2.07+ + - If 0, reload the segment registers in the 32bit entry point. + - If 1, do not reload the segment registers in the 32bit entry point. + Assume that %cs %ds %ss %es are all set to flat segments with + a base of 0 (or the equivalent for their environment). + + Bit 7 (write): CAN_USE_HEAP + Set this bit to 1 to indicate that the value entered in the + heap_end_ptr is valid. If this field is clear, some setup code + functionality will be disabled. + +Field name: setup_move_size +Type: modify (obligatory) +Offset/size: 0x212/2 +Protocol: 2.00-2.01 + + When using protocol 2.00 or 2.01, if the real mode kernel is not + loaded at 0x90000, it gets moved there later in the loading + sequence. Fill in this field if you want additional data (such as + the kernel command line) moved in addition to the real-mode kernel + itself. + + The unit is bytes starting with the beginning of the boot sector. + + This field is can be ignored when the protocol is 2.02 or higher, or + if the real-mode code is loaded at 0x90000. + +Field name: code32_start +Type: modify (optional, reloc) +Offset/size: 0x214/4 +Protocol: 2.00+ + + The address to jump to in protected mode. This defaults to the load + address of the kernel, and can be used by the boot loader to + determine the proper load address. + + This field can be modified for two purposes: + + 1. as a boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) + + 2. if a bootloader which does not install a hook loads a + relocatable kernel at a nonstandard address it will have to modify + this field to point to the load address. + +Field name: ramdisk_image +Type: write (obligatory) +Offset/size: 0x218/4 +Protocol: 2.00+ + + The 32-bit linear address of the initial ramdisk or ramfs. Leave at + zero if there is no initial ramdisk/ramfs. + +Field name: ramdisk_size +Type: write (obligatory) +Offset/size: 0x21c/4 +Protocol: 2.00+ + + Size of the initial ramdisk or ramfs. Leave at zero if there is no + initial ramdisk/ramfs. + +Field name: bootsect_kludge +Type: kernel internal +Offset/size: 0x220/4 +Protocol: 2.00+ + + This field is obsolete. + +Field name: heap_end_ptr +Type: write (obligatory) +Offset/size: 0x224/2 +Protocol: 2.01+ + + Set this field to the offset (from the beginning of the real-mode + code) of the end of the setup stack/heap, minus 0x0200. + +Field name: cmd_line_ptr +Type: write (obligatory) +Offset/size: 0x228/4 +Protocol: 2.02+ + + Set this field to the linear address of the kernel command line. + The kernel command line can be located anywhere between the end of + the setup heap and 0xA0000; it does not have to be located in the + same 64K segment as the real-mode code itself. + + Fill in this field even if your boot loader does not support a + command line, in which case you can point this to an empty string + (or better yet, to the string "auto".) If this field is left at + zero, the kernel will assume that your boot loader does not support + the 2.02+ protocol. + +Field name: initrd_addr_max +Type: read +Offset/size: 0x22c/4 +Protocol: 2.03+ + + The maximum address that may be occupied by the initial + ramdisk/ramfs contents. For boot protocols 2.02 or earlier, this + field is not present, and the maximum address is 0x37FFFFFF. (This + address is defined as the address of the highest safe byte, so if + your ramdisk is exactly 131072 bytes long and this field is + 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) + +Field name: kernel_alignment +Type: read (reloc) +Offset/size: 0x230/4 +Protocol: 2.05+ + + Alignment unit required by the kernel (if relocatable_kernel is true.) + +Field name: relocatable_kernel +Type: read (reloc) +Offset/size: 0x234/1 +Protocol: 2.05+ + + If this field is nonzero, the protected-mode part of the kernel can + be loaded at any address that satisfies the kernel_alignment field. + After loading, the boot loader must set the code32_start field to + point to the loaded code, or to a boot loader hook. + +Field name: cmdline_size +Type: read +Offset/size: 0x238/4 +Protocol: 2.06+ + + The maximum size of the command line without the terminating + zero. This means that the command line can contain at most + cmdline_size characters. With protocol version 2.05 and earlier, the + maximum size was 255. + +Field name: hardware_subarch +Type: write (optional, defaults to x86/PC) +Offset/size: 0x23c/4 +Protocol: 2.07+ + + In a paravirtualized environment the hardware low level architectural + pieces such as interrupt handling, page table handling, and + accessing process control registers needs to be done differently. + + This field allows the bootloader to inform the kernel we are in one + one of those environments. + + 0x00000000 The default x86/PC environment + 0x00000001 lguest + 0x00000002 Xen + +Field name: hardware_subarch_data +Type: write (subarch-dependent) +Offset/size: 0x240/8 +Protocol: 2.07+ + + A pointer to data that is specific to hardware subarch + This field is currently unused for the default x86/PC environment, + do not modify. + +Field name: payload_offset +Type: read +Offset/size: 0x248/4 +Protocol: 2.08+ + + If non-zero then this field contains the offset from the end of the + real-mode code to the payload. + + The payload may be compressed. The format of both the compressed and + uncompressed data should be determined using the standard magic + numbers. Currently only gzip compressed ELF is used. + +Field name: payload_length +Type: read +Offset/size: 0x24c/4 +Protocol: 2.08+ + + The length of the payload. + +Field name: setup_data +Type: write (special) +Offset/size: 0x250/8 +Protocol: 2.09+ + + The 64-bit physical pointer to NULL terminated single linked list of + struct setup_data. This is used to define a more extensible boot + parameters passing mechanism. The definition of struct setup_data is + as follow: + + struct setup_data { + u64 next; + u32 type; + u32 len; + u8 data[0]; + }; + + Where, the next is a 64-bit physical pointer to the next node of + linked list, the next field of the last node is 0; the type is used + to identify the contents of data; the len is the length of data + field; the data holds the real payload. + + This list may be modified at a number of points during the bootup + process. Therefore, when modifying this list one should always make + sure to consider the case where the linked list already contains + entries. + + +**** THE IMAGE CHECKSUM + +From boot protocol version 2.08 onwards the CRC-32 is calculated over +the entire file using the characteristic polynomial 0x04C11DB7 and an +initial remainder of 0xffffffff. The checksum is appended to the +file; therefore the CRC of the file up to the limit specified in the +syssize field of the header is always 0. + + +**** THE KERNEL COMMAND LINE + +The kernel command line has become an important way for the boot +loader to communicate with the kernel. Some of its options are also +relevant to the boot loader itself, see "special command line options" +below. + +The kernel command line is a null-terminated string. The maximum +length can be retrieved from the field cmdline_size. Before protocol +version 2.06, the maximum was 255 characters. A string that is too +long will be automatically truncated by the kernel. + +If the boot protocol version is 2.02 or later, the address of the +kernel command line is given by the header field cmd_line_ptr (see +above.) This address can be anywhere between the end of the setup +heap and 0xA0000. + +If the protocol version is *not* 2.02 or higher, the kernel +command line is entered using the following protocol: + + At offset 0x0020 (word), "cmd_line_magic", enter the magic + number 0xA33F. + + At offset 0x0022 (word), "cmd_line_offset", enter the offset + of the kernel command line (relative to the start of the + real-mode kernel). + + The kernel command line *must* be within the memory region + covered by setup_move_size, so you may need to adjust this + field. + + +**** MEMORY LAYOUT OF THE REAL-MODE CODE + +The real-mode code requires a stack/heap to be set up, as well as +memory allocated for the kernel command line. This needs to be done +in the real-mode accessible memory in bottom megabyte. + +It should be noted that modern machines often have a sizable Extended +BIOS Data Area (EBDA). As a result, it is advisable to use as little +of the low megabyte as possible. + +Unfortunately, under the following circumstances the 0x90000 memory +segment has to be used: + + - When loading a zImage kernel ((loadflags & 0x01) == 0). + - When loading a 2.01 or earlier boot protocol kernel. + + -> For the 2.00 and 2.01 boot protocols, the real-mode code + can be loaded at another address, but it is internally + relocated to 0x90000. For the "old" protocol, the + real-mode code must be loaded at 0x90000. + +When loading at 0x90000, avoid using memory above 0x9a000. + +For boot protocol 2.02 or higher, the command line does not have to be +located in the same 64K segment as the real-mode setup code; it is +thus permitted to give the stack/heap the full 64K segment and locate +the command line above it. + +The kernel command line should not be located below the real-mode +code, nor should it be located in high memory. + + +**** SAMPLE BOOT CONFIGURATION + +As a sample configuration, assume the following layout of the real +mode segment: + + When loading below 0x90000, use the entire segment: + + 0x0000-0x7fff Real mode kernel + 0x8000-0xdfff Stack and heap + 0xe000-0xffff Kernel command line + + When loading at 0x90000 OR the protocol version is 2.01 or earlier: + + 0x0000-0x7fff Real mode kernel + 0x8000-0x97ff Stack and heap + 0x9800-0x9fff Kernel command line + +Such a boot loader should enter the following fields in the header: + + unsigned long base_ptr; /* base address for real-mode segment */ + + if ( setup_sects == 0 ) { + setup_sects = 4; + } + + if ( protocol >= 0x0200 ) { + type_of_loader = ; + if ( loading_initrd ) { + ramdisk_image = ; + ramdisk_size = ; + } + + if ( protocol >= 0x0202 && loadflags & 0x01 ) + heap_end = 0xe000; + else + heap_end = 0x9800; + + if ( protocol >= 0x0201 ) { + heap_end_ptr = heap_end - 0x200; + loadflags |= 0x80; /* CAN_USE_HEAP */ + } + + if ( protocol >= 0x0202 ) { + cmd_line_ptr = base_ptr + heap_end; + strcpy(cmd_line_ptr, cmdline); + } else { + cmd_line_magic = 0xA33F; + cmd_line_offset = heap_end; + setup_move_size = heap_end + strlen(cmdline)+1; + strcpy(base_ptr+cmd_line_offset, cmdline); + } + } else { + /* Very old kernel */ + + heap_end = 0x9800; + + cmd_line_magic = 0xA33F; + cmd_line_offset = heap_end; + + /* A very old kernel MUST have its real-mode code + loaded at 0x90000 */ + + if ( base_ptr != 0x90000 ) { + /* Copy the real-mode kernel */ + memcpy(0x90000, base_ptr, (setup_sects+1)*512); + base_ptr = 0x90000; /* Relocated */ + } + + strcpy(0x90000+cmd_line_offset, cmdline); + + /* It is recommended to clear memory up to the 32K mark */ + memset(0x90000 + (setup_sects+1)*512, 0, + (64-(setup_sects+1))*512); + } + + +**** LOADING THE REST OF THE KERNEL + +The 32-bit (non-real-mode) kernel starts at offset (setup_sects+1)*512 +in the kernel file (again, if setup_sects == 0 the real value is 4.) +It should be loaded at address 0x10000 for Image/zImage kernels and +0x100000 for bzImage kernels. + +The kernel is a bzImage kernel if the protocol >= 2.00 and the 0x01 +bit (LOAD_HIGH) in the loadflags field is set: + + is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01); + load_address = is_bzImage ? 0x100000 : 0x10000; + +Note that Image/zImage kernels can be up to 512K in size, and thus use +the entire 0x10000-0x90000 range of memory. This means it is pretty +much a requirement for these kernels to load the real-mode part at +0x90000. bzImage kernels allow much more flexibility. + + +**** SPECIAL COMMAND LINE OPTIONS + +If the command line provided by the boot loader is entered by the +user, the user may expect the following command line options to work. +They should normally not be deleted from the kernel command line even +though not all of them are actually meaningful to the kernel. Boot +loader authors who need additional command line options for the boot +loader itself should get them registered in +Documentation/kernel-parameters.txt to make sure they will not +conflict with actual kernel options now or in the future. + + vga= + here is either an integer (in C notation, either + decimal, octal, or hexadecimal) or one of the strings + "normal" (meaning 0xFFFF), "ext" (meaning 0xFFFE) or "ask" + (meaning 0xFFFD). This value should be entered into the + vid_mode field, as it is used by the kernel before the command + line is parsed. + + mem= + is an integer in C notation optionally followed by + (case insensitive) K, M, G, T, P or E (meaning << 10, << 20, + << 30, << 40, << 50 or << 60). This specifies the end of + memory to the kernel. This affects the possible placement of + an initrd, since an initrd should be placed near end of + memory. Note that this is an option to *both* the kernel and + the bootloader! + + initrd= + An initrd should be loaded. The meaning of is + obviously bootloader-dependent, and some boot loaders + (e.g. LILO) do not have such a command. + +In addition, some boot loaders add the following options to the +user-specified command line: + + BOOT_IMAGE= + The boot image which was loaded. Again, the meaning of + is obviously bootloader-dependent. + + auto + The kernel was booted without explicit user intervention. + +If these options are added by the boot loader, it is highly +recommended that they are located *first*, before the user-specified +or configuration-specified command line. Otherwise, "init=/bin/sh" +gets confused by the "auto" option. + + +**** RUNNING THE KERNEL + +The kernel is started by jumping to the kernel entry point, which is +located at *segment* offset 0x20 from the start of the real mode +kernel. This means that if you loaded your real-mode kernel code at +0x90000, the kernel entry point is 9020:0000. + +At entry, ds = es = ss should point to the start of the real-mode +kernel code (0x9000 if the code is loaded at 0x90000), sp should be +set up properly, normally pointing to the top of the heap, and +interrupts should be disabled. Furthermore, to guard against bugs in +the kernel, it is recommended that the boot loader sets fs = gs = ds = +es = ss. + +In our example from above, we would do: + + /* Note: in the case of the "old" kernel protocol, base_ptr must + be == 0x90000 at this point; see the previous sample code */ + + seg = base_ptr >> 4; + + cli(); /* Enter with interrupts disabled! */ + + /* Set up the real-mode kernel stack */ + _SS = seg; + _SP = heap_end; + + _DS = _ES = _FS = _GS = seg; + jmp_far(seg+0x20, 0); /* Run the kernel */ + +If your boot sector accesses a floppy drive, it is recommended to +switch off the floppy motor before running the kernel, since the +kernel boot leaves interrupts off and thus the motor will not be +switched off, especially if the loaded kernel has the floppy driver as +a demand-loaded module! + + +**** ADVANCED BOOT LOADER HOOKS + +If the boot loader runs in a particularly hostile environment (such as +LOADLIN, which runs under DOS) it may be impossible to follow the +standard memory location requirements. Such a boot loader may use the +following hooks that, if set, are invoked by the kernel at the +appropriate time. The use of these hooks should probably be +considered an absolutely last resort! + +IMPORTANT: All the hooks are required to preserve %esp, %ebp, %esi and +%edi across invocation. + + realmode_swtch: + A 16-bit real mode far subroutine invoked immediately before + entering protected mode. The default routine disables NMI, so + your routine should probably do so, too. + + code32_start: + A 32-bit flat-mode routine *jumped* to immediately after the + transition to protected mode, but before the kernel is + uncompressed. No segments, except CS, are guaranteed to be + set up (current kernels do, but older ones do not); you should + set them up to BOOT_DS (0x18) yourself. + + After completing your hook, you should jump to the address + that was in this field before your boot loader overwrote it + (relocated, if appropriate.) + + +**** 32-bit BOOT PROTOCOL + +For machine with some new BIOS other than legacy BIOS, such as EFI, +LinuxBIOS, etc, and kexec, the 16-bit real mode setup code in kernel +based on legacy BIOS can not be used, so a 32-bit boot protocol needs +to be defined. + +In 32-bit boot protocol, the first step in loading a Linux kernel +should be to setup the boot parameters (struct boot_params, +traditionally known as "zero page"). The memory for struct boot_params +should be allocated and initialized to all zero. Then the setup header +from offset 0x01f1 of kernel image on should be loaded into struct +boot_params and examined. The end of setup header can be calculated as +follow: + + 0x0202 + byte value at offset 0x0201 + +In addition to read/modify/write the setup header of the struct +boot_params as that of 16-bit boot protocol, the boot loader should +also fill the additional fields of the struct boot_params as that +described in zero-page.txt. + +After setupping the struct boot_params, the boot loader can load the +32/64-bit kernel in the same way as that of 16-bit boot protocol. + +In 32-bit boot protocol, the kernel is started by jumping to the +32-bit kernel entry point, which is the start address of loaded +32/64-bit kernel. + +At entry, the CPU must be in 32-bit protected mode with paging +disabled; a GDT must be loaded with the descriptors for selectors +__BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat +segment; __BOOS_CS must have execute/read permission, and __BOOT_DS +must have read/write permission; CS must be __BOOT_CS and DS, ES, SS +must be __BOOT_DS; interrupt must be disabled; %esi must hold the base +address of the struct boot_params; %ebp, %edi and %ebx must be zero. diff --git a/Documentation/x86/i386/usb-legacy-support.txt b/Documentation/x86/i386/usb-legacy-support.txt new file mode 100644 index 0000000..1894cdf --- /dev/null +++ b/Documentation/x86/i386/usb-legacy-support.txt @@ -0,0 +1,44 @@ +USB Legacy support +~~~~~~~~~~~~~~~~~~ + +Vojtech Pavlik , January 2004 + + +Also known as "USB Keyboard" or "USB Mouse support" in the BIOS Setup is a +feature that allows one to use the USB mouse and keyboard as if they were +their classic PS/2 counterparts. This means one can use an USB keyboard to +type in LILO for example. + +It has several drawbacks, though: + +1) On some machines, the emulated PS/2 mouse takes over even when no USB + mouse is present and a real PS/2 mouse is present. In that case the extra + features (wheel, extra buttons, touchpad mode) of the real PS/2 mouse may + not be available. + +2) If CONFIG_HIGHMEM64G is enabled, the PS/2 mouse emulation can cause + system crashes, because the SMM BIOS is not expecting to be in PAE mode. + The Intel E7505 is a typical machine where this happens. + +3) If AMD64 64-bit mode is enabled, again system crashes often happen, + because the SMM BIOS isn't expecting the CPU to be in 64-bit mode. The + BIOS manufacturers only test with Windows, and Windows doesn't do 64-bit + yet. + +Solutions: + +Problem 1) can be solved by loading the USB drivers prior to loading the +PS/2 mouse driver. Since the PS/2 mouse driver is in 2.6 compiled into +the kernel unconditionally, this means the USB drivers need to be +compiled-in, too. + +Problem 2) can currently only be solved by either disabling HIGHMEM64G +in the kernel config or USB Legacy support in the BIOS. A BIOS update +could help, but so far no such update exists. + +Problem 3) is usually fixed by a BIOS update. Check the board +manufacturers web site. If an update is not available, disable USB +Legacy support in the BIOS. If this alone doesn't help, try also adding +idle=poll on the kernel command line. The BIOS may be entering the SMM +on the HLT instruction as well. + diff --git a/Documentation/x86/i386/zero-page.txt b/Documentation/x86/i386/zero-page.txt new file mode 100644 index 0000000..169ad42 --- /dev/null +++ b/Documentation/x86/i386/zero-page.txt @@ -0,0 +1,31 @@ +The additional fields in struct boot_params as a part of 32-bit boot +protocol of kernel. These should be filled by bootloader or 16-bit +real-mode setup code of the kernel. References/settings to it mainly +are in: + + include/asm-x86/bootparam.h + + +Offset Proto Name Meaning +/Size + +000/040 ALL screen_info Text mode or frame buffer information + (struct screen_info) +040/014 ALL apm_bios_info APM BIOS information (struct apm_bios_info) +060/010 ALL ist_info Intel SpeedStep (IST) BIOS support information + (struct ist_info) +080/010 ALL hd0_info hd0 disk parameter, OBSOLETE!! +090/010 ALL hd1_info hd1 disk parameter, OBSOLETE!! +0A0/010 ALL sys_desc_table System description table (struct sys_desc_table) +140/080 ALL edid_info Video mode setup (struct edid_info) +1C0/020 ALL efi_info EFI 32 information (struct efi_info) +1E0/004 ALL alk_mem_k Alternative mem check, in KB +1E4/004 ALL scratch Scratch field for the kernel setup code +1E8/001 ALL e820_entries Number of entries in e820_map (below) +1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below) +1EA/001 ALL edd_mbr_sig_buf_entries Number of entries in edd_mbr_sig_buffer + (below) +290/040 ALL edd_mbr_sig_buffer EDD MBR signatures +2D0/A00 ALL e820_map E820 memory map table + (array of struct e820entry) +D00/1EC ALL eddbuf EDD data (array of struct edd_info) diff --git a/Documentation/x86/x86_64/00-INDEX b/Documentation/x86/x86_64/00-INDEX new file mode 100644 index 0000000..92fc20a --- /dev/null +++ b/Documentation/x86/x86_64/00-INDEX @@ -0,0 +1,16 @@ +00-INDEX + - This file +boot-options.txt + - AMD64-specific boot options. +cpu-hotplug-spec + - Firmware support for CPU hotplug under Linux/x86-64 +fake-numa-for-cpusets + - Using numa=fake and CPUSets for Resource Management +kernel-stacks + - Context-specific per-processor interrupt stacks. +machinecheck + - Configurable sysfs parameters for the x86-64 machine check code. +mm.txt + - Memory layout of x86-64 (4 level page tables, 46 bits physical). +uefi.txt + - Booting Linux via Unified Extensible Firmware Interface. diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt new file mode 100644 index 0000000..b0c7b6c --- /dev/null +++ b/Documentation/x86/x86_64/boot-options.txt @@ -0,0 +1,314 @@ +AMD64 specific boot options + +There are many others (usually documented in driver documentation), but +only the AMD64 specific ones are listed here. + +Machine check + + mce=off disable machine check + mce=bootlog Enable logging of machine checks left over from booting. + Disabled by default on AMD because some BIOS leave bogus ones. + If your BIOS doesn't do that it's a good idea to enable though + to make sure you log even machine check events that result + in a reboot. On Intel systems it is enabled by default. + mce=nobootlog + Disable boot machine check logging. + mce=tolerancelevel (number) + 0: always panic on uncorrected errors, log corrected errors + 1: panic or SIGBUS on uncorrected errors, log corrected errors + 2: SIGBUS or log uncorrected errors, log corrected errors + 3: never panic or SIGBUS, log all errors (for testing only) + Default is 1 + Can be also set using sysfs which is preferable. + + nomce (for compatibility with i386): same as mce=off + + Everything else is in sysfs now. + +APICs + + apic Use IO-APIC. Default + + noapic Don't use the IO-APIC. + + disableapic Don't use the local APIC + + nolapic Don't use the local APIC (alias for i386 compatibility) + + pirq=... See Documentation/i386/IO-APIC.txt + + noapictimer Don't set up the APIC timer + + no_timer_check Don't check the IO-APIC timer. This can work around + problems with incorrect timer initialization on some boards. + + apicmaintimer Run time keeping from the local APIC timer instead + of using the PIT/HPET interrupt for this. This is useful + when the PIT/HPET interrupts are unreliable. + + noapicmaintimer Don't do time keeping using the APIC timer. + Useful when this option was auto selected, but doesn't work. + + apicpmtimer + Do APIC timer calibration using the pmtimer. Implies + apicmaintimer. Useful when your PIT timer is totally + broken. + + disable_8254_timer / enable_8254_timer + Enable interrupt 0 timer routing over the 8254 in addition to over + the IO-APIC. The kernel tries to set a sensible default. + +Early Console + + syntax: earlyprintk=vga + earlyprintk=serial[,ttySn[,baudrate]] + + The early console is useful when the kernel crashes before the + normal console is initialized. It is not enabled by + default because it has some cosmetic problems. + Append ,keep to not disable it when the real console takes over. + Only vga or serial at a time, not both. + Currently only ttyS0 and ttyS1 are supported. + Interaction with the standard serial driver is not very good. + The VGA output is eventually overwritten by the real console. + +Timing + + notsc + Don't use the CPU time stamp counter to read the wall time. + This can be used to work around timing problems on multiprocessor systems + with not properly synchronized CPUs. + + report_lost_ticks + Report when timer interrupts are lost because some code turned off + interrupts for too long. + + nmi_watchdog=NUMBER[,panic] + NUMBER can be: + 0 don't use an NMI watchdog + 1 use the IO-APIC timer for the NMI watchdog + 2 use the local APIC for the NMI watchdog using a performance counter. Note + This will use one performance counter and the local APIC's performance + vector. + When panic is specified panic when an NMI watchdog timeout occurs. + This is useful when you use a panic=... timeout and need the box + quickly up again. + + nohpet + Don't use the HPET timer. + +Idle loop + + idle=poll + Don't do power saving in the idle loop using HLT, but poll for rescheduling + event. This will make the CPUs eat a lot more power, but may be useful + to get slightly better performance in multiprocessor benchmarks. It also + makes some profiling using performance counters more accurate. + Please note that on systems with MONITOR/MWAIT support (like Intel EM64T + CPUs) this option has no performance advantage over the normal idle loop. + It may also interact badly with hyperthreading. + +Rebooting + + reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] [, [w]arm | [c]old] + bios Use the CPU reboot vector for warm reset + warm Don't set the cold reboot flag + cold Set the cold reboot flag + triple Force a triple fault (init) + kbd Use the keyboard controller. cold reset (default) + acpi Use the ACPI RESET_REG in the FADT. If ACPI is not configured or the + ACPI reset does not work, the reboot path attempts the reset using + the keyboard controller. + efi Use efi reset_system runtime service. If EFI is not configured or the + EFI reset does not work, the reboot path attempts the reset using + the keyboard controller. + + Using warm reset will be much faster especially on big memory + systems because the BIOS will not go through the memory check. + Disadvantage is that not all hardware will be completely reinitialized + on reboot so there may be boot problems on some systems. + + reboot=force + + Don't stop other CPUs on reboot. This can make reboot more reliable + in some cases. + +Non Executable Mappings + + noexec=on|off + + on Enable(default) + off Disable + +SMP + + additional_cpus=NUM Allow NUM more CPUs for hotplug + (defaults are specified by the BIOS, see Documentation/x86_64/cpu-hotplug-spec) + +NUMA + + numa=off Only set up a single NUMA node spanning all memory. + + numa=noacpi Don't parse the SRAT table for NUMA setup + + numa=fake=CMDLINE + If a number, fakes CMDLINE nodes and ignores NUMA setup of the + actual machine. Otherwise, system memory is configured + depending on the sizes and coefficients listed. For example: + numa=fake=2*512,1024,4*256,*128 + gives two 512M nodes, a 1024M node, four 256M nodes, and the + rest split into 128M chunks. If the last character of CMDLINE + is a *, the remaining memory is divided up equally among its + coefficient: + numa=fake=2*512,2* + gives two 512M nodes and the rest split into two nodes. + Otherwise, the remaining system RAM is allocated to an + additional node. + + numa=hotadd=percent + Only allow hotadd memory to preallocate page structures upto + percent of already available memory. + numa=hotadd=0 will disable hotadd memory. + +ACPI + + acpi=off Don't enable ACPI + acpi=ht Use ACPI boot table parsing, but don't enable ACPI + interpreter + acpi=force Force ACPI on (currently not needed) + + acpi=strict Disable out of spec ACPI workarounds. + + acpi_sci={edge,level,high,low} Set up ACPI SCI interrupt. + + acpi=noirq Don't route interrupts + +PCI + + pci=off Don't use PCI + pci=conf1 Use conf1 access. + pci=conf2 Use conf2 access. + pci=rom Assign ROMs. + pci=assign-busses Assign busses + pci=irqmask=MASK Set PCI interrupt mask to MASK + pci=lastbus=NUMBER Scan upto NUMBER busses, no matter what the mptable says. + pci=noacpi Don't use ACPI to set up PCI interrupt routing. + +IOMMU (input/output memory management unit) + + Currently four x86-64 PCI-DMA mapping implementations exist: + + 1. : use no hardware/software IOMMU at all + (e.g. because you have < 3 GB memory). + Kernel boot message: "PCI-DMA: Disabling IOMMU" + + 2. : AMD GART based hardware IOMMU. + Kernel boot message: "PCI-DMA: using GART IOMMU" + + 3. : Software IOMMU implementation. Used + e.g. if there is no hardware IOMMU in the system and it is need because + you have >3GB memory or told the kernel to us it (iommu=soft)) + Kernel boot message: "PCI-DMA: Using software bounce buffering + for IO (SWIOTLB)" + + 4. : IBM Calgary hardware IOMMU. Used in IBM + pSeries and xSeries servers. This hardware IOMMU supports DMA address + mapping with memory protection, etc. + Kernel boot message: "PCI-DMA: Using Calgary IOMMU" + + iommu=[][,noagp][,off][,force][,noforce][,leak[=] + [,memaper[=]][,merge][,forcesac][,fullflush][,nomerge] + [,noaperture][,calgary] + + General iommu options: + off Don't initialize and use any kind of IOMMU. + noforce Don't force hardware IOMMU usage when it is not needed. + (default). + force Force the use of the hardware IOMMU even when it is + not actually needed (e.g. because < 3 GB memory). + soft Use software bounce buffering (SWIOTLB) (default for + Intel machines). This can be used to prevent the usage + of an available hardware IOMMU. + + iommu options only relevant to the AMD GART hardware IOMMU: + Set the size of the remapping area in bytes. + allowed Overwrite iommu off workarounds for specific chipsets. + fullflush Flush IOMMU on each allocation (default). + nofullflush Don't use IOMMU fullflush. + leak Turn on simple iommu leak tracing (only when + CONFIG_IOMMU_LEAK is on). Default number of leak pages + is 20. + memaper[=] Allocate an own aperture over RAM with size 32MB<4GB. + DAC is used with 32-bit PCI to push a 64-bit address in + two cycles. When off all DMA over >4GB is forced through + an IOMMU or software bounce buffering. + nodac Forbid DAC mode, i.e. DMA >4GB. + panic Always panic when IOMMU overflows. + calgary Use the Calgary IOMMU if it is available + + iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU + implementation: + swiotlb=[,force] + Prereserve that many 128K pages for the software IO + bounce buffering. + force Force all IO through the software TLB. + + Settings for the IBM Calgary hardware IOMMU currently found in IBM + pSeries and xSeries machines: + + calgary=[64k,128k,256k,512k,1M,2M,4M,8M] + calgary=[translate_empty_slots] + calgary=[disable=] + panic Always panic when IOMMU overflows + + 64k,...,8M - Set the size of each PCI slot's translation table + when using the Calgary IOMMU. This is the size of the translation + table itself in main memory. The smallest table, 64k, covers an IO + space of 32MB; the largest, 8MB table, can cover an IO space of + 4GB. Normally the kernel will make the right choice by itself. + + translate_empty_slots - Enable translation even on slots that have + no devices attached to them, in case a device will be hotplugged + in the future. + + disable= - Disable translation on a given PHB. For + example, the built-in graphics adapter resides on the first bridge + (PCI bus number 0); if translation (isolation) is enabled on this + bridge, X servers that access the hardware directly from user + space might stop working. Use this option if you have devices that + are accessed from userspace directly on some PCI host bridge. + +Debugging + + oops=panic Always panic on oopses. Default is to just kill the process, + but there is a small probability of deadlocking the machine. + This will also cause panics on machine check exceptions. + Useful together with panic=30 to trigger a reboot. + + kstack=N Print N words from the kernel stack in oops dumps. + + pagefaulttrace Dump all page faults. Only useful for extreme debugging + and will create a lot of output. + + call_trace=[old|both|newfallback|new] + old: use old inexact backtracer + new: use new exact dwarf2 unwinder + both: print entries from both + newfallback: use new unwinder but fall back to old if it gets + stuck (default) + +Miscellaneous + + nogbpages + Do not use GB pages for kernel direct mappings. + gbpages + Use GB pages for kernel direct mappings. diff --git a/Documentation/x86/x86_64/cpu-hotplug-spec b/Documentation/x86/x86_64/cpu-hotplug-spec new file mode 100644 index 0000000..3c23e05 --- /dev/null +++ b/Documentation/x86/x86_64/cpu-hotplug-spec @@ -0,0 +1,21 @@ +Firmware support for CPU hotplug under Linux/x86-64 +--------------------------------------------------- + +Linux/x86-64 supports CPU hotplug now. For various reasons Linux wants to +know in advance of boot time the maximum number of CPUs that could be plugged +into the system. ACPI 3.0 currently has no official way to supply +this information from the firmware to the operating system. + +In ACPI each CPU needs an LAPIC object in the MADT table (5.2.11.5 in the +ACPI 3.0 specification). ACPI already has the concept of disabled LAPIC +objects by setting the Enabled bit in the LAPIC object to zero. + +For CPU hotplug Linux/x86-64 expects now that any possible future hotpluggable +CPU is already available in the MADT. If the CPU is not available yet +it should have its LAPIC Enabled bit set to 0. Linux will use the number +of disabled LAPICs to compute the maximum number of future CPUs. + +In the worst case the user can overwrite this choice using a command line +option (additional_cpus=...), but it is recommended to supply the correct +number (or a reasonable approximation of it, with erring towards more not less) +in the MADT to avoid manual configuration. diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets b/Documentation/x86/x86_64/fake-numa-for-cpusets new file mode 100644 index 0000000..d1a985c --- /dev/null +++ b/Documentation/x86/x86_64/fake-numa-for-cpusets @@ -0,0 +1,66 @@ +Using numa=fake and CPUSets for Resource Management +Written by David Rientjes + +This document describes how the numa=fake x86_64 command-line option can be used +in conjunction with cpusets for coarse memory management. Using this feature, +you can create fake NUMA nodes that represent contiguous chunks of memory and +assign them to cpusets and their attached tasks. This is a way of limiting the +amount of system memory that are available to a certain class of tasks. + +For more information on the features of cpusets, see Documentation/cpusets.txt. +There are a number of different configurations you can use for your needs. For +more information on the numa=fake command line option and its various ways of +configuring fake nodes, see Documentation/x86_64/boot-options.txt. + +For the purposes of this introduction, we'll assume a very primitive NUMA +emulation setup of "numa=fake=4*512,". This will split our system memory into +four equal chunks of 512M each that we can now use to assign to cpusets. As +you become more familiar with using this combination for resource control, +you'll determine a better setup to minimize the number of nodes you have to deal +with. + +A machine may be split as follows with "numa=fake=4*512," as reported by dmesg: + + Faking node 0 at 0000000000000000-0000000020000000 (512MB) + Faking node 1 at 0000000020000000-0000000040000000 (512MB) + Faking node 2 at 0000000040000000-0000000060000000 (512MB) + Faking node 3 at 0000000060000000-0000000080000000 (512MB) + ... + On node 0 totalpages: 130975 + On node 1 totalpages: 131072 + On node 2 totalpages: 131072 + On node 3 totalpages: 131072 + +Now following the instructions for mounting the cpusets filesystem from +Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory +address spaces) to individual cpusets: + + [root@xroads /]# mkdir exampleset + [root@xroads /]# mount -t cpuset none exampleset + [root@xroads /]# mkdir exampleset/ddset + [root@xroads /]# cd exampleset/ddset + [root@xroads /exampleset/ddset]# echo 0-1 > cpus + [root@xroads /exampleset/ddset]# echo 0-1 > mems + +Now this cpuset, 'ddset', will only allowed access to fake nodes 0 and 1 for +memory allocations (1G). + +You can now assign tasks to these cpusets to limit the memory resources +available to them according to the fake nodes assigned as mems: + + [root@xroads /exampleset/ddset]# echo $$ > tasks + [root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G + [1] 13425 + +Notice the difference between the system memory usage as reported by +/proc/meminfo between the restricted cpuset case above and the unrestricted +case (i.e. running the same 'dd' command without assigning it to a fake NUMA +cpuset): + Unrestricted Restricted + MemTotal: 3091900 kB 3091900 kB + MemFree: 42113 kB 1513236 kB + +This allows for coarse memory management for the tasks you assign to particular +cpusets. Since cpusets can form a hierarchy, you can create some pretty +interesting combinations of use-cases for various classes of tasks for your +memory management needs. diff --git a/Documentation/x86/x86_64/kernel-stacks b/Documentation/x86/x86_64/kernel-stacks new file mode 100644 index 0000000..5ad65d5 --- /dev/null +++ b/Documentation/x86/x86_64/kernel-stacks @@ -0,0 +1,99 @@ +Most of the text from Keith Owens, hacked by AK + +x86_64 page size (PAGE_SIZE) is 4K. + +Like all other architectures, x86_64 has a kernel stack for every +active thread. These thread stacks are THREAD_SIZE (2*PAGE_SIZE) big. +These stacks contain useful data as long as a thread is alive or a +zombie. While the thread is in user space the kernel stack is empty +except for the thread_info structure at the bottom. + +In addition to the per thread stacks, there are specialized stacks +associated with each CPU. These stacks are only used while the kernel +is in control on that CPU; when a CPU returns to user space the +specialized stacks contain no useful data. The main CPU stacks are: + +* Interrupt stack. IRQSTACKSIZE + + Used for external hardware interrupts. If this is the first external + hardware interrupt (i.e. not a nested hardware interrupt) then the + kernel switches from the current task to the interrupt stack. Like + the split thread and interrupt stacks on i386 (with CONFIG_4KSTACKS), + this gives more room for kernel interrupt processing without having + to increase the size of every per thread stack. + + The interrupt stack is also used when processing a softirq. + +Switching to the kernel interrupt stack is done by software based on a +per CPU interrupt nest counter. This is needed because x86-64 "IST" +hardware stacks cannot nest without races. + +x86_64 also has a feature which is not available on i386, the ability +to automatically switch to a new stack for designated events such as +double fault or NMI, which makes it easier to handle these unusual +events on x86_64. This feature is called the Interrupt Stack Table +(IST). There can be up to 7 IST entries per CPU. The IST code is an +index into the Task State Segment (TSS). The IST entries in the TSS +point to dedicated stacks; each stack can be a different size. + +An IST is selected by a non-zero value in the IST field of an +interrupt-gate descriptor. When an interrupt occurs and the hardware +loads such a descriptor, the hardware automatically sets the new stack +pointer based on the IST value, then invokes the interrupt handler. If +software wants to allow nested IST interrupts then the handler must +adjust the IST values on entry to and exit from the interrupt handler. +(This is occasionally done, e.g. for debug exceptions.) + +Events with different IST codes (i.e. with different stacks) can be +nested. For example, a debug interrupt can safely be interrupted by an +NMI. arch/x86_64/kernel/entry.S::paranoidentry adjusts the stack +pointers on entry to and exit from all IST events, in theory allowing +IST events with the same code to be nested. However in most cases, the +stack size allocated to an IST assumes no nesting for the same code. +If that assumption is ever broken then the stacks will become corrupt. + +The currently assigned IST stacks are :- + +* STACKFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE). + + Used for interrupt 12 - Stack Fault Exception (#SS). + + This allows the CPU to recover from invalid stack segments. Rarely + happens. + +* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE). + + Used for interrupt 8 - Double Fault Exception (#DF). + + Invoked when handling one exception causes another exception. Happens + when the kernel is very confused (e.g. kernel stack pointer corrupt). + Using a separate stack allows the kernel to recover from it well enough + in many cases to still output an oops. + +* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE). + + Used for non-maskable interrupts (NMI). + + NMI can be delivered at any time, including when the kernel is in the + middle of switching stacks. Using IST for NMI events avoids making + assumptions about the previous state of the kernel stack. + +* DEBUG_STACK. DEBUG_STKSZ + + Used for hardware debug interrupts (interrupt 1) and for software + debug interrupts (INT3). + + When debugging a kernel, debug interrupts (both hardware and + software) can occur at any time. Using IST for these interrupts + avoids making assumptions about the previous state of the kernel + stack. + +* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE). + + Used for interrupt 18 - Machine Check Exception (#MC). + + MCE can be delivered at any time, including when the kernel is in the + middle of switching stacks. Using IST for MCE events avoids making + assumptions about the previous state of the kernel stack. + +For more details see the Intel IA32 or AMD AMD64 architecture manuals. diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck new file mode 100644 index 0000000..a05e58e --- /dev/null +++ b/Documentation/x86/x86_64/machinecheck @@ -0,0 +1,77 @@ + +Configurable sysfs parameters for the x86-64 machine check code. + +Machine checks report internal hardware error conditions detected +by the CPU. Uncorrected errors typically cause a machine check +(often with panic), corrected ones cause a machine check log entry. + +Machine checks are organized in banks (normally associated with +a hardware subsystem) and subevents in a bank. The exact meaning +of the banks and subevent is CPU specific. + +mcelog knows how to decode them. + +When you see the "Machine check errors logged" message in the system +log then mcelog should run to collect and decode machine check entries +from /dev/mcelog. Normally mcelog should be run regularly from a cronjob. + +Each CPU has a directory in /sys/devices/system/machinecheck/machinecheckN +(N = CPU number) + +The directory contains some configurable entries: + +Entries: + +bankNctl +(N bank number) + 64bit Hex bitmask enabling/disabling specific subevents for bank N + When a bit in the bitmask is zero then the respective + subevent will not be reported. + By default all events are enabled. + Note that BIOS maintain another mask to disable specific events + per bank. This is not visible here + +The following entries appear for each CPU, but they are truly shared +between all CPUs. + +check_interval + How often to poll for corrected machine check errors, in seconds + (Note output is hexademical). Default 5 minutes. When the poller + finds MCEs it triggers an exponential speedup (poll more often) on + the polling interval. When the poller stops finding MCEs, it + triggers an exponential backoff (poll less often) on the polling + interval. The check_interval variable is both the initial and + maximum polling interval. + +tolerant + Tolerance level. When a machine check exception occurs for a non + corrected machine check the kernel can take different actions. + Since machine check exceptions can happen any time it is sometimes + risky for the kernel to kill a process because it defies + normal kernel locking rules. The tolerance level configures + how hard the kernel tries to recover even at some risk of + deadlock. Higher tolerant values trade potentially better uptime + with the risk of a crash or even corruption (for tolerant >= 3). + + 0: always panic on uncorrected errors, log corrected errors + 1: panic or SIGBUS on uncorrected errors, log corrected errors + 2: SIGBUS or log uncorrected errors, log corrected errors + 3: never panic or SIGBUS, log all errors (for testing only) + + Default: 1 + + Note this only makes a difference if the CPU allows recovery + from a machine check exception. Current x86 CPUs generally do not. + +trigger + Program to run when a machine check event is detected. + This is an alternative to running mcelog regularly from cron + and allows to detect events faster. + +TBD document entries for AMD threshold interrupt configuration + +For more details about the x86 machine check architecture +see the Intel and AMD architecture manuals from their developer websites. + +For more details about the architecture see +see http://one.firstfloor.org/~andi/mce.pdf diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt new file mode 100644 index 0000000..b89b6d2 --- /dev/null +++ b/Documentation/x86/x86_64/mm.txt @@ -0,0 +1,29 @@ + + + +Virtual memory map with 4 level page tables: + +0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm +hole caused by [48:63] sign extension +ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole +ffff810000000000 - ffffc0ffffffffff (=46 bits) direct mapping of all phys. memory +ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole +ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space +ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB) +... unused hole ... +ffffffff80000000 - ffffffff82800000 (=40 MB) kernel text mapping, from phys 0 +... unused hole ... +ffffffff88000000 - fffffffffff00000 (=1919 MB) module mapping space + +The direct mapping covers all memory in the system up to the highest +memory address (this means in some cases it can also include PCI memory +holes). + +vmalloc space is lazily synchronized into the different PML4 pages of +the processes using the page fault handler, with init_level4_pgt as +reference. + +Current X86-64 implementations only support 40 bits of address space, +but we support up to 46 bits. This expands into MBZ space in the page tables. + +-Andi Kleen, Jul 2004 diff --git a/Documentation/x86/x86_64/uefi.txt b/Documentation/x86/x86_64/uefi.txt new file mode 100644 index 0000000..7d77120 --- /dev/null +++ b/Documentation/x86/x86_64/uefi.txt @@ -0,0 +1,38 @@ +General note on [U]EFI x86_64 support +------------------------------------- + +The nomenclature EFI and UEFI are used interchangeably in this document. + +Although the tools below are _not_ needed for building the kernel, +the needed bootloader support and associated tools for x86_64 platforms +with EFI firmware and specifications are listed below. + +1. UEFI specification: http://www.uefi.org + +2. Booting Linux kernel on UEFI x86_64 platform requires bootloader + support. Elilo with x86_64 support can be used. + +3. x86_64 platform with EFI/UEFI firmware. + +Mechanics: +--------- +- Build the kernel with the following configuration. + CONFIG_FB_EFI=y + CONFIG_FRAMEBUFFER_CONSOLE=y + If EFI runtime services are expected, the following configuration should + be selected. + CONFIG_EFI=y + CONFIG_EFI_VARS=y or m # optional +- Create a VFAT partition on the disk +- Copy the following to the VFAT partition: + elilo bootloader with x86_64 support, elilo configuration file, + kernel image built in first step and corresponding + initrd. Instructions on building elilo and its dependencies + can be found in the elilo sourceforge project. +- Boot to EFI shell and invoke elilo choosing the kernel image built + in first step. +- If some or all EFI runtime services don't work, you can try following + kernel command line parameters to turn off some or all EFI runtime + services. + noefi turn off all EFI runtime services + reboot_type=k turn off EFI reboot runtime service diff --git a/Documentation/x86_64/00-INDEX b/Documentation/x86_64/00-INDEX deleted file mode 100644 index 92fc20a..0000000 --- a/Documentation/x86_64/00-INDEX +++ /dev/null @@ -1,16 +0,0 @@ -00-INDEX - - This file -boot-options.txt - - AMD64-specific boot options. -cpu-hotplug-spec - - Firmware support for CPU hotplug under Linux/x86-64 -fake-numa-for-cpusets - - Using numa=fake and CPUSets for Resource Management -kernel-stacks - - Context-specific per-processor interrupt stacks. -machinecheck - - Configurable sysfs parameters for the x86-64 machine check code. -mm.txt - - Memory layout of x86-64 (4 level page tables, 46 bits physical). -uefi.txt - - Booting Linux via Unified Extensible Firmware Interface. diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt deleted file mode 100644 index b0c7b6c..0000000 --- a/Documentation/x86_64/boot-options.txt +++ /dev/null @@ -1,314 +0,0 @@ -AMD64 specific boot options - -There are many others (usually documented in driver documentation), but -only the AMD64 specific ones are listed here. - -Machine check - - mce=off disable machine check - mce=bootlog Enable logging of machine checks left over from booting. - Disabled by default on AMD because some BIOS leave bogus ones. - If your BIOS doesn't do that it's a good idea to enable though - to make sure you log even machine check events that result - in a reboot. On Intel systems it is enabled by default. - mce=nobootlog - Disable boot machine check logging. - mce=tolerancelevel (number) - 0: always panic on uncorrected errors, log corrected errors - 1: panic or SIGBUS on uncorrected errors, log corrected errors - 2: SIGBUS or log uncorrected errors, log corrected errors - 3: never panic or SIGBUS, log all errors (for testing only) - Default is 1 - Can be also set using sysfs which is preferable. - - nomce (for compatibility with i386): same as mce=off - - Everything else is in sysfs now. - -APICs - - apic Use IO-APIC. Default - - noapic Don't use the IO-APIC. - - disableapic Don't use the local APIC - - nolapic Don't use the local APIC (alias for i386 compatibility) - - pirq=... See Documentation/i386/IO-APIC.txt - - noapictimer Don't set up the APIC timer - - no_timer_check Don't check the IO-APIC timer. This can work around - problems with incorrect timer initialization on some boards. - - apicmaintimer Run time keeping from the local APIC timer instead - of using the PIT/HPET interrupt for this. This is useful - when the PIT/HPET interrupts are unreliable. - - noapicmaintimer Don't do time keeping using the APIC timer. - Useful when this option was auto selected, but doesn't work. - - apicpmtimer - Do APIC timer calibration using the pmtimer. Implies - apicmaintimer. Useful when your PIT timer is totally - broken. - - disable_8254_timer / enable_8254_timer - Enable interrupt 0 timer routing over the 8254 in addition to over - the IO-APIC. The kernel tries to set a sensible default. - -Early Console - - syntax: earlyprintk=vga - earlyprintk=serial[,ttySn[,baudrate]] - - The early console is useful when the kernel crashes before the - normal console is initialized. It is not enabled by - default because it has some cosmetic problems. - Append ,keep to not disable it when the real console takes over. - Only vga or serial at a time, not both. - Currently only ttyS0 and ttyS1 are supported. - Interaction with the standard serial driver is not very good. - The VGA output is eventually overwritten by the real console. - -Timing - - notsc - Don't use the CPU time stamp counter to read the wall time. - This can be used to work around timing problems on multiprocessor systems - with not properly synchronized CPUs. - - report_lost_ticks - Report when timer interrupts are lost because some code turned off - interrupts for too long. - - nmi_watchdog=NUMBER[,panic] - NUMBER can be: - 0 don't use an NMI watchdog - 1 use the IO-APIC timer for the NMI watchdog - 2 use the local APIC for the NMI watchdog using a performance counter. Note - This will use one performance counter and the local APIC's performance - vector. - When panic is specified panic when an NMI watchdog timeout occurs. - This is useful when you use a panic=... timeout and need the box - quickly up again. - - nohpet - Don't use the HPET timer. - -Idle loop - - idle=poll - Don't do power saving in the idle loop using HLT, but poll for rescheduling - event. This will make the CPUs eat a lot more power, but may be useful - to get slightly better performance in multiprocessor benchmarks. It also - makes some profiling using performance counters more accurate. - Please note that on systems with MONITOR/MWAIT support (like Intel EM64T - CPUs) this option has no performance advantage over the normal idle loop. - It may also interact badly with hyperthreading. - -Rebooting - - reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] [, [w]arm | [c]old] - bios Use the CPU reboot vector for warm reset - warm Don't set the cold reboot flag - cold Set the cold reboot flag - triple Force a triple fault (init) - kbd Use the keyboard controller. cold reset (default) - acpi Use the ACPI RESET_REG in the FADT. If ACPI is not configured or the - ACPI reset does not work, the reboot path attempts the reset using - the keyboard controller. - efi Use efi reset_system runtime service. If EFI is not configured or the - EFI reset does not work, the reboot path attempts the reset using - the keyboard controller. - - Using warm reset will be much faster especially on big memory - systems because the BIOS will not go through the memory check. - Disadvantage is that not all hardware will be completely reinitialized - on reboot so there may be boot problems on some systems. - - reboot=force - - Don't stop other CPUs on reboot. This can make reboot more reliable - in some cases. - -Non Executable Mappings - - noexec=on|off - - on Enable(default) - off Disable - -SMP - - additional_cpus=NUM Allow NUM more CPUs for hotplug - (defaults are specified by the BIOS, see Documentation/x86_64/cpu-hotplug-spec) - -NUMA - - numa=off Only set up a single NUMA node spanning all memory. - - numa=noacpi Don't parse the SRAT table for NUMA setup - - numa=fake=CMDLINE - If a number, fakes CMDLINE nodes and ignores NUMA setup of the - actual machine. Otherwise, system memory is configured - depending on the sizes and coefficients listed. For example: - numa=fake=2*512,1024,4*256,*128 - gives two 512M nodes, a 1024M node, four 256M nodes, and the - rest split into 128M chunks. If the last character of CMDLINE - is a *, the remaining memory is divided up equally among its - coefficient: - numa=fake=2*512,2* - gives two 512M nodes and the rest split into two nodes. - Otherwise, the remaining system RAM is allocated to an - additional node. - - numa=hotadd=percent - Only allow hotadd memory to preallocate page structures upto - percent of already available memory. - numa=hotadd=0 will disable hotadd memory. - -ACPI - - acpi=off Don't enable ACPI - acpi=ht Use ACPI boot table parsing, but don't enable ACPI - interpreter - acpi=force Force ACPI on (currently not needed) - - acpi=strict Disable out of spec ACPI workarounds. - - acpi_sci={edge,level,high,low} Set up ACPI SCI interrupt. - - acpi=noirq Don't route interrupts - -PCI - - pci=off Don't use PCI - pci=conf1 Use conf1 access. - pci=conf2 Use conf2 access. - pci=rom Assign ROMs. - pci=assign-busses Assign busses - pci=irqmask=MASK Set PCI interrupt mask to MASK - pci=lastbus=NUMBER Scan upto NUMBER busses, no matter what the mptable says. - pci=noacpi Don't use ACPI to set up PCI interrupt routing. - -IOMMU (input/output memory management unit) - - Currently four x86-64 PCI-DMA mapping implementations exist: - - 1. : use no hardware/software IOMMU at all - (e.g. because you have < 3 GB memory). - Kernel boot message: "PCI-DMA: Disabling IOMMU" - - 2. : AMD GART based hardware IOMMU. - Kernel boot message: "PCI-DMA: using GART IOMMU" - - 3. : Software IOMMU implementation. Used - e.g. if there is no hardware IOMMU in the system and it is need because - you have >3GB memory or told the kernel to us it (iommu=soft)) - Kernel boot message: "PCI-DMA: Using software bounce buffering - for IO (SWIOTLB)" - - 4. : IBM Calgary hardware IOMMU. Used in IBM - pSeries and xSeries servers. This hardware IOMMU supports DMA address - mapping with memory protection, etc. - Kernel boot message: "PCI-DMA: Using Calgary IOMMU" - - iommu=[][,noagp][,off][,force][,noforce][,leak[=] - [,memaper[=]][,merge][,forcesac][,fullflush][,nomerge] - [,noaperture][,calgary] - - General iommu options: - off Don't initialize and use any kind of IOMMU. - noforce Don't force hardware IOMMU usage when it is not needed. - (default). - force Force the use of the hardware IOMMU even when it is - not actually needed (e.g. because < 3 GB memory). - soft Use software bounce buffering (SWIOTLB) (default for - Intel machines). This can be used to prevent the usage - of an available hardware IOMMU. - - iommu options only relevant to the AMD GART hardware IOMMU: - Set the size of the remapping area in bytes. - allowed Overwrite iommu off workarounds for specific chipsets. - fullflush Flush IOMMU on each allocation (default). - nofullflush Don't use IOMMU fullflush. - leak Turn on simple iommu leak tracing (only when - CONFIG_IOMMU_LEAK is on). Default number of leak pages - is 20. - memaper[=] Allocate an own aperture over RAM with size 32MB<4GB. - DAC is used with 32-bit PCI to push a 64-bit address in - two cycles. When off all DMA over >4GB is forced through - an IOMMU or software bounce buffering. - nodac Forbid DAC mode, i.e. DMA >4GB. - panic Always panic when IOMMU overflows. - calgary Use the Calgary IOMMU if it is available - - iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU - implementation: - swiotlb=[,force] - Prereserve that many 128K pages for the software IO - bounce buffering. - force Force all IO through the software TLB. - - Settings for the IBM Calgary hardware IOMMU currently found in IBM - pSeries and xSeries machines: - - calgary=[64k,128k,256k,512k,1M,2M,4M,8M] - calgary=[translate_empty_slots] - calgary=[disable=] - panic Always panic when IOMMU overflows - - 64k,...,8M - Set the size of each PCI slot's translation table - when using the Calgary IOMMU. This is the size of the translation - table itself in main memory. The smallest table, 64k, covers an IO - space of 32MB; the largest, 8MB table, can cover an IO space of - 4GB. Normally the kernel will make the right choice by itself. - - translate_empty_slots - Enable translation even on slots that have - no devices attached to them, in case a device will be hotplugged - in the future. - - disable= - Disable translation on a given PHB. For - example, the built-in graphics adapter resides on the first bridge - (PCI bus number 0); if translation (isolation) is enabled on this - bridge, X servers that access the hardware directly from user - space might stop working. Use this option if you have devices that - are accessed from userspace directly on some PCI host bridge. - -Debugging - - oops=panic Always panic on oopses. Default is to just kill the process, - but there is a small probability of deadlocking the machine. - This will also cause panics on machine check exceptions. - Useful together with panic=30 to trigger a reboot. - - kstack=N Print N words from the kernel stack in oops dumps. - - pagefaulttrace Dump all page faults. Only useful for extreme debugging - and will create a lot of output. - - call_trace=[old|both|newfallback|new] - old: use old inexact backtracer - new: use new exact dwarf2 unwinder - both: print entries from both - newfallback: use new unwinder but fall back to old if it gets - stuck (default) - -Miscellaneous - - nogbpages - Do not use GB pages for kernel direct mappings. - gbpages - Use GB pages for kernel direct mappings. diff --git a/Documentation/x86_64/cpu-hotplug-spec b/Documentation/x86_64/cpu-hotplug-spec deleted file mode 100644 index 3c23e05..0000000 --- a/Documentation/x86_64/cpu-hotplug-spec +++ /dev/null @@ -1,21 +0,0 @@ -Firmware support for CPU hotplug under Linux/x86-64 ---------------------------------------------------- - -Linux/x86-64 supports CPU hotplug now. For various reasons Linux wants to -know in advance of boot time the maximum number of CPUs that could be plugged -into the system. ACPI 3.0 currently has no official way to supply -this information from the firmware to the operating system. - -In ACPI each CPU needs an LAPIC object in the MADT table (5.2.11.5 in the -ACPI 3.0 specification). ACPI already has the concept of disabled LAPIC -objects by setting the Enabled bit in the LAPIC object to zero. - -For CPU hotplug Linux/x86-64 expects now that any possible future hotpluggable -CPU is already available in the MADT. If the CPU is not available yet -it should have its LAPIC Enabled bit set to 0. Linux will use the number -of disabled LAPICs to compute the maximum number of future CPUs. - -In the worst case the user can overwrite this choice using a command line -option (additional_cpus=...), but it is recommended to supply the correct -number (or a reasonable approximation of it, with erring towards more not less) -in the MADT to avoid manual configuration. diff --git a/Documentation/x86_64/fake-numa-for-cpusets b/Documentation/x86_64/fake-numa-for-cpusets deleted file mode 100644 index d1a985c..0000000 --- a/Documentation/x86_64/fake-numa-for-cpusets +++ /dev/null @@ -1,66 +0,0 @@ -Using numa=fake and CPUSets for Resource Management -Written by David Rientjes - -This document describes how the numa=fake x86_64 command-line option can be used -in conjunction with cpusets for coarse memory management. Using this feature, -you can create fake NUMA nodes that represent contiguous chunks of memory and -assign them to cpusets and their attached tasks. This is a way of limiting the -amount of system memory that are available to a certain class of tasks. - -For more information on the features of cpusets, see Documentation/cpusets.txt. -There are a number of different configurations you can use for your needs. For -more information on the numa=fake command line option and its various ways of -configuring fake nodes, see Documentation/x86_64/boot-options.txt. - -For the purposes of this introduction, we'll assume a very primitive NUMA -emulation setup of "numa=fake=4*512,". This will split our system memory into -four equal chunks of 512M each that we can now use to assign to cpusets. As -you become more familiar with using this combination for resource control, -you'll determine a better setup to minimize the number of nodes you have to deal -with. - -A machine may be split as follows with "numa=fake=4*512," as reported by dmesg: - - Faking node 0 at 0000000000000000-0000000020000000 (512MB) - Faking node 1 at 0000000020000000-0000000040000000 (512MB) - Faking node 2 at 0000000040000000-0000000060000000 (512MB) - Faking node 3 at 0000000060000000-0000000080000000 (512MB) - ... - On node 0 totalpages: 130975 - On node 1 totalpages: 131072 - On node 2 totalpages: 131072 - On node 3 totalpages: 131072 - -Now following the instructions for mounting the cpusets filesystem from -Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory -address spaces) to individual cpusets: - - [root@xroads /]# mkdir exampleset - [root@xroads /]# mount -t cpuset none exampleset - [root@xroads /]# mkdir exampleset/ddset - [root@xroads /]# cd exampleset/ddset - [root@xroads /exampleset/ddset]# echo 0-1 > cpus - [root@xroads /exampleset/ddset]# echo 0-1 > mems - -Now this cpuset, 'ddset', will only allowed access to fake nodes 0 and 1 for -memory allocations (1G). - -You can now assign tasks to these cpusets to limit the memory resources -available to them according to the fake nodes assigned as mems: - - [root@xroads /exampleset/ddset]# echo $$ > tasks - [root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G - [1] 13425 - -Notice the difference between the system memory usage as reported by -/proc/meminfo between the restricted cpuset case above and the unrestricted -case (i.e. running the same 'dd' command without assigning it to a fake NUMA -cpuset): - Unrestricted Restricted - MemTotal: 3091900 kB 3091900 kB - MemFree: 42113 kB 1513236 kB - -This allows for coarse memory management for the tasks you assign to particular -cpusets. Since cpusets can form a hierarchy, you can create some pretty -interesting combinations of use-cases for various classes of tasks for your -memory management needs. diff --git a/Documentation/x86_64/kernel-stacks b/Documentation/x86_64/kernel-stacks deleted file mode 100644 index 5ad65d5..0000000 --- a/Documentation/x86_64/kernel-stacks +++ /dev/null @@ -1,99 +0,0 @@ -Most of the text from Keith Owens, hacked by AK - -x86_64 page size (PAGE_SIZE) is 4K. - -Like all other architectures, x86_64 has a kernel stack for every -active thread. These thread stacks are THREAD_SIZE (2*PAGE_SIZE) big. -These stacks contain useful data as long as a thread is alive or a -zombie. While the thread is in user space the kernel stack is empty -except for the thread_info structure at the bottom. - -In addition to the per thread stacks, there are specialized stacks -associated with each CPU. These stacks are only used while the kernel -is in control on that CPU; when a CPU returns to user space the -specialized stacks contain no useful data. The main CPU stacks are: - -* Interrupt stack. IRQSTACKSIZE - - Used for external hardware interrupts. If this is the first external - hardware interrupt (i.e. not a nested hardware interrupt) then the - kernel switches from the current task to the interrupt stack. Like - the split thread and interrupt stacks on i386 (with CONFIG_4KSTACKS), - this gives more room for kernel interrupt processing without having - to increase the size of every per thread stack. - - The interrupt stack is also used when processing a softirq. - -Switching to the kernel interrupt stack is done by software based on a -per CPU interrupt nest counter. This is needed because x86-64 "IST" -hardware stacks cannot nest without races. - -x86_64 also has a feature which is not available on i386, the ability -to automatically switch to a new stack for designated events such as -double fault or NMI, which makes it easier to handle these unusual -events on x86_64. This feature is called the Interrupt Stack Table -(IST). There can be up to 7 IST entries per CPU. The IST code is an -index into the Task State Segment (TSS). The IST entries in the TSS -point to dedicated stacks; each stack can be a different size. - -An IST is selected by a non-zero value in the IST field of an -interrupt-gate descriptor. When an interrupt occurs and the hardware -loads such a descriptor, the hardware automatically sets the new stack -pointer based on the IST value, then invokes the interrupt handler. If -software wants to allow nested IST interrupts then the handler must -adjust the IST values on entry to and exit from the interrupt handler. -(This is occasionally done, e.g. for debug exceptions.) - -Events with different IST codes (i.e. with different stacks) can be -nested. For example, a debug interrupt can safely be interrupted by an -NMI. arch/x86_64/kernel/entry.S::paranoidentry adjusts the stack -pointers on entry to and exit from all IST events, in theory allowing -IST events with the same code to be nested. However in most cases, the -stack size allocated to an IST assumes no nesting for the same code. -If that assumption is ever broken then the stacks will become corrupt. - -The currently assigned IST stacks are :- - -* STACKFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE). - - Used for interrupt 12 - Stack Fault Exception (#SS). - - This allows the CPU to recover from invalid stack segments. Rarely - happens. - -* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE). - - Used for interrupt 8 - Double Fault Exception (#DF). - - Invoked when handling one exception causes another exception. Happens - when the kernel is very confused (e.g. kernel stack pointer corrupt). - Using a separate stack allows the kernel to recover from it well enough - in many cases to still output an oops. - -* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE). - - Used for non-maskable interrupts (NMI). - - NMI can be delivered at any time, including when the kernel is in the - middle of switching stacks. Using IST for NMI events avoids making - assumptions about the previous state of the kernel stack. - -* DEBUG_STACK. DEBUG_STKSZ - - Used for hardware debug interrupts (interrupt 1) and for software - debug interrupts (INT3). - - When debugging a kernel, debug interrupts (both hardware and - software) can occur at any time. Using IST for these interrupts - avoids making assumptions about the previous state of the kernel - stack. - -* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE). - - Used for interrupt 18 - Machine Check Exception (#MC). - - MCE can be delivered at any time, including when the kernel is in the - middle of switching stacks. Using IST for MCE events avoids making - assumptions about the previous state of the kernel stack. - -For more details see the Intel IA32 or AMD AMD64 architecture manuals. diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86_64/machinecheck deleted file mode 100644 index a05e58e..0000000 --- a/Documentation/x86_64/machinecheck +++ /dev/null @@ -1,77 +0,0 @@ - -Configurable sysfs parameters for the x86-64 machine check code. - -Machine checks report internal hardware error conditions detected -by the CPU. Uncorrected errors typically cause a machine check -(often with panic), corrected ones cause a machine check log entry. - -Machine checks are organized in banks (normally associated with -a hardware subsystem) and subevents in a bank. The exact meaning -of the banks and subevent is CPU specific. - -mcelog knows how to decode them. - -When you see the "Machine check errors logged" message in the system -log then mcelog should run to collect and decode machine check entries -from /dev/mcelog. Normally mcelog should be run regularly from a cronjob. - -Each CPU has a directory in /sys/devices/system/machinecheck/machinecheckN -(N = CPU number) - -The directory contains some configurable entries: - -Entries: - -bankNctl -(N bank number) - 64bit Hex bitmask enabling/disabling specific subevents for bank N - When a bit in the bitmask is zero then the respective - subevent will not be reported. - By default all events are enabled. - Note that BIOS maintain another mask to disable specific events - per bank. This is not visible here - -The following entries appear for each CPU, but they are truly shared -between all CPUs. - -check_interval - How often to poll for corrected machine check errors, in seconds - (Note output is hexademical). Default 5 minutes. When the poller - finds MCEs it triggers an exponential speedup (poll more often) on - the polling interval. When the poller stops finding MCEs, it - triggers an exponential backoff (poll less often) on the polling - interval. The check_interval variable is both the initial and - maximum polling interval. - -tolerant - Tolerance level. When a machine check exception occurs for a non - corrected machine check the kernel can take different actions. - Since machine check exceptions can happen any time it is sometimes - risky for the kernel to kill a process because it defies - normal kernel locking rules. The tolerance level configures - how hard the kernel tries to recover even at some risk of - deadlock. Higher tolerant values trade potentially better uptime - with the risk of a crash or even corruption (for tolerant >= 3). - - 0: always panic on uncorrected errors, log corrected errors - 1: panic or SIGBUS on uncorrected errors, log corrected errors - 2: SIGBUS or log uncorrected errors, log corrected errors - 3: never panic or SIGBUS, log all errors (for testing only) - - Default: 1 - - Note this only makes a difference if the CPU allows recovery - from a machine check exception. Current x86 CPUs generally do not. - -trigger - Program to run when a machine check event is detected. - This is an alternative to running mcelog regularly from cron - and allows to detect events faster. - -TBD document entries for AMD threshold interrupt configuration - -For more details about the x86 machine check architecture -see the Intel and AMD architecture manuals from their developer websites. - -For more details about the architecture see -see http://one.firstfloor.org/~andi/mce.pdf diff --git a/Documentation/x86_64/mm.txt b/Documentation/x86_64/mm.txt deleted file mode 100644 index b89b6d2..0000000 --- a/Documentation/x86_64/mm.txt +++ /dev/null @@ -1,29 +0,0 @@ - - - -Virtual memory map with 4 level page tables: - -0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm -hole caused by [48:63] sign extension -ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole -ffff810000000000 - ffffc0ffffffffff (=46 bits) direct mapping of all phys. memory -ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole -ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space -ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB) -... unused hole ... -ffffffff80000000 - ffffffff82800000 (=40 MB) kernel text mapping, from phys 0 -... unused hole ... -ffffffff88000000 - fffffffffff00000 (=1919 MB) module mapping space - -The direct mapping covers all memory in the system up to the highest -memory address (this means in some cases it can also include PCI memory -holes). - -vmalloc space is lazily synchronized into the different PML4 pages of -the processes using the page fault handler, with init_level4_pgt as -reference. - -Current X86-64 implementations only support 40 bits of address space, -but we support up to 46 bits. This expands into MBZ space in the page tables. - --Andi Kleen, Jul 2004 diff --git a/Documentation/x86_64/uefi.txt b/Documentation/x86_64/uefi.txt deleted file mode 100644 index 7d77120..0000000 --- a/Documentation/x86_64/uefi.txt +++ /dev/null @@ -1,38 +0,0 @@ -General note on [U]EFI x86_64 support -------------------------------------- - -The nomenclature EFI and UEFI are used interchangeably in this document. - -Although the tools below are _not_ needed for building the kernel, -the needed bootloader support and associated tools for x86_64 platforms -with EFI firmware and specifications are listed below. - -1. UEFI specification: http://www.uefi.org - -2. Booting Linux kernel on UEFI x86_64 platform requires bootloader - support. Elilo with x86_64 support can be used. - -3. x86_64 platform with EFI/UEFI firmware. - -Mechanics: ---------- -- Build the kernel with the following configuration. - CONFIG_FB_EFI=y - CONFIG_FRAMEBUFFER_CONSOLE=y - If EFI runtime services are expected, the following configuration should - be selected. - CONFIG_EFI=y - CONFIG_EFI_VARS=y or m # optional -- Create a VFAT partition on the disk -- Copy the following to the VFAT partition: - elilo bootloader with x86_64 support, elilo configuration file, - kernel image built in first step and corresponding - initrd. Instructions on building elilo and its dependencies - can be found in the elilo sourceforge project. -- Boot to EFI shell and invoke elilo choosing the kernel image built - in first step. -- If some or all EFI runtime services don't work, you can try following - kernel command line parameters to turn off some or all EFI runtime - services. - noefi turn off all EFI runtime services - reboot_type=k turn off EFI reboot runtime service -- cgit v0.10.2 From f0d43100f13be0fa5bf52741d7084bb27f00e621 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 May 2008 12:56:36 -0700 Subject: x86: extend e820 early_res support 32bit -fix #3 introduce init_pg_table_start, so xen PV could specify the value. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index c216d3c..0b6cb9f 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -76,7 +76,8 @@ void __init i386_start_kernel(void) reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif - reserve_early(__pa_symbol(&_end), init_pg_tables_end, "INIT_PG_TABLE"); + reserve_early(init_pg_tables_start, init_pg_tables_end, + "INIT_PG_TABLE"); reserve_ebda_region(); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b2cc737..bef4618 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -194,6 +194,7 @@ default_entry: xorl %ebx,%ebx /* %ebx is kept at zero */ movl $pa(pg0), %edi + movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_pmd), %edx movl $PTE_ATTR, %eax 10: @@ -228,6 +229,7 @@ default_entry: page_pde_offset = (__PAGE_OFFSET >> 20); movl $pa(pg0), %edi + movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_dir), %edx movl $PTE_ATTR, %eax 10: diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 08b47a2..de9c5ee 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -71,6 +71,7 @@ /* This value is set up by the early boot code to point to the value immediately after the boot time page tables. It contains a *physical* address, and must not be in the .bss segment! */ +unsigned long init_pg_tables_start __initdata = ~0UL; unsigned long init_pg_tables_end __initdata = ~0UL; /* @@ -485,6 +486,10 @@ static void __init reserve_initrd(void) return; } + printk(KERN_INFO "old RAMDISK: %08llx - %08llx\n", ramdisk_image, + ramdisk_end); + + if (ramdisk_end <= end_of_lowmem) { /* All in lowmem, easy case */ /* @@ -511,6 +516,8 @@ static void __init reserve_initrd(void) "NEW RAMDISK"); initrd_start = ramdisk_here + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; + printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", + ramdisk_here, ramdisk_here + ramdisk_size); do_relocate_initrd = true; } diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index af65b2d..bf7c34a 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1011,6 +1011,7 @@ __init void lguest_init(void) * clobbered. The Launcher places our initial pagetables somewhere at * the top of our physical memory, so we don't need extra space: set * init_pg_tables_end to the end of the kernel. */ + init_pg_tables_start = __pa(pg0); init_pg_tables_end = __pa(pg0); /* Load the %fs segment register (the per-cpu segment register) with @@ -1064,9 +1065,9 @@ __init void lguest_init(void) pm_power_off = lguest_power_off; machine_ops.restart = lguest_restart; - /* Now we're set up, call start_kernel() in init/main.c and we proceed + /* Now we're set up, call i386_start_kernel() in head32.c and we proceed * to boot as normal. It never returns. */ - start_kernel(); + i386_start_kernel(); } /* * This marks the end of stage II of our journey, The Guest. diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c8a56e4..ccc7b84 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1211,6 +1211,7 @@ asmlinkage void __init xen_start_kernel(void) pgd = (pgd_t *)xen_start_info->pt_base; + init_pg_tables_start = __pa(pgd); init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; init_mm.pgd = pgd; /* use the Xen pagetables to start */ @@ -1246,5 +1247,5 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("hvc", 0, NULL); /* Start the world */ - start_kernel(); + i386_start_kernel(); } diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index ffa0f54..64f5f0c1 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -59,9 +59,11 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map); void __init add_memory_region(unsigned long long start, unsigned long long size, int type); -extern unsigned long init_pg_tables_end; +void __init i386_start_kernel(void); +extern unsigned long init_pg_tables_start; +extern unsigned long init_pg_tables_end; #endif /* __i386__ */ #endif /* _SETUP */ -- cgit v0.10.2 From 163872950dc856fd23849c27f60049feaac49ae6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 May 2008 12:57:22 -0700 Subject: x86: extend e820 early_res support 32bit -fix #4 reserve_early pgdata for 32bit numa Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 914ccf9..4774972 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -159,8 +159,13 @@ static void __init allocate_pgdat(int nid) if (nid && node_has_online_mem(nid)) NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; else { - NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); - min_low_pfn += PFN_UP(sizeof(pg_data_t)); + unsigned long pgdat_phys; + pgdat_phys = find_e820_area(min_low_pfn<>PAGE_SHIFT)); + reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), + "NODE_DATA"); } } -- cgit v0.10.2 From a5481280b29b6a3db912ec100498bd31eaa6d2db Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 May 2008 12:58:37 -0700 Subject: x86: extend e820 early_res support 32bit -fix #5 reserve early numa kva, so it will not clash with new RAMDISK Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index de9c5ee..6f40cb5 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -614,7 +614,6 @@ void __init setup_bootmem_allocator(void) */ find_smp_config(); #endif - numa_kva_reserve(); reserve_crashkernel(); reserve_ibft_region(); diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 4774972..55fdbab 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -357,6 +357,11 @@ unsigned long __init setup_memory(void) printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", kva_start_pfn, max_low_pfn); printk("max_pfn = %ld\n", max_pfn); + + /* avoid clash with initrd */ + reserve_early(kva_start_pfn< system_max_low_pfn) @@ -392,13 +397,6 @@ unsigned long __init setup_memory(void) return max_low_pfn; } -void __init numa_kva_reserve(void) -{ - if (kva_pages) - reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages), - BOOTMEM_DEFAULT); -} - void __init zone_sizes_init(void) { int nid; diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h index cb2cad0..faef751 100644 --- a/include/asm-x86/mmzone_32.h +++ b/include/asm-x86/mmzone_32.h @@ -38,16 +38,12 @@ static inline void get_memcfg_numa(void) } extern int early_pfn_to_nid(unsigned long pfn); -extern void numa_kva_reserve(void); #else /* !CONFIG_NUMA */ #define get_memcfg_numa get_memcfg_numa_flat #define get_zholes_size(n) (0) -static inline void numa_kva_reserve(void) -{ -} #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM -- cgit v0.10.2 From 9a73aa81ffb993382afed2ed404bc2b330d75427 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 May 2008 16:25:56 -0700 Subject: x86: 32bit numa srat fix early_ioremap leak on two node system (16g RAM) with numa config I got this crash: get_memcfg_from_srat: assigning address to rsdp RSD PTR v0 [ACPIAM] ACPI: Too big length in RSDT: 92 failed to get NUMA memory information from SRAT table NUMA - single node, flat memory mode Node: 0, start_pfn: 0, end_pfn: 153 Setting physnode_map array to node 0 for pfns: 0 ... Pid: 0, comm: swapper Not tainted 2.6.26-rc4 #4 [<80b41289>] hlt_loop+0x0/0x3 [<8011efa0>] ? alloc_remap+0x50/0x70 [<8079e32e>] alloc_node_mem_map+0x5e/0xa0 [<8012e77b>] ? printk+0x1b/0x20 [<80b590f6>] free_area_init_node+0xc6/0x470 [<80b588fc>] ? __alloc_bootmem_node+0x2c/0x50 [<80b58ad8>] ? find_min_pfn_for_node+0x38/0x70 [<8012e77b>] ? printk+0x1b/0x20 [<80b597c4>] free_area_init_nodes+0x254/0x2d0 [<80b544d7>] zone_sizes_init+0x97/0xa0 [<80b48a03>] setup_arch+0x383/0x530 [<8012e77b>] ? printk+0x1b/0x20 [<80b41aa4>] start_kernel+0x64/0x350 [<80b412d8>] i386_start_kernel+0x8/0x10 ======================= this patch increases the acpi table limit to 32. Also match early_ioremap() with early_iounmap(). Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c index 70e4a37..88971ee 100644 --- a/arch/x86/kernel/srat_32.c +++ b/arch/x86/kernel/srat_32.c @@ -261,7 +261,7 @@ out_fail: struct acpi_static_rsdt { struct acpi_table_rsdt table; - u32 padding[7]; /* Allow for 7 more table entries */ + u32 padding[32]; /* Allow for 32 more table entries */ }; int __init get_memcfg_from_srat(void) @@ -297,7 +297,7 @@ int __init get_memcfg_from_srat(void) } rsdt = (struct acpi_table_rsdt *) - early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt)); + early_ioremap(rsdp->rsdt_physical_address, sizeof(saved_rsdt)); if (!rsdt) { printk(KERN_WARNING @@ -310,6 +310,7 @@ int __init get_memcfg_from_srat(void) if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) { printk(KERN_WARNING "ACPI: RSDT signature incorrect\n"); + early_iounmap(rsdt, sizeof(saved_rsdt)); goto out_err; } @@ -319,37 +320,51 @@ int __init get_memcfg_from_srat(void) * size of RSDT) divided by the size of each entry * (4-byte table pointers). */ - tables = (header->length - sizeof(struct acpi_table_header)) / 4; + tables = (header->length - sizeof(struct acpi_table_header)) / sizeof(u32); if (!tables) goto out_err; memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt)); - + early_iounmap(rsdt, sizeof(saved_rsdt)); if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) { printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", saved_rsdt.table.header.length); goto out_err; } - printk("Begin SRAT table scan....\n"); + printk("Begin SRAT table scan....%d\n", tables); - for (i = 0; i < tables; i++) { + for (i = 0; i < tables; i++){ + int result; + u32 length; /* Map in header, then map in full table length. */ header = (struct acpi_table_header *) early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header)); if (!header) break; + + printk(KERN_INFO "ACPI: %4.4s %08lX, %04X\n", + header->signature, + (unsigned long)saved_rsdt.table.table_offset_entry[i], + header->length); + + if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4)) { + early_iounmap(header, sizeof(struct acpi_table_header)); + continue; + } + + length = header->length; + early_iounmap(header, sizeof(struct acpi_table_header)); header = (struct acpi_table_header *) - early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length); + early_ioremap(saved_rsdt.table.table_offset_entry[i], length); if (!header) break; - if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4)) - continue; - /* we've found the srat table. don't need to look at any more tables */ - return acpi20_parse_srat((struct acpi_table_srat *)header); + result = acpi20_parse_srat((struct acpi_table_srat *)header); + early_iounmap(header, length); + return result; } out_err: remove_all_active_ranges(); -- cgit v0.10.2 From 014c257cce65e9d1cd2d28ec1c89a37c536b151d Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 31 May 2008 14:23:50 +0530 Subject: ftrace: core support for ARM Core ftrace support for the ARM architecture, which includes support for dynamic function tracing. Signed-off-by: Abhishek Sagar Signed-off-by: Ingo Molnar diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b786e68..3845e5c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -14,6 +14,8 @@ config ARM select HAVE_OPROFILE select HAVE_KPROBES if (!XIP_KERNEL) select HAVE_KRETPROBES if (HAVE_KPROBES) + select HAVE_FTRACE if (!XIP_KERNEL) + select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE) help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index de9d9ee..95baac4 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -69,6 +69,12 @@ SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/ targets := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \ head.o misc.o $(OBJS) + +ifeq ($(CONFIG_FTRACE),y) +ORIG_CFLAGS := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS)) +endif + EXTRA_CFLAGS := -fpic -fno-builtin EXTRA_AFLAGS := diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index ad455ff..eb9092c 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -4,6 +4,10 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +ifdef CONFIG_DYNAMIC_FTRACE +CFLAGS_REMOVE_ftrace.o = -pg +endif + # Object file lists. obj-y := compat.o entry-armv.o entry-common.o irq.o \ @@ -18,6 +22,7 @@ obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o obj-$(CONFIG_ATAGS_PROC) += atags.o diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 688b7b1..3b13221 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -48,6 +48,11 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); extern void fp_enter(void); +#ifdef CONFIG_FTRACE +extern void mcount(void); +EXPORT_SYMBOL(mcount); +#endif + /* * This has a special calling convention; it doesn't * modify any of the usual registers, except for LR. diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 597ed00..8f79a47 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -99,6 +99,53 @@ ENTRY(ret_from_fork) #undef CALL #define CALL(x) .long x +#ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(mcount) + stmdb sp!, {r0-r3, lr} + mov r0, lr + + .globl mcount_call +mcount_call: + bl ftrace_stub + ldmia sp!, {r0-r3, pc} + +ENTRY(ftrace_caller) + stmdb sp!, {r0-r3, lr} + ldr r1, [fp, #-4] + mov r0, lr + + .globl ftrace_call +ftrace_call: + bl ftrace_stub + ldmia sp!, {r0-r3, pc} + +#else + +ENTRY(mcount) + stmdb sp!, {r0-r3, lr} + ldr r0, =ftrace_trace_function + ldr r2, [r0] + adr r0, ftrace_stub + cmp r0, r2 + bne trace + ldmia sp!, {r0-r3, pc} + +trace: + ldr r1, [fp, #-4] + mov r0, lr + mov lr, pc + mov pc, r2 + ldmia sp!, {r0-r3, pc} + +#endif /* CONFIG_DYNAMIC_FTRACE */ + + .globl ftrace_stub +ftrace_stub: + mov pc, lr + +#endif /* CONFIG_FTRACE */ + /*============================================================================= * SWI handler *----------------------------------------------------------------------------- diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c new file mode 100644 index 0000000..f4cb4cc --- /dev/null +++ b/arch/arm/kernel/ftrace.c @@ -0,0 +1,128 @@ +/* + * Dynamic function tracing support. + * + * Copyright (C) 2008 Abhishek Sagar + * + * For licencing details, see COPYING. + * + * Defines low-level handling of mcount calls when the kernel + * is compiled with the -pg flag. When using dynamic ftrace, the + * mcount call-sites get patched lazily with NOP till they are + * enabled. All code mutation routines here take effect atomically. + */ + +#include +#include + +#define INSN_SIZE 4 +#define PC_OFFSET 8 +#define BL_OPCODE 0xeb000000 +#define BL_OFFSET_MASK 0x00ffffff + +static unsigned long bl_insn; +static const unsigned long NOP = 0xe1a00000; /* mov r0, r0 */ + +/* return true if mcount call site is already patched/no-op'ed */ +int ftrace_ip_converted(unsigned long pc) +{ + unsigned long save; + + pc -= INSN_SIZE; + save = *(unsigned long *)pc; + return save == NOP; +} + +unsigned char *ftrace_nop_replace(void) +{ + return (char *)&NOP; +} + +/* construct a branch (BL) instruction to addr */ +unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + long offset; + + offset = (long)addr - (long)(pc - INSN_SIZE + PC_OFFSET); + if (unlikely(offset < -33554432 || offset > 33554428)) { + /* Can't generate branches that far (from ARM ARM). Ftrace + * doesn't generate branches outside of core kernel text. + */ + WARN_ON_ONCE(1); + return NULL; + } + offset = (offset >> 2) & BL_OFFSET_MASK; + bl_insn = BL_OPCODE | offset; + return (unsigned char *)&bl_insn; +} + +int ftrace_modify_code(unsigned long pc, unsigned char *old_code, + unsigned char *new_code) +{ + unsigned long err = 0, replaced = 0, old, new; + + old = *(unsigned long *)old_code; + new = *(unsigned long *)new_code; + pc -= INSN_SIZE; + + __asm__ __volatile__ ( + "1: ldr %1, [%2] \n" + " cmp %1, %4 \n" + "2: streq %3, [%2] \n" + " cmpne %1, %3 \n" + " movne %0, #2 \n" + "3:\n" + + ".section .fixup, \"ax\"\n" + "4: mov %0, #1 \n" + " b 3b \n" + ".previous\n" + + ".section __ex_table, \"a\"\n" + " .long 1b, 4b \n" + " .long 2b, 4b \n" + ".previous\n" + + : "=r"(err), "=r"(replaced) + : "r"(pc), "r"(new), "r"(old), "0"(err), "1"(replaced) + : "memory"); + + if (!err && (replaced == old)) + flush_icache_range(pc, pc + INSN_SIZE); + + return err; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + int ret; + unsigned long pc, old; + unsigned char *new; + + pc = (unsigned long)&ftrace_call; + pc += INSN_SIZE; + memcpy(&old, &ftrace_call, INSN_SIZE); + new = ftrace_call_replace(pc, (unsigned long)func); + ret = ftrace_modify_code(pc, (unsigned char *)&old, new); + return ret; +} + +int ftrace_mcount_set(unsigned long *data) +{ + unsigned long pc, old; + unsigned long *addr = data; + unsigned char *new; + + pc = (unsigned long)&mcount_call; + pc += INSN_SIZE; + memcpy(&old, &mcount_call, INSN_SIZE); + new = ftrace_call_replace(pc, *addr); + *addr = ftrace_modify_code(pc, (unsigned char *)&old, new); + return 0; +} + +/* run from kstop_machine */ +int __init ftrace_dyn_arch_init(void *data) +{ + ftrace_mcount_set(data); + return 0; +} -- cgit v0.10.2 From d2cd74b158d7214a556226e3312f9fb1de64d7ae Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 2 Jun 2008 11:45:53 +0200 Subject: [ALSA] emu10k1 - Fix inverted Analog/Digital mixer switch on Audigy2 On Audigy2 Platinum, the Analog/Digital mixer switch is inverted. https://bugzilla.novell.com/show_bug.cgi?id=396204 The patch adds a simple workaround. There might be another device requiring a similar fix, too (or fix for audigy2 generically), but right now I fix only the known broken one. Signed-off-by: Takashi Iwai diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 7b7b9b1..10ee28e 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1670,6 +1670,7 @@ struct snd_emu_chip_details { unsigned char spi_dac; /* SPI interface for DAC */ unsigned char i2c_adc; /* I2C interface for ADC */ unsigned char adc_1361t; /* Use Philips 1361T ADC */ + unsigned char invert_shared_spdif; /* analog/digital switch inverted */ const char *driver; const char *name; const char *id; /* for backward compatibility - can be NULL if not needed */ diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 548c9cc..2f283ea 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -1528,6 +1528,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { .ca0151_chip = 1, .spk71 = 1, .spdif_bug = 1, + .invert_shared_spdif = 1, /* digital/analog switch swapped */ .adc_1361t = 1, /* 24 bit capture instead of 16bit. Fixes ALSA bug#324 */ .ac97_chip = 1} , {.vendor = 0x1102, .device = 0x0004, .revision = 0x04, diff --git a/sound/pci/emu10k1/emumixer.c b/sound/pci/emu10k1/emumixer.c index fd22120..f34bbfb 100644 --- a/sound/pci/emu10k1/emumixer.c +++ b/sound/pci/emu10k1/emumixer.c @@ -1578,6 +1578,10 @@ static int snd_emu10k1_shared_spdif_get(struct snd_kcontrol *kcontrol, ucontrol->value.integer.value[0] = inl(emu->port + A_IOCFG) & A_IOCFG_GPOUT0 ? 1 : 0; else ucontrol->value.integer.value[0] = inl(emu->port + HCFG) & HCFG_GPOUT0 ? 1 : 0; + if (emu->card_capabilities->invert_shared_spdif) + ucontrol->value.integer.value[0] = + !ucontrol->value.integer.value[0]; + return 0; } @@ -1586,15 +1590,18 @@ static int snd_emu10k1_shared_spdif_put(struct snd_kcontrol *kcontrol, { unsigned long flags; struct snd_emu10k1 *emu = snd_kcontrol_chip(kcontrol); - unsigned int reg, val; + unsigned int reg, val, sw; int change = 0; + sw = ucontrol->value.integer.value[0]; + if (emu->card_capabilities->invert_shared_spdif) + sw = !sw; spin_lock_irqsave(&emu->reg_lock, flags); if ( emu->card_capabilities->i2c_adc) { /* Do nothing for Audigy 2 ZS Notebook */ } else if (emu->audigy) { reg = inl(emu->port + A_IOCFG); - val = ucontrol->value.integer.value[0] ? A_IOCFG_GPOUT0 : 0; + val = sw ? A_IOCFG_GPOUT0 : 0; change = (reg & A_IOCFG_GPOUT0) != val; if (change) { reg &= ~A_IOCFG_GPOUT0; @@ -1603,7 +1610,7 @@ static int snd_emu10k1_shared_spdif_put(struct snd_kcontrol *kcontrol, } } reg = inl(emu->port + HCFG); - val = ucontrol->value.integer.value[0] ? HCFG_GPOUT0 : 0; + val = sw ? HCFG_GPOUT0 : 0; change |= (reg & HCFG_GPOUT0) != val; if (change) { reg &= ~HCFG_GPOUT0; -- cgit v0.10.2 From 831d991821daedd4839073dbca55514432ef1768 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 3 Sep 2007 10:17:39 +0200 Subject: x86: add PCI extended config space access for AMD Barcelona This patch implements PCI extended configuration space access for AMD's Barcelona CPUs. It extends the method using CF8/CFC IO addresses. An x86 capability bit has been introduced that is set for CPUs supporting PCI extended config space accesses. Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2458668..99221f9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -6,6 +6,7 @@ #include #include +#include "../setup.h" #include "cpu.h" /* @@ -308,6 +309,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + + if (c->x86 == 0x10) + amd_enable_pci_ext_cfg(c); } static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index c815c2c..180097e 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -217,6 +217,9 @@ void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 == 0x10) fam10h_check_enable_mmcfg(); + if (c->x86 == 0x10) + amd_enable_pci_ext_cfg(c); + if (amd_apic_timer_broken()) disable_apic_timer = 1; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6f80b85..d8f1712 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -136,4 +136,17 @@ void __init setup_per_cpu_areas(void) setup_cpumask_of_cpu(); } +#define ENABLE_CF8_EXT_CFG (1ULL << 46) + +void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c) +{ + u64 reg; + rdmsrl(MSR_AMD64_NB_CFG, reg); + if (!(reg & ENABLE_CF8_EXT_CFG)) { + reg |= ENABLE_CF8_EXT_CFG; + wrmsrl(MSR_AMD64_NB_CFG, reg); + } + set_cpu_cap(c, X86_FEATURE_PCI_EXT_CFG); +} + #endif diff --git a/arch/x86/kernel/setup.h b/arch/x86/kernel/setup.h new file mode 100644 index 0000000..66cc2c7 --- /dev/null +++ b/arch/x86/kernel/setup.h @@ -0,0 +1,26 @@ +/* + * Internal declarations for shared x86 setup code. + * + * Copyright (c) 2008 Advanced Micro Devices, Inc. + * Contributed by Robert Richter + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ + +#ifndef _ARCH_X86_KERNEL_SETUP_H + +extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c); + +#endif /* _ARCH_X86_KERNEL_SETUP_H */ diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 25afdd8..215bd67 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -73,6 +73,8 @@ #include #include +#include "setup.h" + #include #ifdef CONFIG_PARAVIRT #include diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 21d1e0e..27d61b6 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -8,18 +8,21 @@ #include "pci.h" /* - * Functions for accessing PCI configuration space with type 1 accesses + * Functions for accessing PCI base (first 256 bytes) and extended + * (4096 bytes per PCI function) configuration space with type 1 + * accesses. */ #define PCI_CONF1_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) + (0x80000000 | ((reg & 0xF00) << 16) | (bus << 16) \ + | (devfn << 8) | (reg & 0xFC)) static int pci_conf1_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value) { unsigned long flags; - if ((bus > 255) || (devfn > 255) || (reg > 255)) { + if ((bus > 255) || (devfn > 255) || (reg > 4095)) { *value = -1; return -EINVAL; } @@ -50,7 +53,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, { unsigned long flags; - if ((bus > 255) || (devfn > 255) || (reg > 255)) + if ((bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; spin_lock_irqsave(&pci_config_lock, flags); @@ -260,10 +263,16 @@ void __init pci_direct_init(int type) return; printk(KERN_INFO "PCI: Using configuration type %d for base access\n", type); - if (type == 1) + if (type == 1) { raw_pci_ops = &pci_direct_conf1; - else + if (!raw_pci_ext_ops && cpu_has_pci_ext_cfg) { + printk(KERN_INFO "PCI: Using configuration type 1 " + "for extended access\n"); + raw_pci_ext_ops = &pci_direct_conf1; + } + } else { raw_pci_ops = &pci_direct_conf2; + } } int __init pci_direct_probe(void) diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 0d609c8..40fcbba 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -79,6 +79,7 @@ #define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ +#define X86_FEATURE_PCI_EXT_CFG (3*32+19) /* PCI extended cfg access */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -187,6 +188,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) +#define cpu_has_pci_ext_cfg boot_cpu_has(X86_FEATURE_PCI_EXT_CFG) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 -- cgit v0.10.2 From 6fc92866a4a6778a9732a14b61f70cb9014b2a1a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Jun 2008 10:54:16 +0200 Subject: fix build bug in "x86: add PCI extended config space access for AMD Barcelona" Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h index 95beda0..46d6bb1 100644 --- a/include/asm-x86/mmconfig.h +++ b/include/asm-x86/mmconfig.h @@ -9,4 +9,6 @@ static inline void fam10h_check_enable_mmcfg(void) { } static inline void check_enable_amd_mmconf_dmi(void) { } #endif +extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c); + #endif -- cgit v0.10.2 From c46e62f73569d7ef42255bd6f31e35925b7f1492 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 28 May 2008 12:42:57 +0200 Subject: i8259: fix final ugliness Introduce IRQx_VECTOR on 32-bit, so that #ifdef noise is kept down. There should be no object code change. [ mingo@elte.hu: merged to x86/irq not x86/i8259 due to x86/irq having restructured the vector code into asm-x86/irq_vectors.h, which this patch touches. ] Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 7a0fda8..dc92b49 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -297,34 +297,28 @@ void init_8259A(int auto_eoi) * outb_pic - this has to work on a wide range of PC hardware. */ outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ -#ifndef CONFIG_X86_64 - outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ - outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ -#else /* CONFIG_X86_64 */ - /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ + + /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, + to 0x20-0x27 on i386 */ outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); + /* 8259A-1 (the master) has a slave on IR2 */ - outb_pic(0x04, PIC_MASTER_IMR); -#endif /* CONFIG_X86_64 */ + outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); + if (auto_eoi) /* master does Auto EOI */ outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); else /* master expects normal EOI */ outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ -#ifndef CONFIG_X86_64 - outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ - outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ - outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ -#else /* CONFIG_X86_64 */ - /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ + + /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */ outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); -#endif /* CONFIG_X86_64 */ if (auto_eoi) /* * In AEOI mode we just have to mask the interrupt diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h index 3cb6d8c..b58581e 100644 --- a/include/asm-x86/irq_vectors.h +++ b/include/asm-x86/irq_vectors.h @@ -18,17 +18,20 @@ #endif /* - * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit. - * * Reserve the lowest usable priority level 0x20 - 0x2f for triggering * cleanup after irq migration on 64 bit. */ #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR /* - * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit + * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit. + * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit. */ +#ifdef CONFIG_X86_32 +#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR) +#else #define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) +#endif #define IRQ1_VECTOR (IRQ0_VECTOR + 1) #define IRQ2_VECTOR (IRQ0_VECTOR + 2) #define IRQ3_VECTOR (IRQ0_VECTOR + 3) -- cgit v0.10.2 From c78277288e3d561d55fb48bc0fe8d6e2cf4d0880 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 29 May 2008 09:02:19 +0100 Subject: CONFIG_PM_SLEEP fix: xen: fix compilation when CONFIG_PM_SLEEP is disabled Xen save/restore depends on CONFIG_PM_SLEEP being set for device_power_up/down. Signed-off-by: Jeremy Fitzhardinge Acked-by: Randy Dunlap Signed-off-by: Ingo Molnar diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 675c3dd..5b546e3 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -31,6 +31,7 @@ enum shutdown_state { /* Ignore multiple shutdown requests. */ static enum shutdown_state shutting_down = SHUTDOWN_INVALID; +#ifdef CONFIG_PM_SLEEP static int xen_suspend(void *data) { int *cancelled = data; @@ -121,6 +122,7 @@ out: #endif shutting_down = SHUTDOWN_INVALID; } +#endif /* CONFIG_PM_SLEEP */ static void shutdown_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) -- cgit v0.10.2 From 1a5726528a70bb239bdd149aef7f2155cd2b1699 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Jun 2008 12:21:36 +0200 Subject: fix build bug in "x86: add PCI extended config space access for AMD Barcelona" diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 99221f9..656b40a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "../setup.h" diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h index 46d6bb1..691798f 100644 --- a/include/asm-x86/mmconfig.h +++ b/include/asm-x86/mmconfig.h @@ -9,6 +9,10 @@ static inline void fam10h_check_enable_mmcfg(void) { } static inline void check_enable_amd_mmconf_dmi(void) { } #endif +#if defined(CONFIG_SMP) && defined(CONFIG_X86_64) extern void __cpuinit amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c); +#else +static inline void amd_enable_pci_ext_cfg(struct cpuinfo_x86 *c) { } +#endif #endif -- cgit v0.10.2 From c1f64a58003fd2efaa725a857e269a15f765791a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 27 May 2008 09:47:13 -0700 Subject: x86: MMIO and gcc re-ordering issue On Tue, 27 May 2008, Linus Torvalds wrote: > > Expecting people to fix up all drivers is simply not going to happen. And > serializing things shouldn't be *that* expensive. People who cannot take > the expense can continue to use the magic __raw_writel() etc stuff. Of course, for non-x86, you kind of have to expect drivers to be well-behaved, so non-x86 can probably avoid this simply because there are less relevant drivers involved. Here's a UNTESTED patch for x86 that may or may not compile and work, and which serializes (on a compiler level) the IO accesses against regular memory accesses. __read[bwlq]()/__write[bwlq]() are not serialized with a :"memory" barrier, although since they still use "asm volatile" I suspect that i practice they are probably serial too. Did not look very closely at any generated code (only did a trivial test to see that the code looks *roughly* correct). Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h index d5b11f6..8e9eca9 100644 --- a/include/asm-x86/io.h +++ b/include/asm-x86/io.h @@ -3,6 +3,62 @@ #define ARCH_HAS_IOREMAP_WC +#include + +#define build_mmio_read(name, size, type, reg, barrier) \ +static inline type name(const volatile void __iomem *addr) \ +{ type ret; asm volatile("mov" size " %1,%0":"=" reg (ret) \ +:"m" (*(volatile type __force *)addr) barrier); return ret; } + +#define build_mmio_write(name, size, type, reg, barrier) \ +static inline void name(type val, volatile void __iomem *addr) \ +{ asm volatile("mov" size " %0,%1": :reg (val), \ +"m" (*(volatile type __force *)addr) barrier); } + +build_mmio_read(readb, "b", unsigned char, "q", :"memory") +build_mmio_read(readw, "w", unsigned short, "r", :"memory") +build_mmio_read(readl, "l", unsigned int, "r", :"memory") + +build_mmio_read(__readb, "b", unsigned char, "q", ) +build_mmio_read(__readw, "w", unsigned short, "r", ) +build_mmio_read(__readl, "l", unsigned int, "r", ) + +build_mmio_write(writeb, "b", unsigned char, "q", :"memory") +build_mmio_write(writew, "w", unsigned short, "r", :"memory") +build_mmio_write(writel, "l", unsigned int, "r", :"memory") + +build_mmio_write(__writeb, "b", unsigned char, "q", ) +build_mmio_write(__writew, "w", unsigned short, "r", ) +build_mmio_write(__writel, "l", unsigned int, "r", ) + +#define readb_relaxed(a) __readb(a) +#define readw_relaxed(a) __readw(a) +#define readl_relaxed(a) __readl(a) +#define __raw_readb __readb +#define __raw_readw __readw +#define __raw_readl __readl + +#define __raw_writeb __writeb +#define __raw_writew __writew +#define __raw_writel __writel + +#define mmiowb() barrier() + +#ifdef CONFIG_X86_64 +build_mmio_read(readq, "q", unsigned long, "r", :"memory") +build_mmio_read(__readq, "q", unsigned long, "r", ) +build_mmio_write(writeq, "q", unsigned long, "r", :"memory") +build_mmio_write(__writeq, "q", unsigned long, "r", ) + +#define readq_relaxed(a) __readq(a) +#define __raw_readq __readq +#define __raw_writeq writeq + +/* Let people know we have them */ +#define readq readq +#define writeq writeq +#endif + #ifdef CONFIG_X86_32 # include "io_32.h" #else diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index 049e81e..d71be8d 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -149,55 +149,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. - */ - -static inline unsigned char readb(const volatile void __iomem *addr) -{ - return *(volatile unsigned char __force *)addr; -} - -static inline unsigned short readw(const volatile void __iomem *addr) -{ - return *(volatile unsigned short __force *)addr; -} - -static inline unsigned int readl(const volatile void __iomem *addr) -{ - return *(volatile unsigned int __force *) addr; -} - -#define readb_relaxed(addr) readb(addr) -#define readw_relaxed(addr) readw(addr) -#define readl_relaxed(addr) readl(addr) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl - -static inline void writeb(unsigned char b, volatile void __iomem *addr) -{ - *(volatile unsigned char __force *)addr = b; -} - -static inline void writew(unsigned short b, volatile void __iomem *addr) -{ - *(volatile unsigned short __force *)addr = b; -} - -static inline void writel(unsigned int b, volatile void __iomem *addr) -{ - *(volatile unsigned int __force *)addr = b; -} -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel - -#define mmiowb() - static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index 0930bed..ddd8058 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -204,77 +204,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt -/* - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the x86 architecture, we just read/write the - * memory location directly. - */ - -static inline __u8 __readb(const volatile void __iomem *addr) -{ - return *(__force volatile __u8 *)addr; -} - -static inline __u16 __readw(const volatile void __iomem *addr) -{ - return *(__force volatile __u16 *)addr; -} - -static __always_inline __u32 __readl(const volatile void __iomem *addr) -{ - return *(__force volatile __u32 *)addr; -} - -static inline __u64 __readq(const volatile void __iomem *addr) -{ - return *(__force volatile __u64 *)addr; -} - -#define readb(x) __readb(x) -#define readw(x) __readw(x) -#define readl(x) __readl(x) -#define readq(x) __readq(x) -#define readb_relaxed(a) readb(a) -#define readw_relaxed(a) readw(a) -#define readl_relaxed(a) readl(a) -#define readq_relaxed(a) readq(a) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl -#define __raw_readq readq - -#define mmiowb() - -static inline void __writel(__u32 b, volatile void __iomem *addr) -{ - *(__force volatile __u32 *)addr = b; -} - -static inline void __writeq(__u64 b, volatile void __iomem *addr) -{ - *(__force volatile __u64 *)addr = b; -} - -static inline void __writeb(__u8 b, volatile void __iomem *addr) -{ - *(__force volatile __u8 *)addr = b; -} - -static inline void __writew(__u16 b, volatile void __iomem *addr) -{ - *(__force volatile __u16 *)addr = b; -} - -#define writeq(val, addr) __writeq((val), (addr)) -#define writel(val, addr) __writel((val), (addr)) -#define writew(val, addr) __writew((val), (addr)) -#define writeb(val, addr) __writeb((val), (addr)) -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel -#define __raw_writeq writeq - void __memcpy_fromio(void *, unsigned long, unsigned); void __memcpy_toio(unsigned long, const void *, unsigned); -- cgit v0.10.2 From 76094a2cf46e4ab776055d4086615b884408568c Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Wed, 28 May 2008 00:03:18 +0530 Subject: ftrace: distinguish kretprobe'd functions in trace logs Tracing functions via ftrace which have a kretprobe installed on them, can produce misleading output in their trace logs. E.g, consider the correct trace of the following sequence: do_IRQ() { ~ irq_enter(); ~ } Trace log (sample): -0 [00] 4154504455.781616: irq_enter <- do_IRQ But if irq_enter() has a kretprobe installed on it, the return value stored on the stack at each invocation is modified to divert the return to a kprobe trampoline function called kretprobe_trampoline(). So with this the trace would (currently) look like: -0 [00] 4154504455.781616: irq_enter <- kretprobe_trampoline Now this is quite misleading to the end user, as it suggests something that didn't actually happen. So just to avoid such misinterpretations, the inlined patch aims to output such a log as: -0 [00] 4154504455.781616: irq_enter <- [unknown/kretprobe'd] Signed-off-by: Abhishek Sagar Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0feae23..12f5e81 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -1199,6 +1200,20 @@ static void s_stop(struct seq_file *m, void *p) mutex_unlock(&trace_types_lock); } +#define KRETPROBE_MSG "[unknown/kretprobe'd]" + +#ifdef CONFIG_KRETPROBES +static inline int kretprobed(unsigned long addr) +{ + return addr == (unsigned long)kretprobe_trampoline; +} +#else +static inline int kretprobed(unsigned long addr) +{ + return 0; +} +#endif /* CONFIG_KRETPROBES */ + static int seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) { @@ -1434,7 +1449,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) case TRACE_FN: seq_print_ip_sym(s, entry->fn.ip, sym_flags); trace_seq_puts(s, " ("); - seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); + if (kretprobed(entry->fn.parent_ip)) + trace_seq_puts(s, KRETPROBE_MSG); + else + seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); trace_seq_puts(s, ")\n"); break; case TRACE_CTX: @@ -1514,8 +1532,11 @@ static int print_trace_fmt(struct trace_iterator *iter) ret = trace_seq_printf(s, " <-"); if (!ret) return 0; - ret = seq_print_ip_sym(s, entry->fn.parent_ip, - sym_flags); + if (kretprobed(entry->fn.parent_ip)) + ret = trace_seq_puts(s, KRETPROBE_MSG); + else + ret = seq_print_ip_sym(s, entry->fn.parent_ip, + sym_flags); if (!ret) return 0; } -- cgit v0.10.2 From 83bea8e1fa0c47b30664b6f92397c016c22f77fb Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 27 May 2008 21:03:46 +0200 Subject: x86: fix incomplete include guard in include/asm-x86/seccomp_32.h Signed-off-by: Vegard Nossum Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/seccomp_32.h b/include/asm-x86/seccomp_32.h index 18da19e..36e71c5 100644 --- a/include/asm-x86/seccomp_32.h +++ b/include/asm-x86/seccomp_32.h @@ -1,4 +1,5 @@ #ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H #include -- cgit v0.10.2 From 6330a30a76c1e62d4b4ec238368957f8febf9113 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 28 May 2008 09:46:19 +0200 Subject: x86: break mutual header inclusion This breaks up the mutual inclusion between headers ptrace.h and vm86.h by moving some small part of vm86.h which is needed by ptrace.h into processor-flags.h. We also try to move #include lines to the top. This has been compile tested on x86_32 and x86_64 defconfig, and run through 'make headers_check'. Cc: Adrian Bunk Signed-off-by: Vegard Nossum Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h index 199cab1..092b39b 100644 --- a/include/asm-x86/processor-flags.h +++ b/include/asm-x86/processor-flags.h @@ -88,4 +88,10 @@ #define CX86_ARR_BASE 0xc4 #define CX86_RCR_BASE 0xdc +#ifdef CONFIG_VM86 +#define X86_VM_MASK X86_EFLAGS_VM +#else +#define X86_VM_MASK 0 /* No VM86 support */ +#endif + #endif /* __ASM_I386_PROCESSOR_FLAGS_H */ diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 9f922b0..8a71db8 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -3,7 +3,12 @@ #include /* For __user */ #include +#include +#ifdef __KERNEL__ +#include /* the DS BTS struct is used for ptrace too */ +#include +#endif #ifndef __ASSEMBLY__ @@ -55,9 +60,6 @@ struct pt_regs { unsigned long ss; }; -#include -#include - #endif /* __KERNEL__ */ #else /* __i386__ */ diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h index cbf4a0e..5ce3513 100644 --- a/include/asm-x86/vm86.h +++ b/include/asm-x86/vm86.h @@ -115,7 +115,6 @@ struct vm86plus_info_struct { unsigned long is_vm86pus:1; /* for vm86 internal use */ unsigned char vm86dbg_intxxtab[32]; /* for debugger */ }; - struct vm86plus_struct { struct vm86_regs regs; unsigned long flags; @@ -128,11 +127,7 @@ struct vm86plus_struct { #ifdef __KERNEL__ -#ifdef CONFIG_VM86 -#define X86_VM_MASK X86_EFLAGS_VM -#else -#define X86_VM_MASK 0 /* No VM86 support */ -#endif +#include /* * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86 @@ -142,7 +137,6 @@ struct vm86plus_struct { * at the end of the structure. Look at ptrace.h to see the "normal" * setup. For user space layout see 'struct vm86_regs' above. */ -#include struct kernel_vm86_regs { /* -- cgit v0.10.2 From ad90c0e3ce8d20d6873b57e36181ef6d7a0097fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 27 May 2008 20:48:37 -0400 Subject: ftrace: user update and disable dynamic ftrace daemon In dynamic ftrace, the mcount function starts off pointing to a stub function that just returns. On start up, the call to the stub is modified to point to a "record_ip" function. The job of the record_ip function is to add the function to a pre-allocated hash list. If the function is already there, it simply is ignored, otherwise it is added to the list. Later, a ftraced daemon wakes up and calls kstop_machine if any functions have been recorded, and changes the calls to the recorded functions to a simple nop. If no functions were recorded, the daemon goes back to sleep. The daemon wakes up once a second to see if it needs to update any newly recorded functions into nops. Usually it does not, but if a lot of code has been executed for the first time in the kernel, the ftraced daemon will call kstop_machine to update those into nops. The problem currently is that there's no way to stop the daemon from doing this, and it can cause unneeded latencies (800us which for some is bothersome). This patch adds a new file /debugfs/tracing/ftraced_enabled. If the daemon is active, reading this will return "enabled\n" and "disabled\n" when the daemon is not running. To disable the daemon, the user can echo "0" or "disable" into this file, and "1" or "enable" to re-enable the daemon. Since the daemon is used to convert the functions into nops to increase the performance of the system, I also added that anytime something is written into the ftraced_enabled file, kstop_machine will run if there are new functions that have been detected that need to be converted. This way the user can disable the daemon but still be able to control the conversion of the mcount calls to nops by simply, "echo 0 > /debugfs/tracing/ftraced_enabled" when they need to do more conversions. To see the number of converted functions: "cat /debugfs/tracing/dyn_ftrace_total_info" Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b482fe8..6238194 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -72,9 +72,15 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); + +void ftrace_disable_daemon(void); +void ftrace_enable_daemon(void); + #else # define ftrace_force_update() ({ 0; }) # define ftrace_set_filter(buf, len, reset) do { } while (0) +# define ftrace_disable_daemon() do { } while (0) +# define ftrace_enable_daemon() do { } while (0) #endif /* totally disable ftrace - can not re-enable after this */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1843edc..f762f5a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -151,8 +151,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE static struct task_struct *ftraced_task; -static DECLARE_WAIT_QUEUE_HEAD(ftraced_waiters); -static unsigned long ftraced_iteration_counter; enum { FTRACE_ENABLE_CALLS = (1 << 0), @@ -189,6 +187,7 @@ static struct ftrace_page *ftrace_pages; static int ftraced_trigger; static int ftraced_suspend; +static int ftraced_stop; static int ftrace_record_suspend; @@ -474,14 +473,21 @@ ftrace_code_disable(struct dyn_ftrace *rec) return 1; } +static int __ftrace_update_code(void *ignore); + static int __ftrace_modify_code(void *data) { unsigned long addr; int *command = data; - if (*command & FTRACE_ENABLE_CALLS) + if (*command & FTRACE_ENABLE_CALLS) { + /* + * Update any recorded ips now that we have the + * machine stopped + */ + __ftrace_update_code(NULL); ftrace_replace_code(1); - else if (*command & FTRACE_DISABLE_CALLS) + } else if (*command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); if (*command & FTRACE_UPDATE_TRACE_FUNC) @@ -503,6 +509,25 @@ static void ftrace_run_update_code(int command) stop_machine_run(__ftrace_modify_code, &command, NR_CPUS); } +void ftrace_disable_daemon(void) +{ + /* Stop the daemon from calling kstop_machine */ + mutex_lock(&ftraced_lock); + ftraced_stop = 1; + mutex_unlock(&ftraced_lock); + + ftrace_force_update(); +} + +void ftrace_enable_daemon(void) +{ + mutex_lock(&ftraced_lock); + ftraced_stop = 0; + mutex_unlock(&ftraced_lock); + + ftrace_force_update(); +} + static ftrace_func_t saved_ftrace_func; static void ftrace_startup(void) @@ -603,6 +628,7 @@ static int __ftrace_update_code(void *ignore) int i; /* Don't be recording funcs now */ + ftrace_record_suspend++; save_ftrace_enabled = ftrace_enabled; ftrace_enabled = 0; @@ -628,18 +654,23 @@ static int __ftrace_update_code(void *ignore) stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; + ftraced_trigger = 0; ftrace_enabled = save_ftrace_enabled; + ftrace_record_suspend--; return 0; } -static void ftrace_update_code(void) +static int ftrace_update_code(void) { - if (unlikely(ftrace_disabled)) - return; + if (unlikely(ftrace_disabled) || + !ftrace_enabled || !ftraced_trigger) + return 0; stop_machine_run(__ftrace_update_code, NULL, NR_CPUS); + + return 1; } static int ftraced(void *ignore) @@ -658,14 +689,13 @@ static int ftraced(void *ignore) mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftraced_lock); - if (ftrace_enabled && ftraced_trigger && !ftraced_suspend) { - ftrace_record_suspend++; - ftrace_update_code(); + if (!ftraced_suspend && !ftraced_stop && + ftrace_update_code()) { usecs = nsecs_to_usecs(ftrace_update_time); if (ftrace_update_tot_cnt > 100000) { ftrace_update_tot_cnt = 0; pr_info("hm, dftrace overflow: %lu change%s" - " (%lu total) in %lu usec%s\n", + " (%lu total) in %lu usec%s\n", ftrace_update_cnt, ftrace_update_cnt != 1 ? "s" : "", ftrace_update_tot_cnt, @@ -673,15 +703,10 @@ static int ftraced(void *ignore) ftrace_disabled = 1; WARN_ON_ONCE(1); } - ftraced_trigger = 0; - ftrace_record_suspend--; } - ftraced_iteration_counter++; mutex_unlock(&ftraced_lock); mutex_unlock(&ftrace_sysctl_lock); - wake_up_interruptible(&ftraced_waiters); - ftrace_shutdown_replenish(); } __set_current_state(TASK_RUNNING); @@ -1219,6 +1244,55 @@ ftrace_notrace_release(struct inode *inode, struct file *file) return ftrace_regex_release(inode, file, 0); } +static ssize_t +ftraced_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + /* don't worry about races */ + char *buf = ftraced_stop ? "disabled\n" : "enabled\n"; + int r = strlen(buf); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +ftraced_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + long val; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + if (strncmp(buf, "enable", 6) == 0) + val = 1; + else if (strncmp(buf, "disable", 7) == 0) + val = 0; + else { + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + val = !!val; + } + + if (val) + ftrace_enable_daemon(); + else + ftrace_disable_daemon(); + + filp->f_pos += cnt; + + return cnt; +} + static struct file_operations ftrace_avail_fops = { .open = ftrace_avail_open, .read = seq_read, @@ -1242,51 +1316,34 @@ static struct file_operations ftrace_notrace_fops = { .release = ftrace_notrace_release, }; +static struct file_operations ftraced_fops = { + .open = tracing_open_generic, + .read = ftraced_read, + .write = ftraced_write, +}; + /** * ftrace_force_update - force an update to all recording ftrace functions - * - * The ftrace dynamic update daemon only wakes up once a second. - * There may be cases where an update needs to be done immediately - * for tests or internal kernel tracing to begin. This function - * wakes the daemon to do an update and will not return until the - * update is complete. */ int ftrace_force_update(void) { - unsigned long last_counter; - DECLARE_WAITQUEUE(wait, current); int ret = 0; if (unlikely(ftrace_disabled)) return -ENODEV; + mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftraced_lock); - last_counter = ftraced_iteration_counter; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&ftraced_waiters, &wait); - if (unlikely(!ftraced_task)) { - ret = -ENODEV; - goto out; - } - - do { - mutex_unlock(&ftraced_lock); - wake_up_process(ftraced_task); - schedule(); - mutex_lock(&ftraced_lock); - if (signal_pending(current)) { - ret = -EINTR; - break; - } - set_current_state(TASK_INTERRUPTIBLE); - } while (last_counter == ftraced_iteration_counter); + /* + * If ftraced_trigger is not set, then there is nothing + * to update. + */ + if (ftraced_trigger && !ftrace_update_code()) + ret = -EBUSY; - out: mutex_unlock(&ftraced_lock); - remove_wait_queue(&ftraced_waiters, &wait); - set_current_state(TASK_RUNNING); + mutex_unlock(&ftrace_sysctl_lock); return ret; } @@ -1331,6 +1388,12 @@ static __init int ftrace_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs " "'set_ftrace_notrace' entry\n"); + + entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer, + NULL, &ftraced_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'ftraced_enabled' entry\n"); return 0; } -- cgit v0.10.2 From fb093eab6d8963a085f112773284f2bcb3d7907b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 27 May 2008 16:29:20 -0700 Subject: x86: remove duplicated e820 func in setup.h we already have them in e820.h Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index 64f5f0c1..9e163fc 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -50,15 +50,9 @@ extern struct boot_params boot_params; */ #define LOWMEMSIZE() (0x9f000) -struct e820entry; - char * __init machine_specific_memory_setup(void); char *memory_setup(void); -int __init copy_e820_map(struct e820entry *biosmap, int nr_map); -void __init add_memory_region(unsigned long long start, - unsigned long long size, int type); - void __init i386_start_kernel(void); -- cgit v0.10.2 From 88ff0a474e98f869d8c321e29481f298320100d6 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 27 May 2008 18:49:39 -0700 Subject: x86: coding style fixes for nmi.c before total: 1 errors, 6 warnings, 534 lines checked after total: 0 errors, 1 warnings, 532 lines checked Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 3671a9f..bf143f1 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -23,9 +23,8 @@ #include #include #include +#include -#include -#include #include #include @@ -45,13 +44,16 @@ static cpumask_t backtrace_mask = CPU_MASK_NONE; * 0: the lapic NMI watchdog is disabled, but can be enabled */ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -static int panic_on_timeout; +EXPORT_SYMBOL(nmi_active); unsigned int nmi_watchdog = NMI_DEFAULT; +EXPORT_SYMBOL(nmi_watchdog); + +static int panic_on_timeout; static unsigned int nmi_hz = HZ; static DEFINE_PER_CPU(short, wd_enabled); -static int endflag __initdata = 0; +static int endflag __initdata; static inline unsigned int get_nmi_count(int cpu) { @@ -404,7 +406,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); - printk("NMI backtrace for cpu %d\n", cpu); + printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); dump_stack(); spin_unlock(&lock); cpu_clear(cpu, backtrace_mask); @@ -529,7 +531,3 @@ void __trigger_all_cpu_backtrace(void) mdelay(1); } } - -EXPORT_SYMBOL(nmi_active); -EXPORT_SYMBOL(nmi_watchdog); - -- cgit v0.10.2 From 9f5314fb4d556d3132c784d0df47352b2830ca53 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 28 May 2008 09:51:18 -0500 Subject: x86, uv: update macros used by UV platform Update the UV address macros to better describe the fields of UV physical addresses. Improve comments in the header files. Add additional MMR definitions. Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index ebf1390..45e84ac 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -5,7 +5,7 @@ * * SGI UV APIC functions (note: not an Intel compatible APIC) * - * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. */ #include @@ -55,37 +55,37 @@ static cpumask_t uv_vector_allocation_domain(int cpu) int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) { unsigned long val; - int nasid; + int pnode; - nasid = uv_apicid_to_nasid(phys_apicid); + pnode = uv_apicid_to_pnode(phys_apicid); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_INIT; - uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); mdelay(10); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_STARTUP; - uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); return 0; } static void uv_send_IPI_one(int cpu, int vector) { unsigned long val, apicid, lapicid; - int nasid; + int pnode; apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ lapicid = apicid & 0x3f; /* ZZZ macro needed */ - nasid = uv_apicid_to_nasid(apicid); + pnode = uv_apicid_to_pnode(apicid); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << UVH_IPI_INT_APIC_ID_SHFT) | (vector << UVH_IPI_INT_VECTOR_SHFT); - uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } static void uv_send_IPI_mask(cpumask_t mask, int vector) @@ -159,39 +159,81 @@ struct genapic apic_x2apic_uv_x = { .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ }; -static __cpuinit void set_x2apic_extra_bits(int nasid) +static __cpuinit void set_x2apic_extra_bits(int pnode) { - __get_cpu_var(x2apic_extra_bits) = ((nasid >> 1) << 6); + __get_cpu_var(x2apic_extra_bits) = (pnode << 6); } /* * Called on boot cpu. */ +static __init int boot_pnode_to_blade(int pnode) +{ + int blade; + + for (blade = 0; blade < uv_num_possible_blades(); blade++) + if (pnode == uv_blade_info[blade].pnode) + return blade; + BUG(); +} + +struct redir_addr { + unsigned long redirect; + unsigned long alias; +}; + +#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT + +static __initdata struct redir_addr redir_addrs[] = { + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, +}; + +static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) +{ + union uvh_si_alias0_overlay_config_u alias; + union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; + int i; + + for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { + alias.v = uv_read_local_mmr(redir_addrs[i].alias); + if (alias.s.base == 0) { + *size = (1UL << alias.s.m_alias); + redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); + *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; + return; + } + } + BUG(); +} + static __init void uv_system_init(void) { union uvh_si_addr_map_config_u m_n_config; - int bytes, nid, cpu, lcpu, nasid, last_nasid, blade; - unsigned long mmr_base; + union uvh_node_id_u node_id; + unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; + int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; + unsigned long mmr_base, present; m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); + m_val = m_n_config.s.m_skt; + n_val = m_n_config.s.n_skt; mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE; printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); - last_nasid = -1; - for_each_possible_cpu(cpu) { - nid = cpu_to_node(cpu); - nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu)); - if (nasid != last_nasid) - uv_possible_blades++; - last_nasid = nasid; - } + for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) + uv_possible_blades += + hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); uv_blade_info = alloc_bootmem_pages(bytes); + get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); + bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); uv_node_to_blade = alloc_bootmem_pages(bytes); memset(uv_node_to_blade, 255, bytes); @@ -200,43 +242,56 @@ static __init void uv_system_init(void) uv_cpu_to_blade = alloc_bootmem_pages(bytes); memset(uv_cpu_to_blade, 255, bytes); - last_nasid = -1; - blade = -1; - lcpu = -1; - for_each_possible_cpu(cpu) { - nid = cpu_to_node(cpu); - nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu)); - if (nasid != last_nasid) { - blade++; - lcpu = -1; - uv_blade_info[blade].nr_posible_cpus = 0; + blade = 0; + for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { + present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); + for (j = 0; j < 64; j++) { + if (!test_bit(j, &present)) + continue; + uv_blade_info[blade].pnode = (i * 64 + j); + uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; + blade++; } - last_nasid = nasid; - lcpu++; + } - uv_cpu_hub_info(cpu)->m_val = m_n_config.s.m_skt; - uv_cpu_hub_info(cpu)->n_val = m_n_config.s.n_skt; + node_id.v = uv_read_local_mmr(UVH_NODE_ID); + gnode_upper = (((unsigned long)node_id.s.node_id) & + ~((1 << n_val) - 1)) << m_val; + + for_each_present_cpu(cpu) { + nid = cpu_to_node(cpu); + pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); + blade = boot_pnode_to_blade(pnode); + lcpu = uv_blade_info[blade].nr_possible_cpus; + uv_blade_info[blade].nr_possible_cpus++; + + uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; + uv_cpu_hub_info(cpu)->lowmem_remap_top = + lowmem_redir_base + lowmem_redir_size; + uv_cpu_hub_info(cpu)->m_val = m_val; + uv_cpu_hub_info(cpu)->n_val = m_val; uv_cpu_hub_info(cpu)->numa_blade_id = blade; uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; - uv_cpu_hub_info(cpu)->local_nasid = nasid; - uv_cpu_hub_info(cpu)->gnode_upper = - nasid & ~((1 << uv_hub_info->n_val) - 1); + uv_cpu_hub_info(cpu)->pnode = pnode; + uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1; + uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; + uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ - uv_blade_info[blade].nasid = nasid; - uv_blade_info[blade].nr_posible_cpus++; uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; - printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, nasid %d, nid %d\n", - cpu, per_cpu(x86_cpu_to_apicid, cpu), nasid, nid); - printk(KERN_DEBUG "UV lcpu %d, blade %d\n", lcpu, blade); + printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, pnode %d, nid %d, " + "lcpu %d, blade %d\n", + cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, + lcpu, blade); } } /* * Called on each cpu to initialize the per_cpu UV data area. + * ZZZ hotplug not supported yet */ void __cpuinit uv_cpu_init(void) { @@ -246,5 +301,5 @@ void __cpuinit uv_cpu_init(void) uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; if (get_uv_system_type() == UV_NON_UNIQUE_APIC) - set_x2apic_extra_bits(uv_hub_info->local_nasid); + set_x2apic_extra_bits(uv_hub_info->pnode); } diff --git a/include/asm-x86/uv/uv_hub.h b/include/asm-x86/uv/uv_hub.h index 26b9240..6500488 100644 --- a/include/asm-x86/uv/uv_hub.h +++ b/include/asm-x86/uv/uv_hub.h @@ -5,7 +5,7 @@ * * SGI UV architectural definitions * - * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. */ #ifndef __ASM_X86_UV_HUB_H__ @@ -20,26 +20,49 @@ /* * Addressing Terminology * - * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of - * routers always have low bit of 1, C/MBricks have low bit - * equal to 0. Most addressing macros that target UV hub chips - * right shift the NASID by 1 to exclude the always-zero bit. + * M - The low M bits of a physical address represent the offset + * into the blade local memory. RAM memory on a blade is physically + * contiguous (although various IO spaces may punch holes in + * it).. * - * SNASID - NASID right shifted by 1 bit. + * N - Number of bits in the node portion of a socket physical + * address. + * + * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of + * routers always have low bit of 1, C/MBricks have low bit + * equal to 0. Most addressing macros that target UV hub chips + * right shift the NASID by 1 to exclude the always-zero bit. + * NASIDs contain up to 15 bits. + * + * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead + * of nasids. + * + * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant + * of the nasid for socket usage. + * + * + * NumaLink Global Physical Address Format: + * +--------------------------------+---------------------+ + * |00..000| GNODE | NodeOffset | + * +--------------------------------+---------------------+ + * |<-------53 - M bits --->|<--------M bits -----> + * + * M - number of node offset bits (35 .. 40) * * * Memory/UV-HUB Processor Socket Address Format: - * +--------+---------------+---------------------+ - * |00..0000| SNASID | NodeOffset | - * +--------+---------------+---------------------+ - * <--- N bits --->|<--------M bits -----> + * +----------------+---------------+---------------------+ + * |00..000000000000| PNODE | NodeOffset | + * +----------------+---------------+---------------------+ + * <--- N bits --->|<--------M bits -----> * - * M number of node offset bits (35 .. 40) - * N number of SNASID bits (0 .. 10) + * M - number of node offset bits (35 .. 40) + * N - number of PNODE bits (0 .. 10) * * Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64). * The actual values are configuration dependent and are set at - * boot time + * boot time. M & N values are set by the hardware/BIOS at boot. + * * * APICID format * NOTE!!!!!! This is the current format of the APICID. However, code @@ -48,14 +71,14 @@ * * 1111110000000000 * 5432109876543210 - * nnnnnnnnnnlc0cch + * pppppppppplc0cch * sssssssssss * - * n = snasid bits + * p = pnode bits * l = socket number on board * c = core * h = hyperthread - * s = bits that are in the socket CSR + * s = bits that are in the SOCKET_ID CSR * * Note: Processor only supports 12 bits in the APICID register. The ACPI * tables hold all 16 bits. Software needs to be aware of this. @@ -74,7 +97,7 @@ * This value is also the value of the maximum number of non-router NASIDs * in the numalink fabric. * - * NOTE: a brick may be 1 or 2 OS nodes. Don't get these confused. + * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused. */ #define UV_MAX_NUMALINK_BLADES 16384 @@ -96,8 +119,12 @@ */ struct uv_hub_info_s { unsigned long global_mmr_base; - unsigned short local_nasid; - unsigned short gnode_upper; + unsigned long gpa_mask; + unsigned long gnode_upper; + unsigned long lowmem_remap_top; + unsigned long lowmem_remap_base; + unsigned short pnode; + unsigned short pnode_mask; unsigned short coherency_domain_number; unsigned short numa_blade_id; unsigned char blade_processor_id; @@ -112,83 +139,124 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); * Local & Global MMR space macros. * Note: macros are intended to be used ONLY by inline functions * in this file - not by other kernel code. + * n - NASID (full 15-bit global nasid) + * g - GNODE (full 15-bit global nasid, right shifted 1) + * p - PNODE (local part of nsids, right shifted 1) */ -#define UV_SNASID(n) ((n) >> 1) -#define UV_NASID(n) ((n) << 1) +#define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) +#define UV_PNODE_TO_NASID(p) (((p) << 1) | uv_hub_info->gnode_upper) #define UV_LOCAL_MMR_BASE 0xf4000000UL #define UV_GLOBAL_MMR32_BASE 0xf8000000UL #define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) -#define UV_GLOBAL_MMR32_SNASID_MASK 0x3ff -#define UV_GLOBAL_MMR32_SNASID_SHIFT 15 -#define UV_GLOBAL_MMR64_SNASID_SHIFT 26 +#define UV_GLOBAL_MMR32_PNODE_SHIFT 15 +#define UV_GLOBAL_MMR64_PNODE_SHIFT 26 -#define UV_GLOBAL_MMR32_NASID_BITS(n) \ - (((UV_SNASID(n) & UV_GLOBAL_MMR32_SNASID_MASK)) << \ - (UV_GLOBAL_MMR32_SNASID_SHIFT)) +#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) -#define UV_GLOBAL_MMR64_NASID_BITS(n) \ - ((unsigned long)UV_SNASID(n) << UV_GLOBAL_MMR64_SNASID_SHIFT) +#define UV_GLOBAL_MMR64_PNODE_BITS(p) \ + ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT) + +#define UV_APIC_PNODE_SHIFT 6 + +/* + * Macros for converting between kernel virtual addresses, socket local physical + * addresses, and UV global physical addresses. + * Note: use the standard __pa() & __va() macros for converting + * between socket virtual and socket physical addresses. + */ + +/* socket phys RAM --> UV global physical address */ +static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) +{ + if (paddr < uv_hub_info->lowmem_remap_top) + paddr += uv_hub_info->lowmem_remap_base; + return paddr | uv_hub_info->gnode_upper; +} + + +/* socket virtual --> UV global physical address */ +static inline unsigned long uv_gpa(void *v) +{ + return __pa(v) | uv_hub_info->gnode_upper; +} + +/* socket virtual --> UV global physical address */ +static inline void *uv_vgpa(void *v) +{ + return (void *)uv_gpa(v); +} + +/* UV global physical address --> socket virtual */ +static inline void *uv_va(unsigned long gpa) +{ + return __va(gpa & uv_hub_info->gpa_mask); +} + +/* pnode, offset --> socket virtual */ +static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) +{ + return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset); +} -#define UV_APIC_NASID_SHIFT 6 /* - * Extract a NASID from an APICID (full apicid, not processor subset) + * Extract a PNODE from an APICID (full apicid, not processor subset) */ -static inline int uv_apicid_to_nasid(int apicid) +static inline int uv_apicid_to_pnode(int apicid) { - return (UV_NASID(apicid >> UV_APIC_NASID_SHIFT)); + return (apicid >> UV_APIC_PNODE_SHIFT); } /* * Access global MMRs using the low memory MMR32 space. This region supports * faster MMR access but not all MMRs are accessible in this space. */ -static inline unsigned long *uv_global_mmr32_address(int nasid, +static inline unsigned long *uv_global_mmr32_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR32_BASE | - UV_GLOBAL_MMR32_NASID_BITS(nasid) | offset); + UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr32(int nasid, unsigned long offset, +static inline void uv_write_global_mmr32(int pnode, unsigned long offset, unsigned long val) { - *uv_global_mmr32_address(nasid, offset) = val; + *uv_global_mmr32_address(pnode, offset) = val; } -static inline unsigned long uv_read_global_mmr32(int nasid, +static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset) { - return *uv_global_mmr32_address(nasid, offset); + return *uv_global_mmr32_address(pnode, offset); } /* * Access Global MMR space using the MMR space located at the top of physical * memory. */ -static inline unsigned long *uv_global_mmr64_address(int nasid, +static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR64_BASE | - UV_GLOBAL_MMR64_NASID_BITS(nasid) | offset); + UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr64(int nasid, unsigned long offset, +static inline void uv_write_global_mmr64(int pnode, unsigned long offset, unsigned long val) { - *uv_global_mmr64_address(nasid, offset) = val; + *uv_global_mmr64_address(pnode, offset) = val; } -static inline unsigned long uv_read_global_mmr64(int nasid, +static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset) { - return *uv_global_mmr64_address(nasid, offset); + return *uv_global_mmr64_address(pnode, offset); } /* - * Access node local MMRs. Faster than using global space but only local MMRs + * Access hub local MMRs. Faster than using global space but only local MMRs * are accessible. */ static inline unsigned long *uv_local_mmr_address(unsigned long offset) @@ -207,15 +275,15 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) } /* - * Structures and definitions for converting between cpu, node, and blade + * Structures and definitions for converting between cpu, node, pnode, and blade * numbers. */ struct uv_blade_info { - unsigned short nr_posible_cpus; + unsigned short nr_possible_cpus; unsigned short nr_online_cpus; - unsigned short nasid; + unsigned short pnode; }; -struct uv_blade_info *uv_blade_info; +extern struct uv_blade_info *uv_blade_info; extern short *uv_node_to_blade; extern short *uv_cpu_to_blade; extern short uv_possible_blades; @@ -244,16 +312,16 @@ static inline int uv_node_to_blade_id(int nid) return uv_node_to_blade[nid]; } -/* Convert a blade id to the NASID of the blade */ -static inline int uv_blade_to_nasid(int bid) +/* Convert a blade id to the PNODE of the blade */ +static inline int uv_blade_to_pnode(int bid) { - return uv_blade_info[bid].nasid; + return uv_blade_info[bid].pnode; } /* Determine the number of possible cpus on a blade */ static inline int uv_blade_nr_possible_cpus(int bid) { - return uv_blade_info[bid].nr_posible_cpus; + return uv_blade_info[bid].nr_possible_cpus; } /* Determine the number of online cpus on a blade */ @@ -262,16 +330,16 @@ static inline int uv_blade_nr_online_cpus(int bid) return uv_blade_info[bid].nr_online_cpus; } -/* Convert a cpu id to the NASID of the blade containing the cpu */ -static inline int uv_cpu_to_nasid(int cpu) +/* Convert a cpu id to the PNODE of the blade containing the cpu */ +static inline int uv_cpu_to_pnode(int cpu) { - return uv_blade_info[uv_cpu_to_blade_id(cpu)].nasid; + return uv_blade_info[uv_cpu_to_blade_id(cpu)].pnode; } -/* Convert a node number to the NASID of the blade */ -static inline int uv_node_to_nasid(int nid) +/* Convert a linux node number to the PNODE of the blade */ +static inline int uv_node_to_pnode(int nid) { - return uv_blade_info[uv_node_to_blade_id(nid)].nasid; + return uv_blade_info[uv_node_to_blade_id(nid)].pnode; } /* Maximum possible number of blades */ diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h index 3b69fe6..ac98460 100644 --- a/include/asm-x86/uv/uv_mmrs.h +++ b/include/asm-x86/uv/uv_mmrs.h @@ -11,11 +11,46 @@ #ifndef __ASM_X86_UV_MMRS__ #define __ASM_X86_UV_MMRS__ -/* - * AUTO GENERATED - Do not edit - */ +#define UV_MMR_ENABLE (1UL << 63) - #define UV_MMR_ENABLE (1UL << 63) +/* ========================================================================= */ +/* UVH_BAU_DATA_CONFIG */ +/* ========================================================================= */ +#define UVH_BAU_DATA_CONFIG 0x61680UL +#define UVH_BAU_DATA_CONFIG_32 0x0450 + +#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 +#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_BAU_DATA_CONFIG_DM_SHFT 8 +#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 +#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12 +#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_BAU_DATA_CONFIG_P_SHFT 13 +#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_BAU_DATA_CONFIG_T_SHFT 15 +#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_BAU_DATA_CONFIG_M_SHFT 16 +#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 +#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_bau_data_config_u { + unsigned long v; + struct uvh_bau_data_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; /* ========================================================================= */ /* UVH_IPI_INT */ @@ -109,6 +144,7 @@ union uvh_lb_bau_intd_payload_queue_tail_u { /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x0aa0 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL @@ -169,6 +205,7 @@ union uvh_lb_bau_intd_software_acknowledge_u { /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0aa8 /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ @@ -248,6 +285,331 @@ union uvh_lb_bau_sb_descriptor_base_u { }; /* ========================================================================= */ +/* UVH_LB_MCAST_AOERR0_RPT_ENABLE */ +/* ========================================================================= */ +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE 0x50b20UL + +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_SHFT 0 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_MASK 0x0000000000000001UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_SHFT 1 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_MASK 0x0000000000000002UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_SHFT 2 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_MASK 0x0000000000000004UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_SHFT 3 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_MASK 0x0000000000000008UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_SHFT 4 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_MASK 0x0000000000000010UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_SHFT 5 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_MASK 0x0000000000000020UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_SHFT 6 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_MASK 0x0000000000000040UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_SHFT 7 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_MASK 0x0000000000000080UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_SHFT 8 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_MASK 0x0000000000000100UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_SHFT 9 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_MASK 0x0000000000000200UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_SHFT 10 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_MASK 0x0000000000000400UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_SHFT 11 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_MASK 0x0000000000000800UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_SHFT 12 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_MASK 0x0000000000001000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_SHFT 13 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_MASK 0x0000000000002000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_SHFT 14 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_MASK 0x0000000000004000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_SHFT 15 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_MASK 0x0000000000008000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_SHFT 16 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_MASK 0x0000000000010000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_SHFT 17 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_MASK 0x0000000000020000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_SHFT 18 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_MASK 0x0000000000040000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_SHFT 19 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_MASK 0x0000000000080000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_SHFT 20 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_MASK 0x0000000000100000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_SHFT 21 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_MASK 0x0000000000200000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_TIMEOUT_SHFT 22 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_TIMEOUT_MASK 0x0000000000400000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 23 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000000800000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 24 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000001000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 25 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000002000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 26 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000004000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 27 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000008000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 28 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000010000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 29 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000020000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 30 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000040000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 31 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000080000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 32 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000100000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 33 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000200000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 34 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000400000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 35 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000800000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 36 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000001000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 37 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000002000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 38 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000004000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 39 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000008000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 40 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000010000000000UL +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 41 +#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000020000000000UL + +union uvh_lb_mcast_aoerr0_rpt_enable_u { + unsigned long v; + struct uvh_lb_mcast_aoerr0_rpt_enable_s { + unsigned long mcast_obese_msg : 1; /* RW */ + unsigned long mcast_data_sb_err : 1; /* RW */ + unsigned long mcast_nack_buff_parity : 1; /* RW */ + unsigned long mcast_timeout : 1; /* RW */ + unsigned long mcast_inactive_reply : 1; /* RW */ + unsigned long mcast_upgrade_error : 1; /* RW */ + unsigned long mcast_reg_count_underflow : 1; /* RW */ + unsigned long mcast_rep_obese_msg : 1; /* RW */ + unsigned long ucache_req_runt_msg : 1; /* RW */ + unsigned long ucache_req_obese_msg : 1; /* RW */ + unsigned long ucache_req_data_sb_err : 1; /* RW */ + unsigned long ucache_rep_runt_msg : 1; /* RW */ + unsigned long ucache_rep_obese_msg : 1; /* RW */ + unsigned long ucache_rep_data_sb_err : 1; /* RW */ + unsigned long ucache_rep_command_err : 1; /* RW */ + unsigned long ucache_pend_timeout : 1; /* RW */ + unsigned long macc_req_runt_msg : 1; /* RW */ + unsigned long macc_req_obese_msg : 1; /* RW */ + unsigned long macc_req_data_sb_err : 1; /* RW */ + unsigned long macc_rep_runt_msg : 1; /* RW */ + unsigned long macc_rep_obese_msg : 1; /* RW */ + unsigned long macc_rep_data_sb_err : 1; /* RW */ + unsigned long macc_timeout : 1; /* RW */ + unsigned long macc_spurious_event : 1; /* RW */ + unsigned long ioh_destination_table_parity : 1; /* RW */ + unsigned long get_had_error_reply : 1; /* RW */ + unsigned long get_timeout : 1; /* RW */ + unsigned long lock_manager_had_error_reply : 1; /* RW */ + unsigned long put_had_error_reply : 1; /* RW */ + unsigned long put_timeout : 1; /* RW */ + unsigned long sb_activation_overrun : 1; /* RW */ + unsigned long completed_gb_activation_had_error_reply : 1; /* RW */ + unsigned long completed_gb_activation_timeout : 1; /* RW */ + unsigned long descriptor_buffer_0_parity : 1; /* RW */ + unsigned long descriptor_buffer_1_parity : 1; /* RW */ + unsigned long socket_destination_table_parity : 1; /* RW */ + unsigned long bau_reply_payload_corruption : 1; /* RW */ + unsigned long io_port_destination_table_parity : 1; /* RW */ + unsigned long intd_soft_ack_timeout : 1; /* RW */ + unsigned long int_rep_obese_msg : 1; /* RW */ + unsigned long int_rep_command_err : 1; /* RW */ + unsigned long int_timeout : 1; /* RW */ + unsigned long rsvd_42_63 : 22; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LOCAL_INT0_CONFIG */ +/* ========================================================================= */ +#define UVH_LOCAL_INT0_CONFIG 0x61000UL + +#define UVH_LOCAL_INT0_CONFIG_VECTOR_SHFT 0 +#define UVH_LOCAL_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_LOCAL_INT0_CONFIG_DM_SHFT 8 +#define UVH_LOCAL_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_LOCAL_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVH_LOCAL_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_LOCAL_INT0_CONFIG_STATUS_SHFT 12 +#define UVH_LOCAL_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_LOCAL_INT0_CONFIG_P_SHFT 13 +#define UVH_LOCAL_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_LOCAL_INT0_CONFIG_T_SHFT 15 +#define UVH_LOCAL_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_LOCAL_INT0_CONFIG_M_SHFT 16 +#define UVH_LOCAL_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_LOCAL_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVH_LOCAL_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_local_int0_config_u { + unsigned long v; + struct uvh_local_int0_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_LOCAL_INT0_ENABLE */ +/* ========================================================================= */ +#define UVH_LOCAL_INT0_ENABLE 0x65000UL + +#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_SHFT 0 +#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_MASK 0x0000000000000001UL +#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_SHFT 1 +#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_MASK 0x0000000000000002UL +#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_SHFT 2 +#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_MASK 0x0000000000000004UL +#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_SHFT 3 +#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_MASK 0x0000000000000008UL +#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_SHFT 4 +#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_MASK 0x0000000000000010UL +#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_SHFT 5 +#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_MASK 0x0000000000000020UL +#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_SHFT 6 +#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_MASK 0x0000000000000040UL +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_SHFT 7 +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_MASK 0x0000000000000080UL +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_SHFT 8 +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_MASK 0x0000000000000100UL +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_SHFT 9 +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_MASK 0x0000000000000200UL +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_SHFT 10 +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_MASK 0x0000000000000400UL +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_SHFT 11 +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_MASK 0x0000000000000800UL +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_SHFT 12 +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_MASK 0x0000000000001000UL +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_SHFT 13 +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_MASK 0x0000000000002000UL +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_SHFT 14 +#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_MASK 0x0000000000004000UL +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_SHFT 15 +#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_MASK 0x0000000000008000UL +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_SHFT 16 +#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_MASK 0x0000000000010000UL +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_SHFT 17 +#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_MASK 0x0000000000020000UL +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_SHFT 18 +#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_MASK 0x0000000000040000UL +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_SHFT 19 +#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_MASK 0x0000000000080000UL +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_SHFT 20 +#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_MASK 0x0000000000100000UL +#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_SHFT 21 +#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_MASK 0x0000000000200000UL +#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_SHFT 22 +#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_SHFT 23 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_MASK 0x0000000000800000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_SHFT 24 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_MASK 0x0000000001000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_SHFT 25 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_MASK 0x0000000002000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_SHFT 26 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_MASK 0x0000000004000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_SHFT 27 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_MASK 0x0000000008000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_SHFT 28 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_MASK 0x0000000010000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_SHFT 29 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_MASK 0x0000000020000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_SHFT 30 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_MASK 0x0000000040000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_SHFT 31 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_MASK 0x0000000080000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_SHFT 32 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_MASK 0x0000000100000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_SHFT 33 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_MASK 0x0000000200000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_SHFT 34 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_MASK 0x0000000400000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_SHFT 35 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_MASK 0x0000000800000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_SHFT 36 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_MASK 0x0000001000000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_SHFT 37 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_MASK 0x0000002000000000UL +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_SHFT 38 +#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_MASK 0x0000004000000000UL +#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_SHFT 39 +#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_MASK 0x0000008000000000UL +#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_SHFT 40 +#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_MASK 0x0000010000000000UL +#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_SHFT 41 +#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_MASK 0x0000020000000000UL +#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_SHFT 42 +#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_MASK 0x0000040000000000UL +#define UVH_LOCAL_INT0_ENABLE_LTC_INT_SHFT 43 +#define UVH_LOCAL_INT0_ENABLE_LTC_INT_MASK 0x0000080000000000UL +#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_SHFT 44 +#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL + +union uvh_local_int0_enable_u { + unsigned long v; + struct uvh_local_int0_enable_s { + unsigned long lb_hcerr : 1; /* RW */ + unsigned long gr0_hcerr : 1; /* RW */ + unsigned long gr1_hcerr : 1; /* RW */ + unsigned long lh_hcerr : 1; /* RW */ + unsigned long rh_hcerr : 1; /* RW */ + unsigned long xn_hcerr : 1; /* RW */ + unsigned long si_hcerr : 1; /* RW */ + unsigned long lb_aoerr0 : 1; /* RW */ + unsigned long gr0_aoerr0 : 1; /* RW */ + unsigned long gr1_aoerr0 : 1; /* RW */ + unsigned long lh_aoerr0 : 1; /* RW */ + unsigned long rh_aoerr0 : 1; /* RW */ + unsigned long xn_aoerr0 : 1; /* RW */ + unsigned long si_aoerr0 : 1; /* RW */ + unsigned long lb_aoerr1 : 1; /* RW */ + unsigned long gr0_aoerr1 : 1; /* RW */ + unsigned long gr1_aoerr1 : 1; /* RW */ + unsigned long lh_aoerr1 : 1; /* RW */ + unsigned long rh_aoerr1 : 1; /* RW */ + unsigned long xn_aoerr1 : 1; /* RW */ + unsigned long si_aoerr1 : 1; /* RW */ + unsigned long rh_vpi_int : 1; /* RW */ + unsigned long system_shutdown_int : 1; /* RW */ + unsigned long lb_irq_int_0 : 1; /* RW */ + unsigned long lb_irq_int_1 : 1; /* RW */ + unsigned long lb_irq_int_2 : 1; /* RW */ + unsigned long lb_irq_int_3 : 1; /* RW */ + unsigned long lb_irq_int_4 : 1; /* RW */ + unsigned long lb_irq_int_5 : 1; /* RW */ + unsigned long lb_irq_int_6 : 1; /* RW */ + unsigned long lb_irq_int_7 : 1; /* RW */ + unsigned long lb_irq_int_8 : 1; /* RW */ + unsigned long lb_irq_int_9 : 1; /* RW */ + unsigned long lb_irq_int_10 : 1; /* RW */ + unsigned long lb_irq_int_11 : 1; /* RW */ + unsigned long lb_irq_int_12 : 1; /* RW */ + unsigned long lb_irq_int_13 : 1; /* RW */ + unsigned long lb_irq_int_14 : 1; /* RW */ + unsigned long lb_irq_int_15 : 1; /* RW */ + unsigned long l1_nmi_int : 1; /* RW */ + unsigned long stop_clock : 1; /* RW */ + unsigned long asic_to_l1 : 1; /* RW */ + unsigned long l1_to_asic : 1; /* RW */ + unsigned long ltc_int : 1; /* RW */ + unsigned long la_seq_trigger : 1; /* RW */ + unsigned long rsvd_45_63 : 19; /* */ + } s; +}; + +/* ========================================================================= */ /* UVH_NODE_ID */ /* ========================================================================= */ #define UVH_NODE_ID 0x0UL @@ -284,6 +646,73 @@ union uvh_node_id_u { }; /* ========================================================================= */ +/* UVH_NODE_PRESENT_TABLE */ +/* ========================================================================= */ +#define UVH_NODE_PRESENT_TABLE 0x1400UL +#define UVH_NODE_PRESENT_TABLE_DEPTH 16 + +#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0 +#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + +union uvh_node_present_table_u { + unsigned long v; + struct uvh_node_present_table_s { + unsigned long nodes : 64; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_0_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_1_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_2_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL @@ -369,5 +798,77 @@ union uvh_si_addr_map_config_u { } s; }; +/* ========================================================================= */ +/* UVH_SI_ALIAS0_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL + +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias0_overlay_config_u { + unsigned long v; + struct uvh_si_alias0_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS1_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL + +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias1_overlay_config_u { + unsigned long v; + struct uvh_si_alias1_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS2_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL + +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias2_overlay_config_u { + unsigned long v; + struct uvh_si_alias2_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + #endif /* __ASM_X86_UV_MMRS__ */ -- cgit v0.10.2 From 15ce60056b24a65b65e28de973a9fd8ac0750a2f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Jun 2008 13:20:11 +0200 Subject: xen: export get_phys_to_machine -tip testing found the following xen-console symbols trouble: ERROR: "get_phys_to_machine" [drivers/video/xen-fbfront.ko] undefined! ERROR: "get_phys_to_machine" [drivers/net/xen-netfront.ko] undefined! ERROR: "get_phys_to_machine" [drivers/input/xen-kbdfront.ko] undefined! with: http://redhat.com/~mingo/misc/config-Mon_Jun__2_12_25_13_CEST_2008.bad diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index eef3b5c..17f374e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -136,6 +136,7 @@ unsigned long get_phys_to_machine(unsigned long pfn) idx = p2m_index(pfn); return p2m_top[topidx][idx]; } +EXPORT_SYMBOL_GPL(get_phys_to_machine); static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) { -- cgit v0.10.2 From e2426cf85f8db5891fb5831323d2d0c176c4dadc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 31 May 2008 01:24:27 +0100 Subject: xen: avoid hypercalls when updating unpinned pud/pmd When operating on an unpinned pagetable (ie, one under construction or destruction), it isn't necessary to use a hypercall to update a pud/pmd entry. Jan Beulich observed that a similar optimisation avoided many thousands of hypercalls while doing a kernel build. One tricky part is that early in the kernel boot there's no page structure, so we can't check to see if the page is pinned. In that case, we just always use the hypercall. Signed-off-by: Jeremy Fitzhardinge Cc: Jan Beulich Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b94f63a..ed9f04b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -901,6 +901,14 @@ static __init void xen_pagetable_setup_done(pgd_t *base) pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); } +static __init void xen_post_allocator_init(void) +{ + pv_mmu_ops.set_pmd = xen_set_pmd; + pv_mmu_ops.set_pud = xen_set_pud; + + xen_mark_init_mm_pinned(); +} + /* This is called once we have the cpu_possible_map */ void xen_setup_vcpu_info_placement(void) { @@ -988,7 +996,7 @@ static const struct pv_init_ops xen_init_ops __initdata = { .banner = xen_banner, .memory_setup = xen_memory_setup, .arch_setup = xen_arch_setup, - .post_allocator_init = xen_mark_init_mm_pinned, + .post_allocator_init = xen_post_allocator_init, }; static const struct pv_time_ops xen_time_ops __initdata = { @@ -1100,7 +1108,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .set_pte = NULL, /* see xen_pagetable_setup_* */ .set_pte_at = xen_set_pte_at, - .set_pmd = xen_set_pmd, + .set_pmd = xen_set_pmd_hyper, .pte_val = xen_pte_val, .pte_flags = native_pte_val, @@ -1111,7 +1119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .set_pte_atomic = xen_set_pte_atomic, .set_pte_present = xen_set_pte_at, - .set_pud = xen_set_pud, + .set_pud = xen_set_pud_hyper, .pte_clear = xen_pte_clear, .pmd_clear = xen_pmd_clear, diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 17f374e..4fa0934 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -223,7 +223,14 @@ void make_lowmem_page_readwrite(void *vaddr) } -void xen_set_pmd(pmd_t *ptr, pmd_t val) +static bool page_pinned(void *ptr) +{ + struct page *page = virt_to_page(ptr); + + return PagePinned(page); +} + +void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) { struct multicall_space mcs; struct mmu_update *u; @@ -241,6 +248,18 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val) preempt_enable(); } +void xen_set_pmd(pmd_t *ptr, pmd_t val) +{ + /* If page is not pinned, we can just update the entry + directly */ + if (!page_pinned(ptr)) { + *ptr = val; + return; + } + + xen_set_pmd_hyper(ptr, val); +} + /* * Associate a virtual page frame with a given physical page frame * and protection flags for that frame. @@ -348,7 +367,7 @@ pmdval_t xen_pmd_val(pmd_t pmd) return ret; } -void xen_set_pud(pud_t *ptr, pud_t val) +void xen_set_pud_hyper(pud_t *ptr, pud_t val) { struct multicall_space mcs; struct mmu_update *u; @@ -366,6 +385,18 @@ void xen_set_pud(pud_t *ptr, pud_t val) preempt_enable(); } +void xen_set_pud(pud_t *ptr, pud_t val) +{ + /* If page is not pinned, we can just update the entry + directly */ + if (!page_pinned(ptr)) { + *ptr = val; + return; + } + + xen_set_pud_hyper(ptr, val); +} + void xen_set_pte(pte_t *ptep, pte_t pte) { ptep->pte_high = pte.pte_high; @@ -387,7 +418,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) void xen_pmd_clear(pmd_t *pmdp) { - xen_set_pmd(pmdp, __pmd(0)); + set_pmd(pmdp, __pmd(0)); } pmd_t xen_make_pmd(pmdval_t pmd) @@ -758,7 +789,7 @@ void xen_exit_mmap(struct mm_struct *mm) spin_lock(&mm->page_table_lock); /* pgd may not be pinned in the error exit path of execve */ - if (PagePinned(virt_to_page(mm->pgd))) + if (page_pinned(mm->pgd)) xen_pgd_unpin(mm->pgd); spin_unlock(&mm->page_table_lock); diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 5fe961c..e3dd09e 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -25,10 +25,6 @@ enum pt_level { void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); -void xen_set_pte(pte_t *ptep, pte_t pteval); -void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval); -void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); @@ -45,10 +41,14 @@ pte_t xen_make_pte(pteval_t); pmd_t xen_make_pmd(pmdval_t); pgd_t xen_make_pgd(pgdval_t); +void xen_set_pte(pte_t *ptep, pte_t pteval); void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); void xen_set_pte_atomic(pte_t *ptep, pte_t pte); +void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); void xen_set_pud(pud_t *ptr, pud_t val); +void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval); +void xen_set_pud_hyper(pud_t *ptr, pud_t val); void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_pmd_clear(pmd_t *pmdp); -- cgit v0.10.2 From 9c7a794209f8a91f47697c3be20597eb60531e6d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 31 May 2008 01:33:02 +0100 Subject: xen: restore vcpu_info mapping If we're using vcpu_info mapping, then make sure its restored on all processors before relasing them from stop_machine. The only complication is that if this fails, we can't continue because we've already made assumptions that the mapping is available (baked in calls to the _direct versions of the functions, for example). Fortunately this can only happen with a 32-bit hypervisor, which may possibly run out of mapping space. On a 64-bit hypervisor, this is a non-issue. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ed9f04b..8e6152e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -98,7 +98,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; */ static int have_vcpu_info_placement = 1; -static void __init xen_vcpu_setup(int cpu) +static void xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; @@ -136,6 +136,34 @@ static void __init xen_vcpu_setup(int cpu) } } +/* + * On restore, set the vcpu placement up again. + * If it fails, then we're in a bad state, since + * we can't back out from using it... + */ +void xen_vcpu_restore(void) +{ + if (have_vcpu_info_placement) { + int cpu; + + for_each_online_cpu(cpu) { + bool other_cpu = (cpu != smp_processor_id()); + + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + BUG(); + + xen_vcpu_setup(cpu); + + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + BUG(); + } + + BUG_ON(!have_vcpu_info_placement); + } +} + static void __init xen_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 7620a16..7ab10f6 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -26,6 +26,8 @@ void xen_pre_suspend(void) void xen_post_suspend(int suspend_cancelled) { + xen_setup_shared_info(); + if (suspend_cancelled) { xen_start_info->store_mfn = pfn_to_mfn(xen_start_info->store_mfn); @@ -35,8 +37,8 @@ void xen_post_suspend(int suspend_cancelled) #ifdef CONFIG_SMP xen_cpu_initialized_map = cpu_online_map; #endif + xen_vcpu_restore(); } - xen_setup_shared_info(); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index a0503ac..a457e03 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -26,6 +26,7 @@ char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void __init xen_init_IRQ(void); void xen_enable_sysenter(void); +void xen_vcpu_restore(void); void __init xen_build_dynamic_phys_to_machine(void); -- cgit v0.10.2 From d07af1f0e3a3e378074fc36322dd7b0e72d9a3e2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 31 May 2008 01:33:03 +0100 Subject: xen: resume timers on all vcpus On resume, the vcpu timer modes will not be restored. The timer infrastructure doesn't do this for us, since it assumes the cpus are offline. We can just poke the other vcpus into the right mode directly though. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 7ab10f6..251669a 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -38,6 +38,7 @@ void xen_post_suspend(int suspend_cancelled) xen_cpu_initialized_map = cpu_online_map; #endif xen_vcpu_restore(); + xen_timer_resume(); } } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5..ea137fb 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -572,6 +572,19 @@ void xen_setup_cpu_clockevents(void) clockevents_register_device(&__get_cpu_var(xen_clock_events)); } +void xen_timer_resume(void) +{ + int cpu; + + if (xen_clockevent != &xen_vcpuop_clockevent) + return; + + for_each_online_cpu(cpu) { + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + BUG(); + } +} + __init void xen_time_init(void) { int cpu = smp_processor_id(); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index a457e03..9a05559 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -37,6 +37,7 @@ void __init xen_time_init(void); unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); unsigned long long xen_sched_clock(void); +void xen_timer_resume(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); -- cgit v0.10.2 From 7e0edc1bc343231029084761ebf59e522902eb49 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 31 May 2008 01:33:04 +0100 Subject: xen: add new Xen elfnote types and use them appropriately Define recently added XEN_ELFNOTEs, and use them appropriately. Most significantly, this enables domain checkpointing (xm save -c). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 2ab5f42..ef6c9e0 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -7,6 +7,7 @@ #include #include #include +#include __INIT ENTRY(startup_xen) @@ -32,5 +33,9 @@ ENTRY(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, + .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) #endif /*CONFIG_XEN */ diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index a64d3df..7a8262c 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -120,6 +120,26 @@ */ #define XEN_ELFNOTE_BSD_SYMTAB 11 +/* + * The lowest address the hypervisor hole can begin at (numeric). + * + * This must not be set higher than HYPERVISOR_VIRT_START. Its presence + * also indicates to the hypervisor that the kernel can deal with the + * hole starting at a higher address. + */ +#define XEN_ELFNOTE_HV_START_LOW 12 + +/* + * List of maddr_t-sized mask/value pairs describing how to recognize + * (non-present) L1 page table entries carrying valid MFNs (numeric). + */ +#define XEN_ELFNOTE_L1_MFN_VALID 13 + +/* + * Whether or not the guest supports cooperative suspend cancellation. + */ +#define XEN_ELFNOTE_SUSPEND_CANCEL 14 + #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ /* -- cgit v0.10.2 From e0773410247f1e5fc6f7c52a4c5f3c6c9873d527 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 31 May 2008 14:24:02 +0530 Subject: ftrace: export kretprobe_trampoline for function tracer Follow suit from kprobe implementations on other archs and make kretprobe_trampoline non-static. Ftrace implmentation (more specifically, kernel/trace/trace.c) requires access to it (see-> http://kerneltrap.org/mailarchive/linux-kernel/2008/5/27/1955234). Signed-off-by: Abhishek Sagar Signed-off-by: Ingo Molnar diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 5593dd2..5ee39e1 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -274,7 +274,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, * for kretprobe handlers which should normally be interested in r0 only * anyway. */ -static void __attribute__((naked)) __kprobes kretprobe_trampoline(void) +void __naked __kprobes kretprobe_trampoline(void) { __asm__ __volatile__ ( "stmdb sp!, {r0 - r11} \n\t" diff --git a/include/asm-arm/kprobes.h b/include/asm-arm/kprobes.h index c042194..b1a3787 100644 --- a/include/asm-arm/kprobes.h +++ b/include/asm-arm/kprobes.h @@ -59,6 +59,7 @@ struct kprobe_ctlblk { }; void arch_remove_kprobe(struct kprobe *); +void kretprobe_trampoline(void); int kprobe_trap_handler(struct pt_regs *regs, unsigned int instr); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); -- cgit v0.10.2 From 3d1ba1da2b4ff4ace7801e99fb9a3095b182d847 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Jun 2008 13:50:10 +0200 Subject: x86: fix nmi.c build bug apic.h needs to be included for the apic_write_around() definition. diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index bf143f1..cbd4fa3 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -11,6 +11,8 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ +#include + #include #include #include -- cgit v0.10.2 From 1a7e612fa5ea0311232bd5418a40ec7280557789 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 31 May 2008 16:18:11 +0100 Subject: [ARM] 5074/1: fix warning: missing terminating ' character Signed-off-by: Dmitry Baryshkov Acked-by: Richard Purdie Signed-off-by: Russell King diff --git a/include/asm-arm/arch-pxa/pxa27x-udc.h b/include/asm-arm/arch-pxa/pxa27x-udc.h index 9cf0b1f..2d4e9cd 100644 --- a/include/asm-arm/arch-pxa/pxa27x-udc.h +++ b/include/asm-arm/arch-pxa/pxa27x-udc.h @@ -2,7 +2,7 @@ #define _ASM_ARCH_PXA27X_UDC_H #ifdef _ASM_ARCH_PXA25X_UDC_H -#error You can't include both PXA25x and PXA27x UDC support +#error You cannot include both PXA25x and PXA27x UDC support #endif #define UDCCR __REG(0x40600000) /* UDC Control Register */ -- cgit v0.10.2 From 720046de27ec2a96d4497dbca8ee98657efa059c Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Apr 2008 15:13:36 +0100 Subject: [ARM] pxa: don't register lpd270 cpld_irq sysdev if !lpd270 Don't register the LPD270 cpld_irq system device when we're not running on a LPD270 machine - "cpld_irq" is also registered (separately) by Lubbock and Mainstone. Signed-off-by: Russell King diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c index a20e4b1..6fd7b8b 100644 --- a/arch/arm/mach-pxa/lpd270.c +++ b/arch/arm/mach-pxa/lpd270.c @@ -134,9 +134,12 @@ static struct sys_device lpd270_irq_device = { static int __init lpd270_irq_device_init(void) { - int ret = sysdev_class_register(&lpd270_irq_sysclass); - if (ret == 0) - ret = sysdev_register(&lpd270_irq_device); + int ret = -ENODEV; + if (machine_is_logicpd_pxa270()) { + ret = sysdev_class_register(&lpd270_irq_sysclass); + if (ret == 0) + ret = sysdev_register(&lpd270_irq_device); + } return ret; } -- cgit v0.10.2 From 04ba0f656f7580d8a51a5b3441e088309141b67a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Apr 2008 15:23:25 +0100 Subject: [ARM] pxa: avoid registering multiple pxa2xx_pcmcia devices cm_x270 and mainstone both register their PCMCIA devices using the same name, resulting in a warning message from the kernel. Avoid this by making the cm_x270 and mainstone PCMCIA initialisation conditional on the machine type we're running on. Signed-off-by: Russell King diff --git a/drivers/pcmcia/pxa2xx_cm_x270.c b/drivers/pcmcia/pxa2xx_cm_x270.c index e7ab060..4a6c020 100644 --- a/drivers/pcmcia/pxa2xx_cm_x270.c +++ b/drivers/pcmcia/pxa2xx_cm_x270.c @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -147,6 +148,9 @@ static int __init cmx270_pcmcia_init(void) { int ret; + if (!machine_is_armcore()) + return -ENODEV; + cmx270_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1); if (!cmx270_pcmcia_device) diff --git a/drivers/pcmcia/pxa2xx_mainstone.c b/drivers/pcmcia/pxa2xx_mainstone.c index 145b85e..36a5996 100644 --- a/drivers/pcmcia/pxa2xx_mainstone.c +++ b/drivers/pcmcia/pxa2xx_mainstone.c @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -153,6 +154,9 @@ static int __init mst_pcmcia_init(void) { int ret; + if (!machine_is_mainstone()) + return -ENODEV; + mst_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1); if (!mst_pcmcia_device) return -ENOMEM; -- cgit v0.10.2 From 4e5e8de0dbdeb08df2b4c15fa2b0ba2216091793 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Apr 2008 15:28:11 +0100 Subject: [ARM] pxa: avoid kfreeing static data if platform device fails to register When a dynamically allocated platform device is 'put', the platform device's platform_data is kfree'd. This is bad if it's pointing at static data. Use the provided function to register platform data for these devices. This also means we can mark the pcmcia ops structures as __initdata. Signed-off-by: Russell King diff --git a/drivers/pcmcia/pxa2xx_cm_x270.c b/drivers/pcmcia/pxa2xx_cm_x270.c index 4a6c020..f123fce 100644 --- a/drivers/pcmcia/pxa2xx_cm_x270.c +++ b/drivers/pcmcia/pxa2xx_cm_x270.c @@ -131,7 +131,7 @@ static void cmx270_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) } -static struct pcmcia_low_level cmx270_pcmcia_ops = { +static struct pcmcia_low_level cmx270_pcmcia_ops __initdata = { .owner = THIS_MODULE, .hw_init = cmx270_pcmcia_hw_init, .hw_shutdown = cmx270_pcmcia_shutdown, @@ -156,10 +156,13 @@ static int __init cmx270_pcmcia_init(void) if (!cmx270_pcmcia_device) return -ENOMEM; - cmx270_pcmcia_device->dev.platform_data = &cmx270_pcmcia_ops; + ret = platform_device_add_data(cmx270_pcmcia_device, &cmx270_pcmcia_ops, + sizeof(cmx270_pcmcia_ops)); - printk(KERN_INFO "Registering cm-x270 PCMCIA interface.\n"); - ret = platform_device_add(cmx270_pcmcia_device); + if (ret == 0) { + printk(KERN_INFO "Registering cm-x270 PCMCIA interface.\n"); + ret = platform_device_add(cmx270_pcmcia_device); + } if (ret) platform_device_put(cmx270_pcmcia_device); diff --git a/drivers/pcmcia/pxa2xx_mainstone.c b/drivers/pcmcia/pxa2xx_mainstone.c index 36a5996..92d1cc3 100644 --- a/drivers/pcmcia/pxa2xx_mainstone.c +++ b/drivers/pcmcia/pxa2xx_mainstone.c @@ -137,7 +137,7 @@ static void mst_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) { } -static struct pcmcia_low_level mst_pcmcia_ops = { +static struct pcmcia_low_level mst_pcmcia_ops __initdata = { .owner = THIS_MODULE, .hw_init = mst_pcmcia_hw_init, .hw_shutdown = mst_pcmcia_hw_shutdown, @@ -161,9 +161,10 @@ static int __init mst_pcmcia_init(void) if (!mst_pcmcia_device) return -ENOMEM; - mst_pcmcia_device->dev.platform_data = &mst_pcmcia_ops; - - ret = platform_device_add(mst_pcmcia_device); + ret = platform_device_add_data(mst_pcmcia_device, &mst_pcmcia_ops, + sizeof(mst_pcmcia_ops)); + if (ret == 0) + ret = platform_device_add(mst_pcmcia_device); if (ret) platform_device_put(mst_pcmcia_device); diff --git a/drivers/pcmcia/pxa2xx_sharpsl.c b/drivers/pcmcia/pxa2xx_sharpsl.c index d5c33bd..d71f93d 100644 --- a/drivers/pcmcia/pxa2xx_sharpsl.c +++ b/drivers/pcmcia/pxa2xx_sharpsl.c @@ -222,7 +222,7 @@ static void sharpsl_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) sharpsl_pcmcia_init_reset(skt); } -static struct pcmcia_low_level sharpsl_pcmcia_ops = { +static struct pcmcia_low_level sharpsl_pcmcia_ops __initdata = { .owner = THIS_MODULE, .hw_init = sharpsl_pcmcia_hw_init, .hw_shutdown = sharpsl_pcmcia_hw_shutdown, @@ -261,10 +261,12 @@ static int __init sharpsl_pcmcia_init(void) if (!sharpsl_pcmcia_device) return -ENOMEM; - sharpsl_pcmcia_device->dev.platform_data = &sharpsl_pcmcia_ops; - sharpsl_pcmcia_device->dev.parent = platform_scoop_config->devs[0].dev; - - ret = platform_device_add(sharpsl_pcmcia_device); + ret = platform_device_add_data(sharpsl_pcmcia_device, + &sharpsl_pcmcia_ops, sizeof(sharpsl_pcmcia_ops)); + if (ret == 0) { + sharpsl_pcmcia_device->dev.parent = platform_scoop_config->devs[0].dev; + ret = platform_device_add(sharpsl_pcmcia_device); + } if (ret) platform_device_put(sharpsl_pcmcia_device); -- cgit v0.10.2 From ffdf786291636137ef2d51c3a5d340793032aa28 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 18 May 2008 14:57:59 +0100 Subject: [ARM] pxa: Add PXA3_ prefix to PXA3 specific constants standby.S contains both PXA2 and PXA3 specific code. The PXA3 specific constants clash with the PXA2 ones, so give them a prefix. Signed-off-by: Russell King diff --git a/arch/arm/mach-pxa/standby.S b/arch/arm/mach-pxa/standby.S index 167412e..a37ef1c 100644 --- a/arch/arm/mach-pxa/standby.S +++ b/arch/arm/mach-pxa/standby.S @@ -35,20 +35,20 @@ ENTRY(pxa_cpu_standby) #ifdef CONFIG_PXA3xx -#define MDCNFG 0x0000 -#define MDCNFG_DMCEN (1 << 30) -#define DDR_HCAL 0x0060 -#define DDR_HCAL_HCRNG 0x1f -#define DDR_HCAL_HCPROG (1 << 28) -#define DDR_HCAL_HCEN (1 << 31) -#define DMCIER 0x0070 -#define DMCIER_EDLP (1 << 29) -#define DMCISR 0x0078 -#define RCOMP 0x0100 -#define RCOMP_SWEVAL (1 << 31) +#define PXA3_MDCNFG 0x0000 +#define PXA3_MDCNFG_DMCEN (1 << 30) +#define PXA3_DDR_HCAL 0x0060 +#define PXA3_DDR_HCAL_HCRNG 0x1f +#define PXA3_DDR_HCAL_HCPROG (1 << 28) +#define PXA3_DDR_HCAL_HCEN (1 << 31) +#define PXA3_DMCIER 0x0070 +#define PXA3_DMCIER_EDLP (1 << 29) +#define PXA3_DMCISR 0x0078 +#define PXA3_RCOMP 0x0100 +#define PXA3_RCOMP_SWEVAL (1 << 31) ENTRY(pm_enter_standby_start) - mov r1, #0xf6000000 @ DMEMC_REG_BASE (MDCNFG) + mov r1, #0xf6000000 @ DMEMC_REG_BASE (PXA3_MDCNFG) add r1, r1, #0x00100000 /* @@ -59,54 +59,54 @@ ENTRY(pm_enter_standby_start) * This also means that only the dynamic memory controller * can be reliably accessed in the code following standby. */ - ldr r2, [r1] @ Dummy read MDCNFG + ldr r2, [r1] @ Dummy read PXA3_MDCNFG mcr p14, 0, r0, c7, c0, 0 .rept 8 nop .endr - ldr r0, [r1, #DDR_HCAL] @ Clear (and wait for) HCEN - bic r0, r0, #DDR_HCAL_HCEN - str r0, [r1, #DDR_HCAL] -1: ldr r0, [r1, #DDR_HCAL] - tst r0, #DDR_HCAL_HCEN + ldr r0, [r1, #PXA3_DDR_HCAL] @ Clear (and wait for) HCEN + bic r0, r0, #PXA3_DDR_HCAL_HCEN + str r0, [r1, #PXA3_DDR_HCAL] +1: ldr r0, [r1, #PXA3_DDR_HCAL] + tst r0, #PXA3_DDR_HCAL_HCEN bne 1b - ldr r0, [r1, #RCOMP] @ Initiate RCOMP - orr r0, r0, #RCOMP_SWEVAL - str r0, [r1, #RCOMP] + ldr r0, [r1, #PXA3_RCOMP] @ Initiate RCOMP + orr r0, r0, #PXA3_RCOMP_SWEVAL + str r0, [r1, #PXA3_RCOMP] - mov r0, #~0 @ Clear interrupts - str r0, [r1, #DMCISR] + mov r0, #~0 @ Clear interrupts + str r0, [r1, #PXA3_DMCISR] - ldr r0, [r1, #DMCIER] @ set DMIER[EDLP] - orr r0, r0, #DMCIER_EDLP - str r0, [r1, #DMCIER] + ldr r0, [r1, #PXA3_DMCIER] @ set DMIER[EDLP] + orr r0, r0, #PXA3_DMCIER_EDLP + str r0, [r1, #PXA3_DMCIER] - ldr r0, [r1, #DDR_HCAL] @ clear HCRNG, set HCPROG, HCEN - bic r0, r0, #DDR_HCAL_HCRNG - orr r0, r0, #DDR_HCAL_HCEN | DDR_HCAL_HCPROG - str r0, [r1, #DDR_HCAL] + ldr r0, [r1, #PXA3_DDR_HCAL] @ clear HCRNG, set HCPROG, HCEN + bic r0, r0, #PXA3_DDR_HCAL_HCRNG + orr r0, r0, #PXA3_DDR_HCAL_HCEN | PXA3_DDR_HCAL_HCPROG + str r0, [r1, #PXA3_DDR_HCAL] -1: ldr r0, [r1, #DMCISR] - tst r0, #DMCIER_EDLP +1: ldr r0, [r1, #PXA3_DMCISR] + tst r0, #PXA3_DMCIER_EDLP beq 1b - ldr r0, [r1, #MDCNFG] @ set MDCNFG[DMCEN] - orr r0, r0, #MDCNFG_DMCEN - str r0, [r1, #MDCNFG] -1: ldr r0, [r1, #MDCNFG] - tst r0, #MDCNFG_DMCEN + ldr r0, [r1, #PXA3_MDCNFG] @ set PXA3_MDCNFG[DMCEN] + orr r0, r0, #PXA3_MDCNFG_DMCEN + str r0, [r1, #PXA3_MDCNFG] +1: ldr r0, [r1, #PXA3_MDCNFG] + tst r0, #PXA3_MDCNFG_DMCEN beq 1b - ldr r0, [r1, #DDR_HCAL] @ set DDR_HCAL[HCRNG] + ldr r0, [r1, #PXA3_DDR_HCAL] @ set PXA3_DDR_HCAL[HCRNG] orr r0, r0, #2 @ HCRNG - str r0, [r1, #DDR_HCAL] + str r0, [r1, #PXA3_DDR_HCAL] - ldr r0, [r1, #DMCIER] @ Clear the interrupt + ldr r0, [r1, #PXA3_DMCIER] @ Clear the interrupt bic r0, r0, #0x20000000 - str r0, [r1, #DMCIER] + str r0, [r1, #PXA3_DMCIER] mov pc, lr ENTRY(pm_enter_standby_end) -- cgit v0.10.2 From e3ba22db09408baee721897fb1b50e16f071d916 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Sat, 24 May 2008 17:06:45 +0100 Subject: [ARM] 5056/1: [AT91] Cleanup YL9200 board file Cleanup the YL9200 board-support file. Other things fixed are: - Use new-style UART initialization - Register all LEDs as gpio_leds. - NOR Flash error noted in comments fixed by increasing YL9200_FLASH_SIZE - The only I2C device is the AT24C eeprom. - Setup of NWAIT pin and programming of SMC controller for the LCD/VGA. - Configure touchscreen interrupt pin. Also adding the board to the KConfig and Makefile. Signed-off-by: Andrew Victor Signed-off-by: Russell King diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig index 0fc07b6..3d549a9 100644 --- a/arch/arm/mach-at91/Kconfig +++ b/arch/arm/mach-at91/Kconfig @@ -126,6 +126,12 @@ config MACH_ECBAT91 Select this if you are using emQbit's ECB_AT91 board. +config MACH_YL9200 + bool "ucDragon YL-9200" + depends on ARCH_AT91RM9200 + help + Select this if you are using the ucDragon YL-9200 board. + endif # ---------------------------------------------------------- diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile index 8d9bc01..995be14 100644 --- a/arch/arm/mach-at91/Makefile +++ b/arch/arm/mach-at91/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_MACH_ATEB9200) += board-eb9200.o obj-$(CONFIG_MACH_KAFA) += board-kafa.o obj-$(CONFIG_MACH_PICOTUX2XX) += board-picotux200.o obj-$(CONFIG_MACH_ECBAT91) += board-ecbat91.o +obj-$(CONFIG_MACH_YL9200) += board-yl-9200.o # AT91SAM9260 board-specific support obj-$(CONFIG_MACH_AT91SAM9260EK) += board-sam9260ek.o diff --git a/arch/arm/mach-at91/board-yl-9200.c b/arch/arm/mach-at91/board-yl-9200.c index b571710..7079050 100755 --- a/arch/arm/mach-at91/board-yl-9200.c +++ b/arch/arm/mach-at91/board-yl-9200.c @@ -1,11 +1,10 @@ /* * linux/arch/arm/mach-at91/board-yl-9200.c * - * Adapted from: - *various board files in - * /arch/arm/mach-at91 - * modifications to convert to YL-9200 platform - * Copyright (C) 2007 S.Birtles + * Adapted from various board files in arch/arm/mach-at91 + * + * Modifications for YL-9200 platform: + * Copyright (C) 2007 S. Birtles * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,13 +25,14 @@ #include #include #include +#include #include #include -/*#include */ #include #include +#include +#include -/*#include */ #include #include #include @@ -45,179 +45,108 @@ #include #include #include -#include -#include #include "generic.h" -#include -#define YL_9200_FLASH_BASE AT91_CHIPSELECT_0 -#define YL_9200_FLASH_SIZE 0x800000 -/* - * Serial port configuration. - * 0 .. 3 = USART0 .. USART3 - * 4 = DBGU - *atmel_usart.0: ttyS0 at MMIO 0xfefff200 (irq = 1) is a ATMEL_SERIAL - *atmel_usart.1: ttyS1 at MMIO 0xfffc0000 (irq = 6) is a ATMEL_SERIAL - *atmel_usart.2: ttyS2 at MMIO 0xfffc4000 (irq = 7) is a ATMEL_SERIAL - *atmel_usart.3: ttyS3 at MMIO 0xfffc8000 (irq = 8) is a ATMEL_SERIAL - *atmel_usart.4: ttyS4 at MMIO 0xfffcc000 (irq = 9) is a ATMEL_SERIAL - * on the YL-9200 we are sitting at the following - *ttyS0 at MMIO 0xfefff200 (irq = 1) is a AT91_SERIAL - *ttyS1 at MMIO 0xfefc4000 (irq = 7) is a AT91_SERIAL - */ +static void __init yl9200_map_io(void) +{ + /* Initialize processor: 18.432 MHz crystal */ + at91rm9200_initialize(18432000, AT91RM9200_PQFP); -/* extern void __init yl_9200_add_device_sounder(struct gpio_sounder *sounders, int nr);*/ + /* Setup the LEDs D2=PB17 (timer), D3=PB16 (cpu) */ + at91_init_leds(AT91_PIN_PB16, AT91_PIN_PB17); -static struct at91_uart_config __initdata yl_9200_uart_config = { - .console_tty = 0, /* ttyS0 */ - .nr_tty = 3, - .tty_map = { 4, 1, 0, -1, -1 } /* ttyS0, ..., ttyS4 */ -}; + /* DBGU on ttyS0. (Rx & Tx only) */ + at91_register_uart(0, 0, 0); -static void __init yl_9200_map_io(void) -{ - /* Initialize processor: 18.432 MHz crystal */ - /*Also initialises register clocks & gpio*/ - at91rm9200_initialize(18432000, AT91RM9200_PQFP); /*we have a 3 bank system*/ + /* USART1 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */ + at91_register_uart(AT91RM9200_ID_US1, 1, ATMEL_UART_CTS | ATMEL_UART_RTS + | ATMEL_UART_DTR | ATMEL_UART_DSR | ATMEL_UART_DCD + | ATMEL_UART_RI); - /* Setup the serial ports and console */ - at91_init_serial(&yl_9200_uart_config); + /* USART0 on ttyS2. (Rx & Tx only to JP3) */ + at91_register_uart(AT91RM9200_ID_US0, 2, 0); - /* Setup the LEDs D2=PB17,D3=PB16 */ - at91_init_leds(AT91_PIN_PB16,AT91_PIN_PB17); /*cpu-led,timer-led*/ + /* USART3 on ttyS3. (Rx, Tx, RTS - RS485 interface) */ + at91_register_uart(AT91RM9200_ID_US3, 3, ATMEL_UART_RTS); + + /* set serial console to ttyS0 (ie, DBGU) */ + at91_set_serial_console(0); } -static void __init yl_9200_init_irq(void) +static void __init yl9200_init_irq(void) { at91rm9200_init_interrupts(NULL); } -static struct at91_eth_data __initdata yl_9200_eth_data = { - .phy_irq_pin = AT91_PIN_PB28, - .is_rmii = 1, -}; -static struct at91_usbh_data __initdata yl_9200_usbh_data = { - .ports = 1, /* this should be 1 not 2 for the Yl9200*/ +/* + * LEDs + */ +static struct gpio_led yl9200_leds[] = { + { /* D2 */ + .name = "led2", + .gpio = AT91_PIN_PB17, + .active_low = 1, + .default_trigger = "timer", + }, + { /* D3 */ + .name = "led3", + .gpio = AT91_PIN_PB16, + .active_low = 1, + .default_trigger = "heartbeat", + }, + { /* D4 */ + .name = "led4", + .gpio = AT91_PIN_PB15, + .active_low = 1, + }, + { /* D5 */ + .name = "led5", + .gpio = AT91_PIN_PB8, + .active_low = 1, + } }; -static struct at91_udc_data __initdata yl_9200_udc_data = { -/*on sheet 7 Schemitic rev 1.0*/ - .pullup_pin = AT91_PIN_PC4, - .vbus_pin= AT91_PIN_PC5, - .pullup_active_low = 1, /*ACTIVE LOW!! due to PNP transistor on page 7*/ - -}; /* -static struct at91_cf_data __initdata yl_9200_cf_data = { -TODO S.BIRTLES - .det_pin = AT91_PIN_xxx, - .rst_pin = AT91_PIN_xxx, - .irq_pin = ... not connected - .vcc_pin = ... always powered - + * Ethernet + */ +static struct at91_eth_data __initdata yl9200_eth_data = { + .phy_irq_pin = AT91_PIN_PB28, + .is_rmii = 1, }; -*/ -static struct at91_mmc_data __initdata yl_9200_mmc_data = { - .det_pin = AT91_PIN_PB9, /*THIS LOOKS CORRECT SHEET7*/ -/* .wp_pin = ... not connected SHEET7*/ - .slot_b = 0, - .wire4 = 1, +/* + * USB Host + */ +static struct at91_usbh_data __initdata yl9200_usbh_data = { + .ports = 1, /* PQFP version of AT91RM9200 */ }; -/* -------------------------------------------------------------------- - * Touch screen - * -------------------------------------------------------------------- */ -#if defined(CONFIG_TOUCHSCREEN_ADS7846) || defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) -static int ads7843_pendown_state(void) -{ - return !at91_get_gpio_value(AT91_PIN_PB11); /* Touchscreen PENIRQ */ -} - -static void __init at91_init_device_ts(void) -{ -/*IMPORTANT NOTE THE SPI INTERFACE IS ALREADY CONFIGURED BY XXX_DEVICES.C -THAT IS TO SAY THAT MISO,MOSI,SPCK AND CS are already configured -we only need to enable the other datapins which are: -PB10/RK1 BUSY -*/ -/* Touchscreen BUSY signal , pin,use pullup ( TODO not currently used in the ADS7843/6.c driver)*/ -at91_set_gpio_input(AT91_PIN_PB10, 1); -} - -#else -static void __init at91_init_device_ts(void) {} -#endif - -static struct ads7846_platform_data ads_info = { - .model = 7843, - .x_min = 150, - .x_max = 3830, - .y_min = 190, - .y_max = 3830, - .vref_delay_usecs = 100, -/* for a 8" touch screen*/ - //.x_plate_ohms = 603, //= 450, S.Birtles TODO - //.y_plate_ohms = 332, //= 250, S.Birtles TODO -/*for a 10.4" touch screen*/ - //.x_plate_ohms =611, - //.y_plate_ohms =325, - - .x_plate_ohms = 576, - .y_plate_ohms = 366, - // - .pressure_max = 15000, /*generally nonsense on the 7843*/ - /*number of times to send query to chip in a given run 0 equals one time (do not set to 0!! ,there is a bug in ADS 7846 code)*/ - .debounce_max = 1, - .debounce_rep = 0, - .debounce_tol = (~0), - .get_pendown_state = ads7843_pendown_state, -}; +/* + * USB Device + */ +static struct at91_udc_data __initdata yl9200_udc_data = { + .pullup_pin = AT91_PIN_PC4, + .vbus_pin = AT91_PIN_PC5, + .pullup_active_low = 1, /* Active Low due to PNP transistor (pg 7) */ -/*static struct canbus_platform_data can_info = { - .model = 2510, }; -*/ - -static struct spi_board_info yl_9200_spi_devices[] = { -/*this sticks it at: - /sys/devices/platform/atmel_spi.0/spi0.0 - /sys/bus/platform/devices/ -Documentation/spi IIRC*/ -#if defined(CONFIG_TOUCHSCREEN_ADS7846) || defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) - /*(this IS correct 04-NOV-2007)*/ - { - .modalias = "ads7846", /* because the driver is called ads7846*/ - .chip_select = 0, /*THIS MUST BE AN INDEX INTO AN ARRAY OF pins */ -/*this is ONLY TO BE USED if chipselect above is not used, it passes a pin directly for the chip select*/ - /*.controller_data =AT91_PIN_PA3 ,*/ - .max_speed_hz = 5000*26, /*(4700 * 26)-125000 * 26, (max sample rate @ 3V) * (cmd + data + overhead) */ - .bus_num = 0, - .platform_data = &ads_info, - .irq = AT91_PIN_PB11, - }, -#endif -/*we need to put our CAN driver data here!!*/ -/*THIS IS ALL DUMMY DATA*/ -/* { - .modalias = "mcp2510", //DUMMY for MCP2510 chip - .chip_select = 1,*/ /*THIS MUST BE AN INDEX INTO AN ARRAY OF pins */ - /*this is ONLY TO BE USED if chipselect above is not used, it passes a pin directly for the chip select */ - /* .controller_data =AT91_PIN_PA4 , - .max_speed_hz = 25000 * 26, - .bus_num = 0, - .platform_data = &can_info, - .irq = AT91_PIN_PC0, - }, - */ - //max SPI chip needs to go here +/* + * MMC + */ +static struct at91_mmc_data __initdata yl9200_mmc_data = { + .det_pin = AT91_PIN_PB9, + // .wp_pin = ... not connected + .wire4 = 1, }; -static struct mtd_partition __initdata yl_9200_nand_partition[] = { +/* + * NAND Flash + */ +static struct mtd_partition __initdata yl9200_nand_partition[] = { { .name = "AT91 NAND partition 1, boot", .offset = 0, @@ -242,442 +171,434 @@ static struct mtd_partition __initdata yl_9200_nand_partition[] = { .name = "AT91 NAND partition 5, ext-fs", .offset = 32 * SZ_1M, .size = 32 * SZ_1M - }, + } }; static struct mtd_partition * __init nand_partitions(int size, int *num_partitions) { - *num_partitions = ARRAY_SIZE(yl_9200_nand_partition); - return yl_9200_nand_partition; + *num_partitions = ARRAY_SIZE(yl9200_nand_partition); + return yl9200_nand_partition; } -static struct at91_nand_data __initdata yl_9200_nand_data = { - .ale= 6, - .cle= 7, - /*.det_pin = AT91_PIN_PCxx,*/ /*we don't have a det pin because NandFlash is fixed to board*/ - .rdy_pin = AT91_PIN_PC14, /*R/!B Sheet10*/ - .enable_pin = AT91_PIN_PC15, /*!CE Sheet10 */ +static struct at91_nand_data __initdata yl9200_nand_data = { + .ale = 6, + .cle = 7, + // .det_pin = ... not connected + .rdy_pin = AT91_PIN_PC14, /* R/!B (Sheet10) */ + .enable_pin = AT91_PIN_PC15, /* !CE (Sheet10) */ .partition_info = nand_partitions, }; - - /* -TODO S.Birtles -potentially a problem with the size above -physmap platform flash device: 00800000 at 10000000 -physmap-flash.0: Found 1 x16 devices at 0x0 in 16-bit bank -NOR chip too large to fit in mapping. Attempting to cope... - Intel/Sharp Extended Query Table at 0x0031 -Using buffer write method -cfi_cmdset_0001: Erase suspend on write enabled -Reducing visibility of 16384KiB chip to 8192KiB -*/ + * NOR Flash + */ +#define YL9200_FLASH_BASE AT91_CHIPSELECT_0 +#define YL9200_FLASH_SIZE 0x1000000 -static struct mtd_partition yl_9200_flash_partitions[] = { +static struct mtd_partition yl9200_flash_partitions[] = { + { + .name = "Bootloader", + .size = 0x00040000, + .offset = 0, + .mask_flags = MTD_WRITEABLE, /* force read-only */ + }, { - .name = "Bootloader", - .size = 0x00040000, - .offset = 0, - .mask_flags = MTD_WRITEABLE /* force read-only */ - },{ - .name = "Kernel", - .size = 0x001C0000, - .offset = 0x00040000, - },{ - .name = "Filesystem", - .size = MTDPART_SIZ_FULL, - .offset = 0x00200000 + .name = "Kernel", + .size = 0x001C0000, + .offset = 0x00040000, + }, + { + .name = "Filesystem", + .size = MTDPART_SIZ_FULL, + .offset = 0x00200000 } - }; -static struct physmap_flash_data yl_9200_flash_data = { - .width = 2, - .parts = yl_9200_flash_partitions, - .nr_parts = ARRAY_SIZE(yl_9200_flash_partitions), +static struct physmap_flash_data yl9200_flash_data = { + .width = 2, + .parts = yl9200_flash_partitions, + .nr_parts = ARRAY_SIZE(yl9200_flash_partitions), }; -static struct resource yl_9200_flash_resources[] = { -{ - .start = YL_9200_FLASH_BASE, - .end = YL_9200_FLASH_BASE + YL_9200_FLASH_SIZE - 1, - .flags = IORESOURCE_MEM, +static struct resource yl9200_flash_resources[] = { + { + .start = YL9200_FLASH_BASE, + .end = YL9200_FLASH_BASE + YL9200_FLASH_SIZE - 1, + .flags = IORESOURCE_MEM, } }; -static struct platform_device yl_9200_flash = { +static struct platform_device yl9200_flash = { .name = "physmap-flash", .id = 0, .dev = { - .platform_data = &yl_9200_flash_data, + .platform_data = &yl9200_flash_data, }, - .resource = yl_9200_flash_resources, - .num_resources = ARRAY_SIZE(yl_9200_flash_resources), + .resource = yl9200_flash_resources, + .num_resources = ARRAY_SIZE(yl9200_flash_resources), }; - -static struct gpio_led yl_9200_leds[] = { -/*D2 &D3 are passed directly in via at91_init_leds*/ - { - .name = "led4", /*D4*/ - .gpio = AT91_PIN_PB15, - .active_low = 1, - .default_trigger = "heartbeat", - /*.default_trigger = "timer",*/ - }, - { - .name = "led5", /*D5*/ - .gpio = AT91_PIN_PB8, - .active_low = 1, - .default_trigger = "heartbeat", - } -}; - -//static struct gpio_sounder yl_9200_sounder[] = {*/ -/*This is a simple speaker attached to a gpo line*/ - -// { -// .name = "Speaker", /*LS1*/ -// .gpio = AT91_PIN_PA22, -// .active_low = 0, -// .default_trigger = "heartbeat", - /*.default_trigger = "timer",*/ -// }, -//}; - - - -static struct i2c_board_info __initdata yl_9200_i2c_devices[] = { - { - /*TODO*/ - I2C_BOARD_INFO("CS4334", 0x00), +/* + * I2C (TWI) + */ +static struct i2c_board_info __initdata yl9200_i2c_devices[] = { + { /* EEPROM */ + I2C_BOARD_INFO("24c128", 0x50), } }; - - /* +/* * GPIO Buttons - */ +*/ #if defined(CONFIG_KEYBOARD_GPIO) || defined(CONFIG_KEYBOARD_GPIO_MODULE) -static struct gpio_keys_button yl_9200_buttons[] = { +static struct gpio_keys_button yl9200_buttons[] = { { .gpio = AT91_PIN_PA24, - .code = BTN_2, + .code = BTN_2, .desc = "SW2", .active_low = 1, .wakeup = 1, }, { .gpio = AT91_PIN_PB1, - .code = BTN_3, + .code = BTN_3, .desc = "SW3", .active_low = 1, .wakeup = 1, }, { .gpio = AT91_PIN_PB2, - .code = BTN_4, + .code = BTN_4, .desc = "SW4", .active_low = 1, .wakeup = 1, }, { .gpio = AT91_PIN_PB6, - .code = BTN_5, + .code = BTN_5, .desc = "SW5", .active_low = 1, .wakeup = 1, - }, - + } }; -static struct gpio_keys_platform_data yl_9200_button_data = { - .buttons = yl_9200_buttons, - .nbuttons = ARRAY_SIZE(yl_9200_buttons), +static struct gpio_keys_platform_data yl9200_button_data = { + .buttons = yl9200_buttons, + .nbuttons = ARRAY_SIZE(yl9200_buttons), }; -static struct platform_device yl_9200_button_device = { +static struct platform_device yl9200_button_device = { .name = "gpio-keys", .id = -1, .num_resources = 0, .dev = { - .platform_data = &yl_9200_button_data, + .platform_data = &yl9200_button_data, } }; -static void __init yl_9200_add_device_buttons(void) +static void __init yl9200_add_device_buttons(void) { - //SW2 - at91_set_gpio_input(AT91_PIN_PA24, 0); + at91_set_gpio_input(AT91_PIN_PA24, 1); /* SW2 */ at91_set_deglitch(AT91_PIN_PA24, 1); - - //SW3 - at91_set_gpio_input(AT91_PIN_PB1, 0); + at91_set_gpio_input(AT91_PIN_PB1, 1); /* SW3 */ at91_set_deglitch(AT91_PIN_PB1, 1); - //SW4 - at91_set_gpio_input(AT91_PIN_PB2, 0); + at91_set_gpio_input(AT91_PIN_PB2, 1); /* SW4 */ at91_set_deglitch(AT91_PIN_PB2, 1); - - //SW5 - at91_set_gpio_input(AT91_PIN_PB6, 0); + at91_set_gpio_input(AT91_PIN_PB6, 1); /* SW5 */ at91_set_deglitch(AT91_PIN_PB6, 1); + /* Enable buttons (Sheet 5) */ + at91_set_gpio_output(AT91_PIN_PB7, 1); + + platform_device_register(&yl9200_button_device); +} +#else +static void __init yl9200_add_device_buttons(void) {} +#endif + +/* + * Touchscreen + */ +#if defined(CONFIG_TOUCHSCREEN_ADS7846) || defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) +static int ads7843_pendown_state(void) +{ + return !at91_get_gpio_value(AT91_PIN_PB11); /* Touchscreen PENIRQ */ +} + +static struct ads7846_platform_data ads_info = { + .model = 7843, + .x_min = 150, + .x_max = 3830, + .y_min = 190, + .y_max = 3830, + .vref_delay_usecs = 100, + + /* For a 8" touch-screen */ + // .x_plate_ohms = 603, + // .y_plate_ohms = 332, + + /* For a 10.4" touch-screen */ + // .x_plate_ohms = 611, + // .y_plate_ohms = 325, + + .x_plate_ohms = 576, + .y_plate_ohms = 366, + + .pressure_max = 15000, /* generally nonsense on the 7843 */ + .debounce_max = 1, + .debounce_rep = 0, + .debounce_tol = (~0), + .get_pendown_state = ads7843_pendown_state, +}; - at91_set_gpio_output(AT91_PIN_PB7, 1); /* #TURN BUTTONS ON, SHEET 5 of schematics */ - platform_device_register(&yl_9200_button_device); +static void __init yl9200_add_device_ts(void) +{ + at91_set_gpio_input(AT91_PIN_PB11, 1); /* Touchscreen interrupt pin */ + at91_set_gpio_input(AT91_PIN_PB10, 1); /* Touchscreen BUSY signal - not used! */ } #else -static void __init yl_9200_add_device_buttons(void) {} +static void __init yl9200_add_device_ts(void) {} +#endif + +/* + * SPI devices + */ +static struct spi_board_info yl9200_spi_devices[] = { +#if defined(CONFIG_TOUCHSCREEN_ADS7846) || defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) + { /* Touchscreen */ + .modalias = "ads7846", + .chip_select = 0, + .max_speed_hz = 5000 * 26, + .platform_data = &ads_info, + .irq = AT91_PIN_PB11, + }, #endif + { /* CAN */ + .modalias = "mcp2510", + .chip_select = 1, + .max_speed_hz = 25000 * 26, + .irq = AT91_PIN_PC0, + } +}; +/* + * LCD / VGA + * + * EPSON S1D13806 FB (discontinued chip) + * EPSON S1D13506 FB + */ #if defined(CONFIG_FB_S1D135XX) || defined(CONFIG_FB_S1D13XXX_MODULE) #include